diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-10-01 21:55:22 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-10-01 21:55:22 -0400 |
commit | 6a06b9d998ea9502a93db14ebb65395b20c1b30f (patch) | |
tree | 0999ebae9dcc14522bdbb8bb1a3289d7d7498cbf /bs4/__init__.py | |
parent | 623d8c13b79003921fd13b59328d0c28e01eabd0 (diff) |
Fixed a bug in which short Unicode input was improperly encoded to ASCII when checking whether or not it was a file on
disk. [bug=1227016]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- | bs4/__init__.py | 6 |
1 file changed, 5 insertions, 1 deletion
```diff
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 272d44a..6d44c95 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -164,7 +164,11 @@ class BeautifulSoup(Tag):
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
             # just in case that's what the user really wants.
-            if os.path.exists(markup):
+            if isinstance(markup, unicode):
+                possible_filename = markup.encode("utf8")
+            else:
+                possible_filename = markup
+            if os.path.exists(possible_filename):
                 warnings.warn(
                     '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
             if markup[:5] == "http:" or markup[:6] == "https:":
```