summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2013-10-01 21:55:22 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-10-01 21:55:22 -0400
commit: 6a06b9d998ea9502a93db14ebb65395b20c1b30f (patch)
tree: 0999ebae9dcc14522bdbb8bb1a3289d7d7498cbf /bs4/__init__.py
parent: 623d8c13b79003921fd13b59328d0c28e01eabd0 (diff)
Fixed a bug in which short Unicode input was improperly encoded to ASCII when checking whether or not it was a file on disk. [bug=1227016]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py | 6
1 files changed, 5 insertions, 1 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 272d44a..6d44c95 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -164,7 +164,11 @@ class BeautifulSoup(Tag):
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
- if os.path.exists(markup):
+ if isinstance(markup, unicode):
+ possible_filename = markup.encode("utf8")
+ else:
+ possible_filename = markup
+ if os.path.exists(possible_filename):
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":