summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2013-10-01 22:15:53 -0400
committer Leonard Richardson <leonardr@segfault.org> 2013-10-01 22:15:53 -0400
commit c0de2023544605dc1940e4d24d493a14b5300066 (patch)
tree 73d5eeba9401dd3a174b7d8dc4aef3f13cbb5e62 /bs4/__init__.py
parent 6a06b9d998ea9502a93db14ebb65395b20c1b30f (diff)
Fixed a crash when a short input contains data not valid in
filenames. [bug=1232604]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 13
1 files changed, 11 insertions, 2 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 6d44c95..341efc6 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -164,11 +164,20 @@ class BeautifulSoup(Tag):
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
- if isinstance(markup, unicode):
+ if (isinstance(markup, unicode)
+ and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
possible_filename = markup
- if os.path.exists(possible_filename):
+ is_file = False
+ try:
+ is_file = os.path.exists(possible_filename)
+ except Exception, e:
+ # This is almost certainly a problem involving
+ # characters not valid in filenames on this
+ # system. Just let it go.
+ pass
+ if is_file:
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":