summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 13
1 files changed, 11 insertions, 2 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 6d44c95..341efc6 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -164,11 +164,20 @@ class BeautifulSoup(Tag):
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
- if isinstance(markup, unicode):
+ if (isinstance(markup, unicode)
+ and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
possible_filename = markup
- if os.path.exists(possible_filename):
+ is_file = False
+ try:
+ is_file = os.path.exists(possible_filename)
+ except Exception, e:
+ # This is almost certainly a problem involving
+ # characters not valid in filenames on this
+ # system. Just let it go.
+ pass
+ if is_file:
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":