summary | refs | log | tree | commit | diff
path: root/bs4
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2014-12-10 19:06:33 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2014-12-10 19:06:33 -0500
commit: a1d16f0b14a9f08f54677d933b4ddb8e32ffabb1 (patch)
tree: c7889c0245b47aea083bdec279afb2c0194ca644 /bs4
parent: 5048db4ab06f6065d8d364500b38631456b58690 (diff)
The warning when you pass in a filename or URL as markup will now be
displayed correctly even if the filename or URL is a Unicode string. [bug=1268888]
Diffstat (limited to 'bs4')
-rw-r--r-- bs4/__init__.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index b74acee..a53048d 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -186,6 +186,8 @@ class BeautifulSoup(Tag):
# system. Just let it go.
pass
if is_file:
+ if isinstance(markup, unicode):
+ markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":
@@ -193,6 +195,8 @@ class BeautifulSoup(Tag):
# Python 3 otherwise.
if ((isinstance(markup, bytes) and not b' ' in markup)
or (isinstance(markup, unicode) and not u' ' in markup)):
+ if isinstance(markup, unicode):
+ markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)