summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 6
-rw-r--r-- bs4/__init__.py 4
2 files changed, 9 insertions, 1 deletion
diff --git a/NEWS.txt b/NEWS.txt
index 9b4ac1e..2a3bb1e 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,5 +1,7 @@
= 4.3.3 (Unreleased) =
+* Added a Chinese translation of the documentation by Delong .w.
+
* Fixed yet another problem that caused the html5lib tree builder to
create a disconnected parse tree. [bug=1237763]
@@ -17,7 +19,9 @@
the html.parser constructor to avoid a warning and future
failures. Patch by Stefano Revera. [bug=1375721]
-* Added a Chinese translation of the documentation by Delong .w.
+* The warning when you pass in a filename or URL as markup will now be
+ displayed correctly even if the filename or URL is a Unicode
+ string. [bug=1268888]
= 4.3.2 (20131002) =
diff --git a/bs4/__init__.py b/bs4/__init__.py
index b74acee..a53048d 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -186,6 +186,8 @@ class BeautifulSoup(Tag):
# system. Just let it go.
pass
if is_file:
+ if isinstance(markup, unicode):
+ markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
if markup[:5] == "http:" or markup[:6] == "https:":
@@ -193,6 +195,8 @@ class BeautifulSoup(Tag):
# Python 3 otherwise.
if ((isinstance(markup, bytes) and not b' ' in markup)
or (isinstance(markup, unicode) and not u' ' in markup)):
+ if isinstance(markup, unicode):
+ markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)