diff options
-rw-r--r-- | NEWS.txt | 6 | ||||
-rw-r--r-- | bs4/__init__.py | 4 |
2 files changed, 9 insertions, 1 deletion
@@ -1,5 +1,7 @@
 = 4.3.3 (Unreleased) =
 
+* Added a Chinese translation of the documentation by Delong .w.
+
 * Fixed yet another problem that caused the html5lib tree builder to
   create a disconnected parse tree. [bug=1237763]
 
@@ -17,7 +19,9 @@
   the html.parser constructor to avoid a warning and future
   failures. Patch by Stefano Revera. [bug=1375721]
 
-* Added a Chinese translation of the documentation by Delong .w.
+* The warning when you pass in a filename or URL as markup will now be
+  displayed correctly even if the filename or URL is a Unicode
+  string. [bug=1268888]
 
 = 4.3.2 (20131002) =
 
diff --git a/bs4/__init__.py b/bs4/__init__.py
index b74acee..a53048d 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -186,6 +186,8 @@ class BeautifulSoup(Tag):
                 # system. Just let it go.
                 pass
             if is_file:
+                if isinstance(markup, unicode):
+                    markup = markup.encode("utf8")
                 warnings.warn(
                     '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
             if markup[:5] == "http:" or markup[:6] == "https:":
@@ -193,6 +195,8 @@ class BeautifulSoup(Tag):
                 # Python 3 otherwise.
                 if ((isinstance(markup, bytes) and not b' ' in markup)
                     or (isinstance(markup, unicode) and not u' ' in markup)):
+                    if isinstance(markup, unicode):
+                        markup = markup.encode("utf8")
                     warnings.warn(
                         '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)