diff options
author | Leonard Richardson <leonardr@segfault.org> | 2016-07-16 22:28:40 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2016-07-16 22:28:40 -0400 |
commit | f18cbadf256b24912837c8b0d7fd6a2dc1a1d640 (patch) | |
tree | 9f8459bab4cdcf79ae76abea31025b22ce1cd111 /bs4/__init__.py | |
parent | 120f4fcedc825b6c207263858e5bbded60a7886e (diff) |
Beautiful Soup will now work with versions of html5lib greater than
0.99999999. [bug=1603299]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- | bs4/__init__.py | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 4df3280..80b6d93 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -142,6 +142,10 @@ class BeautifulSoup(Tag):
         from_encoding = from_encoding or deprecated_argument(
             "fromEncoding", "from_encoding")
 
+        if from_encoding and isinstance(markup, unicode):
+            warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
+            from_encoding = None
+
         if len(kwargs) > 0:
             arg = kwargs.keys().pop()
             raise TypeError(
@@ -184,7 +188,10 @@ class BeautifulSoup(Tag):
 
         if hasattr(markup, 'read'):        # It's a file-type object.
             markup = markup.read()
-        elif len(markup) <= 256 and not '<' in markup:
+        elif len(markup) <= 256 and (
+                (isinstance(markup, bytes) and not b'<' in markup)
+                or (isinstance(markup, unicode) and not u'<' in markup)
+        ):
             # Print out warnings for a couple beginner problems
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
```