summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2013-05-09 15:36:30 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2013-05-09 15:36:30 -0400
commit ada530f6bc24bf4e536c1c859d798b836ec0799c (patch)
tree 56ccbf149a19382b7e1b67ca2f9c8ad37d07ed0f
parent b010466713ad46606f310a9dcd54cea19e16ee9a (diff)
Changed lxml.feed() to handle the eventuality that it may be given a bytestring.
-rw-r--r-- bs4/builder/_lxml.py 7
-rw-r--r-- bs4/tests/test_lxml.py 1
2 files changed, 5 insertions, 3 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 8638c59..be35d70 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -3,6 +3,7 @@ __all__ = [
'LXMLTreeBuilder',
]
+from io import BytesIO
from StringIO import StringIO
import collections
from lxml import etree
@@ -65,7 +66,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
:return: A 3-tuple (markup, original encoding, encoding
declared within markup).
"""
- if isinstance(markup, unicode) or True:
+ if isinstance(markup, unicode):
return markup, None, None, False
try_encodings = [user_specified_encoding, document_declared_encoding]
@@ -75,7 +76,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
dammit.contains_replacement_characters)
def feed(self, markup):
- if isinstance(markup, basestring):
+ if isinstance(markup, bytes):
+ markup = BytesIO(markup)
+ elif isinstance(markup, unicode):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
# or the parser won't be initialized.
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index f32fc2b..39e4bd4 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -85,4 +85,3 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
@property
def default_builder(self):
return LXMLTreeBuilderForXML()
-