summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- beautifulsoup/builder/__init__.py | 10
-rw-r--r-- beautifulsoup/builder/lxml_builder.py | 11
-rw-r--r-- tests/test_lxml.py | 3
3 files changed, 12 insertions, 12 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index deaa613..9ffa9ef 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -28,12 +28,12 @@ class TreeBuilder(Entities):
The final markup may or may not actually present this tag as
self-closing.
- For instance: an HTML builder does not consider a <p> tag to
- be an empty-element tag (it's not in empty_element_tags). This
- means an empty <p> tag will be presented as "<p></p>", not
- "<p />".
+ For instance: an HTMLBuilder does not consider a <p> tag to be
+ an empty-element tag (it's not in
+ HTMLBuilder.empty_element_tags). This means an empty <p> tag
+ will be presented as "<p></p>", not "<p />".
- The default builder has no opinion about which tags are
+ The default implementation has no opinion about which tags are
empty-element tags, so a tag will be presented as an
empty-element tag if and only if it has no contents.
"<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index e431a62..9f4c0bd 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -11,7 +11,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def default_parser(self):
# This can either return a parser object or a class, which
# will be instantiated with default arguments.
- return etree.XMLParser
+ return etree.XMLParser(target=self, strip_cdata=False, recover=True)
def __init__(self, parser=None, empty_element_tags=None):
if empty_element_tags is not None:
@@ -71,10 +71,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.soup.handle_data(content)
self.soup.endData(Comment)
- def test_fragment_to_document(self, fragment):
- """See `TreeBuilder`."""
- return u'<html><body>%s</body></html>' % fragment
-
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
@@ -82,5 +78,6 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
def default_parser(self):
return etree.HTMLParser
- def end(self, name):
- self.soup.handle_endtag(name)
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return u'<html><body>%s</body></html>' % fragment
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 7d916da..c178457 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -518,6 +518,9 @@ class TestLXMLXMLBuilder(SoupTest):
def default_builder(self):
return LXMLTreeBuilderForXML()
+ def test_can_handle_invalid_xml(self):
+ self.assertSoupEquals("<a><b>", "<a><b /></a>")
+
def test_empty_element_tag(self):
soup = self.soup("<p><iamselfclosing /></p>")
self.assertTrue(soup.iamselfclosing.is_empty_element)