diff options
-rw-r--r--  beautifulsoup/builder/__init__.py     | 10 +++++-----
-rw-r--r--  beautifulsoup/builder/lxml_builder.py | 11 ++++-------
-rw-r--r--  tests/test_lxml.py                    |  3 +++
3 files changed, 12 insertions, 12 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index deaa613..9ffa9ef 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -28,12 +28,12 @@ class TreeBuilder(Entities):
         The final markup may or may not actually present this tag as
         self-closing.
 
-        For instance: an HTML builder does not consider a <p> tag to
-        be an empty-element tag (it's not in empty_element_tags). This
-        means an empty <p> tag will be presented as "<p></p>", not
-        "<p />".
+        For instance: an HTMLBuilder does not consider a <p> tag to be
+        an empty-element tag (it's not in
+        HTMLBuilder.empty_element_tags). This means an empty <p> tag
+        will be presented as "<p></p>", not "<p />".
 
-        The default builder has no opinion about which tags are
+        The default implementation has no opinion about which tags are
         empty-element tags, so a tag will be presented as an
         empty-element tag if and only if it has no contents.
         "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index e431a62..9f4c0bd 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -11,7 +11,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     def default_parser(self):
         # This can either return a parser object or a class, which
         # will be instantiated with default arguments.
-        return etree.XMLParser
+        return etree.XMLParser(target=self, strip_cdata=False, recover=True)
 
     def __init__(self, parser=None, empty_element_tags=None):
         if empty_element_tags is not None:
@@ -71,10 +71,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         self.soup.handle_data(content)
         self.soup.endData(Comment)
 
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<html><body>%s</body></html>' % fragment
-
 
 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
 
@@ -82,5 +78,6 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
     def default_parser(self):
         return etree.HTMLParser
 
-    def end(self, name):
-        self.soup.handle_endtag(name)
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><body>%s</body></html>' % fragment
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 7d916da..c178457 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -518,6 +518,9 @@ class TestLXMLXMLBuilder(SoupTest):
     def default_builder(self):
         return LXMLTreeBuilderForXML()
 
+    def test_can_handle_invalid_xml(self):
+        self.assertSoupEquals("<a><b>", "<a><b /></a>")
+
     def test_empty_element_tag(self):
         soup = self.soup("<p><iamselfclosing /></p>")
         self.assertTrue(soup.iamselfclosing.is_empty_element)