diff options
-rw-r--r-- | beautifulsoup/builder/__init__.py | 2 | ||||
-rw-r--r-- | beautifulsoup/builder/lxml_builder.py | 6 | ||||
-rw-r--r-- | tests/test_html5lib.py | 3 | ||||
-rw-r--r-- | tests/test_lxml.py | 12 |
4 files changed, 17 insertions, 6 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py index 37e9c8a..deaa613 100644 --- a/beautifulsoup/builder/__init__.py +++ b/beautifulsoup/builder/__init__.py @@ -116,7 +116,7 @@ class SAXTreeBuilder(TreeBuilder): class HTMLTreeBuilder(TreeBuilder): """This TreeBuilder knows facts about HTML. - Such as which tags are self-closing tags. + Such as which tags are empty-element tags. """ assume_html = True diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py index 0cc9e51..e431a62 100644 --- a/beautifulsoup/builder/lxml_builder.py +++ b/beautifulsoup/builder/lxml_builder.py @@ -13,9 +13,9 @@ class LXMLTreeBuilderForXML(TreeBuilder): # will be instantiated with default arguments. return etree.XMLParser - def __init__(self, parser=None, self_closing_tags=None): - if self_closing_tags is not None: - self.self_closing_tags = set(self_closing_tags) + def __init__(self, parser=None, empty_element_tags=None): + if empty_element_tags is not None: + self.empty_element_tags = set(empty_element_tags) if parser is None: # Use the default parser. parser = self.default_parser diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 336f9a5..021c603 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -91,6 +91,9 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): ('<table><tbody><tr></tr></tbody></table>' '<table><tbody><tr id="nested"></tr></tbody></table>')) + def test_empty_element_tag_with_contents(self): + self.assertSoupEquals("<br>foo</br>", "<br />foo<br />") + def test_doctype_in_body(self): markup = "<p>one<!DOCTYPE foobar>two</p>" self.assertSoupEquals(markup, "<p>onetwo</p>") diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 602fe05..77dd1f1 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -33,14 +33,19 @@ class TestLXMLBuilder(SoupTest): "<a><B><Cd><EFG></efg></CD></b></A>", "<a><b><cd><efg></efg></cd></b></a>") - def test_self_closing(self): - # HTML's self-closing tags are recognized as such. + def test_empty_element(self): + # HTML's empty-element tags are recognized as such. self.assertSoupEquals( "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>") self.assertSoupEquals( "<p>Foo<br/>bar</p>", "<p>Foo<br />bar</p>") + def test_empty_tag_thats_not_an_empty_element_tag(self): + # A tag that is empty but not an HTML empty-element tag + # is not presented as an empty-element tag. + self.assertSoupEquals("<p>", "<p></p>") + def test_comment(self): # Comments are represented as Comment objects. markup = "<p>foo<!--foobar-->baz</p>" @@ -351,6 +356,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): '<table><tr><table><tr id="nested">', '<table><tr><table><tr id="nested"></tr></table></tr></table>') + def test_empty_element_tag_with_contents(self): + self.assertSoupEquals("<br>foo</br>", "<br />foo") + def test_doctype_in_body(self): markup = "<p>one<!DOCTYPE foobar>two</p>" self.assertSoupEquals(markup) |