summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- beautifulsoup/builder/__init__.py 2
-rw-r--r-- beautifulsoup/builder/lxml_builder.py 6
-rw-r--r-- tests/test_html5lib.py 3
-rw-r--r-- tests/test_lxml.py 12
4 files changed, 17 insertions, 6 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index 37e9c8a..deaa613 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -116,7 +116,7 @@ class SAXTreeBuilder(TreeBuilder):
class HTMLTreeBuilder(TreeBuilder):
"""This TreeBuilder knows facts about HTML.
- Such as which tags are self-closing tags.
+ Such as which tags are empty-element tags.
"""
assume_html = True
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 0cc9e51..e431a62 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -13,9 +13,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# will be instantiated with default arguments.
return etree.XMLParser
- def __init__(self, parser=None, self_closing_tags=None):
- if self_closing_tags is not None:
- self.self_closing_tags = set(self_closing_tags)
+ def __init__(self, parser=None, empty_element_tags=None):
+ if empty_element_tags is not None:
+ self.empty_element_tags = set(empty_element_tags)
if parser is None:
# Use the default parser.
parser = self.default_parser
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 336f9a5..021c603 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -91,6 +91,9 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
('<table><tbody><tr></tr></tbody></table>'
'<table><tbody><tr id="nested"></tr></tbody></table>'))
+ def test_empty_element_tag_with_contents(self):
+ self.assertSoupEquals("<br>foo</br>", "<br />foo<br />")
+
def test_doctype_in_body(self):
markup = "<p>one<!DOCTYPE foobar>two</p>"
self.assertSoupEquals(markup, "<p>onetwo</p>")
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 602fe05..77dd1f1 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -33,14 +33,19 @@ class TestLXMLBuilder(SoupTest):
"<a><B><Cd><EFG></efg></CD></b></A>",
"<a><b><cd><efg></efg></cd></b></a>")
- def test_self_closing(self):
- # HTML's self-closing tags are recognized as such.
+ def test_empty_element(self):
+ # HTML's empty-element tags are recognized as such.
self.assertSoupEquals(
"<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
self.assertSoupEquals(
"<p>Foo<br/>bar</p>", "<p>Foo<br />bar</p>")
+ def test_empty_tag_thats_not_an_empty_element_tag(self):
+ # A tag that is empty but not an HTML empty-element tag
+ # is not presented as an empty-element tag.
+ self.assertSoupEquals("<p>", "<p></p>")
+
def test_comment(self):
# Comments are represented as Comment objects.
markup = "<p>foo<!--foobar-->baz</p>"
@@ -351,6 +356,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
'<table><tr><table><tr id="nested">',
'<table><tr><table><tr id="nested"></tr></table></tr></table>')
+ def test_empty_element_tag_with_contents(self):
+ self.assertSoupEquals("<br>foo</br>", "<br />foo")
+
def test_doctype_in_body(self):
markup = "<p>one<!DOCTYPE foobar>two</p>"
self.assertSoupEquals(markup)