summary | refs | log | tree | commit | diff
path: root/tests/test_lxml.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-20 09:54:42 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-20 09:54:42 -0500
commit: ae349fd47c627f8166526fed8906811707d2f4b2 (patch)
tree: 116edd8c1d9a7cf6348f784162fd2291608833c2 /tests/test_lxml.py
parent: 158e76fd3e1005f6f5f932414cb741083d114cb6 (diff)
parent: 9f437ea591aeaf16d593350baf081315e56a8b73 (diff)
Greatly improved the handling of empty-element tags.
Diffstat (limited to 'tests/test_lxml.py')
-rw-r--r-- tests/test_lxml.py 71
1 files changed, 62 insertions, 9 deletions
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 58d16ff..7d916da 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -33,14 +33,19 @@ class TestLXMLBuilder(SoupTest):
"<a><B><Cd><EFG></efg></CD></b></A>",
"<a><b><cd><efg></efg></cd></b></a>")
- def test_self_closing(self):
- # HTML's self-closing tags are recognized as such.
+ def test_empty_element(self):
+ # HTML's empty-element tags are recognized as such.
self.assertSoupEquals(
"<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
self.assertSoupEquals(
"<p>Foo<br/>bar</p>", "<p>Foo<br />bar</p>")
+ def test_empty_tag_thats_not_an_empty_element_tag(self):
+ # A tag that is empty but not an HTML empty-element tag
+ # is not presented as an empty-element tag.
+ self.assertSoupEquals("<p>", "<p></p>")
+
def test_comment(self):
# Comments are represented as Comment objects.
markup = "<p>foo<!--foobar-->baz</p>"
@@ -303,6 +308,18 @@ class TestLXMLBuilder(SoupTest):
str = soup.p.string
#self.assertEquals(str.encode("utf-8"), expected)
+ def test_br_tag_is_empty_element(self):
+ """A <br> tag is designated as an empty-element tag."""
+ soup = self.soup("<br></br>")
+ self.assertTrue(soup.br.is_empty_element)
+ self.assertEquals(str(soup.br), "<br />")
+
+ def test_p_tag_is_not_empty_element(self):
+ """A <p> tag is not designated as an empty-element tag."""
+ soup = self.soup("<p />")
+ self.assertFalse(soup.p.is_empty_element)
+ self.assertEquals(str(soup.p), "<p></p>")
+
class TestLXMLBuilderInvalidMarkup(SoupTest):
"""Tests of invalid markup for the LXML tree builder.
@@ -351,6 +368,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
'<table><tr><table><tr id="nested">',
'<table><tr><table><tr id="nested"></tr></table></tr></table>')
+ def test_empty_element_tag_with_contents(self):
+ self.assertSoupEquals("<br>foo</br>", "<br />foo")
+
def test_doctype_in_body(self):
markup = "<p>one<!DOCTYPE foobar>two</p>"
self.assertSoupEquals(markup)
@@ -487,20 +507,53 @@ class TestLXMLBuilderEncodingConversion(SoupTest):
from beautifulsoup.builder.lxml_builder import LXMLTreeBuilderForXML
class TestLXMLXMLBuilder(SoupTest):
+ """Test XML-specific parsing behavior.
+
+ Most of the tests use HTML as an example, since Beautiful Soup is
+ mainly an HTML parser. This test suite is a base for XML-specific
+ tree builders.
+ """
@property
def default_builder(self):
return LXMLTreeBuilderForXML()
- def test_self_closing_tag(self):
+ def test_empty_element_tag(self):
soup = self.soup("<p><iamselfclosing /></p>")
- self.assertTrue(soup.iamselfclosing.isSelfClosing)
+ self.assertTrue(soup.iamselfclosing.is_empty_element)
- def test_self_empty_tag_treated_as_self_closing(self):
+ def test_self_empty_tag_treated_as_empty_element(self):
soup = self.soup("<p><iamclosed></iamclosed></p>")
- self.assertFalse(soup.iamclosed.isSelfClosing)
+ self.assertTrue(soup.iamclosed.is_empty_element)
- def test_self_nonempty_tag_is_not_self_closing(self):
+ def test_self_nonempty_tag_is_not_empty_element(self):
soup = self.soup("<p><ihavecontents>contents</ihavecontents></p>")
- self.assertFalse(soup.ihavecontents.isSelfClosing)
-
+ self.assertFalse(soup.ihavecontents.is_empty_element)
+
+ def test_empty_tag_that_stops_being_empty_gets_a_closing_tag(self):
+ soup = self.soup("<bar />")
+ self.assertTrue(soup.bar.is_empty_element)
+ soup.bar.insert(1, "Contents")
+ self.assertFalse(soup.bar.is_empty_element)
+ self.assertEquals(str(soup), "<bar>Contents</bar>")
+
+ def test_designated_empty_element_tag_has_no_closing_tag(self):
+ builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
+ soup = BeautifulSoup(builder=builder, markup="<bar></bar>")
+ self.assertTrue(soup.bar.is_empty_element)
+ self.assertEquals(str(soup), "<bar />")
+
+ def test_empty_tag_not_in_empty_element_tag_list_has_closing_tag(self):
+ builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
+
+ soup = BeautifulSoup(builder=builder, markup="<foo />")
+ self.assertFalse(soup.foo.is_empty_element)
+ self.assertEquals(str(soup), "<foo></foo>")
+
+ def test_designated_empty_element_tag_does_not_change_parser_behavior(self):
+ # The designated list of empty-element tags only affects how
+ # empty tags are presented. It does not affect how tags are
+ # parsed--that's the parser's job.
+ builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
+ soup = BeautifulSoup(builder=builder, markup="<bar>contents</bar>")
+ self.assertEquals(str(soup), "<bar>contents</bar>")