From be392a04860b6d3fe053164c4a03a5e84f972878 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 20 Feb 2011 09:26:32 -0500 Subject: Test that empty-element tags that get children stop being empty-element tags. --- CHANGELOG | 16 ++++++++++++++++ tests/test_lxml.py | 24 +++++++++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5d13a6d..ef05813 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,6 +17,10 @@ work. Here are the renames: * findPreviousSibling -> find_previous_sibling * findPreviousSiblings -> find_previous_siblings +Some attributes have also been renamed: + + * Tag.isSelfClosing -> Tag.is_empty_element + == Generators are now properties == The generators have been given more sensible (and PEP 8-compliant) @@ -51,6 +55,18 @@ and nothing else, then A.string is the same as B.string. So: The value of a.string used to be None, and now it's "foo". +== Empty-element tags == + +Beautiful Soup's handling of empty-element tags (aka self-closing +tags) has been improved, especially when parsing XML. Previously you +had to explicitly specify a list of empty-element tags. You can still +do that, but if you don't, Beautiful Soup now considers any empty tag +to be an empty-element tag. + +The determination of empty-element-ness is now made at runtime rather +than parse time. If you add a child to an empty-element tag, it stops +being an empty-element tag. + == Entities are always converted to Unicode == An HTML or XML entity is always converted into the corresponding diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 2f1e98e..a2f73cd 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -518,21 +518,31 @@ class TestLXMLXMLBuilder(SoupTest): soup = self.soup("

contents

") self.assertFalse(soup.ihavecontents.is_empty_element) - def test_designated_empty_element_tags(self): - # A constructor argument allows you to say which empty tags - # should be presented as empty-element tags. + def test_designated_empty_element_tag_has_no_closing_tag(self): + builder = LXMLTreeBuilderForXML(empty_element_tags=['bar']) + soup = BeautifulSoup(builder=builder, markup="") + self.assertTrue(soup.bar.is_empty_element) + self.assertEquals(str(soup), "") + + def test_empty_tag_that_stops_being_empty_gets_a_closing_tag(self): + builder = LXMLTreeBuilderForXML(empty_element_tags=['bar']) + soup = BeautifulSoup(builder=builder, markup="") + self.assertTrue(soup.bar.is_empty_element) + soup.bar.insert(1, "Contents") + self.assertFalse(soup.bar.is_empty_element) + self.assertEquals(str(soup), "Contents") + + def test_empty_tag_not_in_empty_element_tag_list_has_closing_tag(self): builder = LXMLTreeBuilderForXML(empty_element_tags=['bar']) soup = BeautifulSoup(builder=builder, markup="") + self.assertFalse(soup.foo.is_empty_element) self.assertEquals(str(soup), "") - soup = BeautifulSoup(builder=builder, markup="") - self.assertEquals(str(soup), "") - def test_designated_empty_element_tag_does_not_change_parser_behavior(self): # The designated list of empty-element tags only affects how # empty tags are presented. It does not affect how tags are # parsed--that's the parser's job. builder = LXMLTreeBuilderForXML(empty_element_tags=['bar']) soup = BeautifulSoup(builder=builder, markup="contents") - self.assertEquals(soup.encode(), "contents") + self.assertEquals(str(soup), "contents") -- cgit v1.2.3