diff options
-rw-r--r--  CHANGELOG          | 16 ++++++++++++++++
-rw-r--r--  tests/test_lxml.py | 24 +++++++++++++++++-------
2 files changed, 33 insertions, 7 deletions
@@ -17,6 +17,10 @@ work. Here are the renames:
  * findPreviousSibling -> find_previous_sibling
  * findPreviousSiblings -> find_previous_siblings
 
+Some attributes have also been renamed:
+
+ * Tag.isSelfClosing -> Tag.is_empty_element
+
 == Generators are now properties ==
 
 The generators have been given more sensible (and PEP 8-compliant)
@@ -51,6 +55,18 @@ and nothing else, then A.string is the same as B.string. So:
 
 The value of a.string used to be None, and now it's "foo".
 
+== Empty-element tags ==
+
+Beautiful Soup's handling of empty-element tags (aka self-closing
+tags) has been improved, especially when parsing XML. Previously you
+had to explicitly specify a list of empty-element tags. You can still
+do that, but if you don't, Beautiful Soup now considers any empty tag
+to be an empty-element tag.
+
+The determination of empty-element-ness is now made at runtime rather
+than parse time. If you add a child to an empty-element tag, it stops
+being an empty-element tag.
+
 == Entities are always converted to Unicode ==
 
 An HTML or XML entity is always converted into the corresponding
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 2f1e98e..a2f73cd 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -518,21 +518,31 @@ class TestLXMLXMLBuilder(SoupTest):
         soup = self.soup("<p><ihavecontents>contents</ihavecontents></p>")
         self.assertFalse(soup.ihavecontents.is_empty_element)
 
-    def test_designated_empty_element_tags(self):
-        # A constructor argument allows you to say which empty tags
-        # should be presented as empty-element tags.
+    def test_designated_empty_element_tag_has_no_closing_tag(self):
+        builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
+        soup = BeautifulSoup(builder=builder, markup="<bar></bar>")
+        self.assertTrue(soup.bar.is_empty_element)
+        self.assertEquals(str(soup), "<bar />")
+
+    def test_empty_tag_that_stops_being_empty_gets_a_closing_tag(self):
+        builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
+        soup = BeautifulSoup(builder=builder, markup="<bar />")
+        self.assertTrue(soup.bar.is_empty_element)
+        soup.bar.insert(1, "Contents")
+        self.assertFalse(soup.bar.is_empty_element)
+        self.assertEquals(str(soup), "<bar>Contents</bar>")
+
+    def test_empty_tag_not_in_empty_element_tag_list_has_closing_tag(self):
         builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
         soup = BeautifulSoup(builder=builder, markup="<foo />")
+        self.assertFalse(soup.foo.is_empty_element)
         self.assertEquals(str(soup), "<foo></foo>")
-        soup = BeautifulSoup(builder=builder, markup="<bar></bar>")
-        self.assertEquals(str(soup), "<bar />")
-
     def test_designated_empty_element_tag_does_not_change_parser_behavior(self):
         # The designated list of empty-element tags only affects how
         # empty tags are presented. It does not affect how tags are
         # parsed--that's the parser's job.
         builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
         soup = BeautifulSoup(builder=builder, markup="<bar>contents</bar>")
-        self.assertEquals(soup.encode(), "<bar>contents</bar>")
+        self.assertEquals(str(soup), "<bar>contents</bar>")