From 0c9e690dedf720c7c34cc2433f0ccd03f7eb2a85 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 18 Feb 2011 14:24:42 -0500 Subject: Ported tests of bad markup that were lying around the TODO. --- tests/test_lxml.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 11ef15a..85c6a1b 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -376,6 +376,55 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): markup = "
" self.assertSoupEquals(markup, "
") + def test_attribute_value_never_got_closed(self): + markup = ' and blah and blah") + + def test_attribute_value_was_closed_by_subsequent_tag(self): + markup = """baz""" + soup = self.soup(markup) + # The string between the first and second quotes was interpreted + # as the value of the 'href' attribute. + self.assertEquals(soup.a['href'], 'foo, ') + self.assertEquals(soup.a['b'], '') + + def test_nonexistent_entity(self): + soup = self.soup("

<p>foo&#bar;baz</p>

") + self.assertEquals(soup.p.string, "foobar;baz") + + # Compare a real entity. + soup = self.soup("

<p>foo&#100;baz</p>

") + self.assertEquals(soup.p.string, "foodbaz") + + # Also compare html5lib, which preserves the &# before the + # entity name. + + def test_entity_was_not_finished(self): + soup = self.soup("

<p>&lt;Hello&gt") + # Compare html5lib, which completes the entity. + self.assertEquals(soup.p.string, "a

a

') + # The declaration is ignored altogether. + self.assertEquals(soup.encode(), "

a

") + class TestLXMLBuilderEncodingConversion(SoupTest): # Test Beautiful Soup's ability to decode and encode from various -- cgit v1.2.3