summary refs log tree commit diff
path: root/tests/test_lxml.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 14:24:42 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 14:24:42 -0500
commit 0c9e690dedf720c7c34cc2433f0ccd03f7eb2a85 (patch)
tree 399e4aba45e8488914731b609ee8890680aa4535 /tests/test_lxml.py
parent b5fa9d7f5579f22f5fe0f7c9dc63e0aa7d29262f (diff)
Ported tests of bad markup that were lying around the TODO.
Diffstat (limited to 'tests/test_lxml.py')
-rw-r--r-- tests/test_lxml.py 49
1 files changed, 49 insertions, 0 deletions
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 11ef15a..85c6a1b 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -376,6 +376,55 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
markup = "<div><![CDATA[foo]]>"
self.assertSoupEquals(markup, "<div></div>")
+ def test_attribute_value_never_got_closed(self):
+ markup = '<a href="http://foo.com/</a> and blah and blah'
+ soup = self.soup(markup)
+ self.assertEquals(
+ soup.a['href'], "http://foo.com/</a> and blah and blah")
+
+ def test_attribute_value_was_closed_by_subsequent_tag(self):
+ markup = """<a href="foo</a>, </a><a href="bar">baz</a>"""
+ soup = self.soup(markup)
+ # The string between the first and second quotes was interpreted
+ # as the value of the 'href' attribute.
+ self.assertEquals(soup.a['href'], 'foo</a>, </a><a href=')
+
+ #The string after the second quote (bar"), was treated as an
+ #empty attribute called bar.
+ self.assertEquals(soup.a['bar'], '')
+ self.assertEquals(soup.a.string, "baz")
+
+ def test_attribute_value_with_embedded_brackets(self):
+ soup = self.soup('<a b="<a>">')
+ self.assertEquals(soup.a['b'], '<a>')
+
+ def test_nonexistent_entity(self):
+ soup = self.soup("<p>foo&#bar;baz</p>")
+ self.assertEquals(soup.p.string, "foobar;baz")
+
+ # Compare a real entity.
+ soup = self.soup("<p>foo&#100;baz</p>")
+ self.assertEquals(soup.p.string, "foodbaz")
+
+ # Also compare html5lib, which preserves the &# before the
+ # entity name.
+
+ def test_entity_was_not_finished(self):
+ soup = self.soup("<p>&lt;Hello&gt")
+ # Compare html5lib, which completes the entity.
+ self.assertEquals(soup.p.string, "<Hello&gt")
+
+ def test_document_ends_with_incomplete_declaration(self):
+ soup = self.soup('<p>a<!b')
+ # This becomes a string 'a'. The incomplete declaration is ignored.
+ # Compare html5lib, which turns it into a comment.
+ self.assertEquals(soup.p.contents, ['a'])
+
+ def test_document_starts_with_bogus_declaration(self):
+ soup = self.soup('<! Foo ><p>a</p>')
+ # The declaration is ignored altogether.
+ self.assertEquals(soup.encode(), "<html><body><p>a</p></body></html>")
+
class TestLXMLBuilderEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various