summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- tests/test_html5lib.py 22
-rw-r--r-- tests/test_lxml.py 11
2 files changed, 33 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 48f27ae..3efdebf 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -107,6 +107,28 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
("<html><head></head><body><p>a</p>"
"<!-- Foo = -8--></body></html>"))
+ def test_whitespace_in_doctype(self):
+     # A doctype declaration with a stray space after "<!" is not kept as
+     # a doctype: the expected output below carries the declaration text
+     # as a comment placed ahead of the <html> element.
+     soup = self.soup((
+         '<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">'
+         '<p>foo</p>'))
+     # Use assertEqual; the assertEquals alias is deprecated and was
+     # removed from unittest in Python 3.12.
+     self.assertEqual(
+         str(soup),
+         ('<!-- DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"-->'
+          '<html><head></head><body><p>foo</p></body></html>'))
+
+ def test_incomplete_declaration(self):
+     # An incomplete declaration is treated as a comment: everything from
+     # "<!" up to the next ">" ends up inside the comment.
+     markup = 'a<!b <p>c'
+     self.assertSoupEquals(markup, "a<!--b <p-->c")
+
+     # Spell that out node by node: text, comment, text.
+     soup = self.soup(markup)
+     str1, comment, str2 = soup.body.contents
+     # Use assertEqual throughout; the assertEquals alias is deprecated
+     # and was removed from unittest in Python 3.12.
+     self.assertEqual(str1, 'a')
+     self.assertEqual(comment.__class__, Comment)
+     self.assertEqual(comment, 'b <p')
+     self.assertEqual(str2, 'c')
def test_foo(self):
isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index cba5522..6adc2b3 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -302,6 +302,17 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
# Declarations that don't make any sense are ignored.
self.assertSoupEquals('<! Foo = -8><p>a</p>', "<p>a</p>")
+ def test_whitespace_in_doctype(self):
+     # A doctype declaration with a stray space after "<!" is expected to
+     # vanish from the output, leaving only the paragraph.
+     markup = (
+         '<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">'
+         '<p>foo</p>')
+     expected = '<p>foo</p>'
+     self.assertSoupEquals(markup, expected)
+
+ def test_incomplete_declaration(self):
+     # An unterminated declaration derails the rest of the document: the
+     # expected output keeps only the leading "a", wrapped in a <p>.
+     markup = 'a<!b <p>c'
+     expected = '<p>a</p>'
+     self.assertSoupEquals(markup, expected)
+
def test_cdata_where_it_doesnt_belong(self):
#CDATA sections are ignored.
markup = "<div><![CDATA[foo]]>"