"
soup = self.soup(markup)
data = soup.find(text="[CDATA[foo]]")
self.assertEquals(data.__class__, Comment)
def test_nonsensical_declaration(self):
# Declarations that don't make any sense are turned into comments.
# Parses two inputs in turn and compares str(soup) with the expected
# serialization for each.
# NOTE(review): the quoted literals in this test span several lines without
# triple quotes, and the expected values contain no markup at all. The
# markup they held was presumably stripped when this file was extracted;
# restore the literals from version control before relying on this test.
soup = self.soup('
a
')
self.assertEquals(str(soup),
(""
"
a
"))
soup = self.soup('
a
')
self.assertEquals(str(soup),
("
a
"
""))
def test_whitespace_in_doctype(self):
# A declaration that has extra whitespace is turned into a comment.
# Parses one input and compares str(soup) with the expected serialization.
# NOTE(review): both the input and the expected value are shown without any
# declaration or tags, and the literals span several lines without triple
# quotes. The markup was presumably stripped during extraction; restore the
# literals from version control before relying on this test.
soup = self.soup((
''
'
foo
'))
self.assertEquals(
str(soup),
(''
'
foo
'))
def test_incomplete_declaration(self):
# An incomplete declaration is treated as a comment.
# NOTE(review): `markup` and the expected value below are the same plain
# string 'ac' with no declaration in it, so the markup was presumably
# stripped during extraction - confirm against version control.
markup = 'ac'
self.assertSoupEquals(markup, "ac")
# Let's spell that out a little more explicitly.
soup = self.soup(markup)
# The body is expected to hold exactly three children; the unpacking
# fails with ValueError otherwise.
str1, comment, str2 = soup.body.contents
self.assertEquals(str1, 'a')
self.assertEquals(comment.__class__, Comment)
self.assertEquals(comment, 'b
a')
# NOTE(review): the literal above is broken across two lines, str2 is never
# checked, and the remaining assertions refer to a 'Foo' declaration that
# is not in `markup`. They may belong to a separate test whose `def` line
# was lost - confirm.
# 'Foo' becomes a comment that appears before the HTML.
comment = soup.contents[0]
self.assertTrue(isinstance(comment, Comment))
self.assertEquals(comment, 'Foo')
# NOTE(review): assertEquals is given a single boolean argument here, and
# the lookup is `self.find`, not `soup.find`. Likely intended:
# self.assertEquals(soup.find(text="a"), "a") - confirm before changing.
self.assertEquals(self.find(text="a") == "a")
def test_attribute_value_was_closed_by_subsequent_tag(self):
# NOTE(review): `markup` as shown is the plain string "baz" with no tag or
# attribute in it, although the test goes on to read soup.a['href']. The
# markup was presumably stripped during extraction - confirm against
# version control.
markup = """baz"""
soup = self.soup(markup)
# The string between the first and second quotes was interpreted
# as the value of the 'href' attribute.
self.assertEquals(soup.a['href'], 'foo, a
')
# NOTE(review): the literal above is broken across two lines, and the
# assertions below refer to a declaration and a <p> element that are not in
# `markup`. They may belong to a separate test whose `def` line was lost -
# confirm.
# The declaration becomes a comment.
comment = soup.contents[0]
self.assertTrue(isinstance(comment, Comment))
self.assertEquals(comment, ' Foo ')
self.assertEquals(soup.p.string, 'a')
def test_document_ends_with_incomplete_declaration(self):
# NOTE(review): the literal below opens with a single quote and closes with
# a double quote, and the comment and assertion that follow talk about an
# entity and soup.p, neither of which appears in the input shown. Two tests
# were presumably merged when markup was stripped during extraction -
# restore from version control before relying on this test.
soup = self.soup('a<Hello>")
# Compare html5lib, which completes the entity.
self.assertEquals(soup.p.string, "")
def test_nonexistent_entity(self):
# Parses two inputs and checks soup.p.string for each: first one described
# by the test name as a nonexistent entity, then a real entity.
# NOTE(review): neither input as shown contains an entity reference or a
# <p> tag, and both literals span two lines without triple quotes. The
# markup was presumably stripped during extraction - restore from version
# control before relying on this test.
soup = self.soup("foobar;baz
")
self.assertEquals(soup.p.string, "foobar;baz")
# Compare a real entity.
soup = self.soup("foodbaz
")
self.assertEquals(soup.p.string, "foodbaz")
class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
    """Encoding-conversion tests run with HTML5TreeBuilder as the builder."""

    @property
    def default_builder(self):
        """Return a new HTML5TreeBuilder each time the property is read."""
        return HTML5TreeBuilder()

    def test_real_hebrew_document(self):
        # A real-world test to make sure we can convert ISO-8859-8 (a
        # Hebrew encoding) to UTF-8.
        soup = self.soup(self.HEBREW_DOCUMENT,
                         fromEncoding="iso-8859-8")

        # The detected encoding is expected to be spelled 'iso8859-8'
        # (no hyphen after "iso"), unlike the name passed in above.
        self.assertEqual(soup.original_encoding, 'iso8859-8')

        # Re-encoding the parsed document must give the same bytes as
        # transcoding the raw input directly.
        self.assertEqual(
            soup.encode('utf-8'),
            self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8"))