From d35e92875c62cf43227ccc6fca75b5e74a6350e8 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 18 Feb 2011 12:23:37 -0500 Subject: Made Unicode, Dammit more PEP-8 compliant. --- tests/test_html5lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 59d84a3..5aeac76 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -151,7 +151,7 @@ class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): # Hebrew encoding) to UTF-8. soup = self.soup(self.HEBREW_DOCUMENT, fromEncoding="iso-8859-8") - self.assertEquals(soup.originalEncoding, 'iso8859-8') + self.assertEquals(soup.original_encoding, 'iso8859-8') self.assertEquals( soup.encode('utf-8'), self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8")) -- cgit v1.2.3 From 0c9e690dedf720c7c34cc2433f0ccd03f7eb2a85 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 18 Feb 2011 14:24:42 -0500 Subject: Ported tests of bad markup that were lying around the TODO. --- tests/test_html5lib.py | 54 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 5aeac76..3045b02 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -131,14 +131,56 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): self.assertEquals(comment, 'b Sacr\xe9 bleu!""" - soup = self.soup(isolatin) + def test_document_starts_with_bogus_declaration(self): + soup = self.soup('a') + # 'Foo' becomes a comment that appears before the HTML. + comment = soup.contents[0] + self.assertTrue(isinstance(comment, Comment)) + self.assertEquals(comment, 'Foo') - utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) - utf8 = utf8.replace("\xe9", "\xc3\xa9") + self.assertEquals(self.find(text="a") == "a") - #print soup + def test_attribute_value_was_closed_by_subsequent_tag(self): + markup = """baz""" + soup = self.soup(markup) + # The string between the first and second quotes was interpreted + # as the value of the 'href' attribute. + self.assertEquals(soup.a['href'], 'foo,

a

') + # The declaration becomes a comment. + comment = soup.contents[0] + self.assertTrue(isinstance(comment, Comment)) + self.assertEquals(comment, ' Foo ') + self.assertEquals(soup.p.string, 'a') + + def test_document_ends_with_incomplete_declaration(self): + soup = self.soup('

a<Hello>") + # Compare html5lib, which completes the entity. + self.assertEquals(soup.p.string, "") + + def test_nonexistent_entity(self): + soup = self.soup("

foo&#bar;baz

") + self.assertEquals(soup.p.string, "foo&#bar;baz") + + # Compare a real entity. + soup = self.soup("

foodbaz

") + self.assertEquals(soup.p.string, "foodbaz") class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): -- cgit v1.2.3