diff options
-rw-r--r-- | tests/test_html5lib.py | 9 |
-rw-r--r-- | tests/test_lxml.py | 9 |
2 files changed, 18 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index b9bdd1d..ac99832 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -200,6 +200,15 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
         soup = self.soup("<p>foodbaz</p>")
         self.assertEquals(soup.p.string, "foodbaz")
 
+    def test_entity_out_of_range(self):
+        # An entity that's out of range will be converted to
+        # REPLACEMENT CHARACTER.
+        soup = self.soup("<p>�</p>")
+        self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+
+        soup = self.soup("<p>�</p>")
+        self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+
 
 class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
     @property
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index c2b40c3..4b3df07 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -425,6 +425,15 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
         # Also compare html5lib, which preserves the &# before the
         # entity name.
 
+    def test_entity_out_of_range(self):
+        # An entity that's out of range will be ignored.
+        soup = self.soup("<p>�</p>")
+        self.assertEquals(soup.p.string, None)
+
+        soup = self.soup("<p>�</p>")
+        self.assertEquals(soup.p.string, None)
+
+
     def test_entity_was_not_finished(self):
         soup = self.soup("<p><Hello>")
         # Compare html5lib, which completes the entity.