summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- tests/test_html5lib.py 9
-rw-r--r-- tests/test_lxml.py 9
2 files changed, 18 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index b9bdd1d..ac99832 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -200,6 +200,15 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
soup = self.soup("<p>foo&#100;baz</p>")
self.assertEquals(soup.p.string, "foodbaz")
+ def test_entity_out_of_range(self):
+ # An entity that's out of range will be converted to
+ # REPLACEMENT CHARACTER.
+ soup = self.soup("<p>&#10000000000000;</p>")
+ self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+
+ soup = self.soup("<p>&#x1000000000000;</p>")
+ self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+
class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
@property
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index c2b40c3..4b3df07 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -425,6 +425,15 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
# Also compare html5lib, which preserves the &# before the
# entity name.
+ def test_entity_out_of_range(self):
+ # An entity that's out of range will be ignored.
+ soup = self.soup("<p>&#10000000000000;</p>")
+ self.assertEquals(soup.p.string, None)
+
+ soup = self.soup("<p>&#x1000000000000;</p>")
+ self.assertEquals(soup.p.string, None)
+
+
def test_entity_was_not_finished(self):
soup = self.soup("<p>&lt;Hello&gt")
# Compare html5lib, which completes the entity.