diff options
Diffstat (limited to 'bs4/tests')
-rw-r--r-- bs4/tests/test_htmlparser.py |  1 -
-rw-r--r-- bs4/tests/test_lxml.py       | 15 +++++++++++++++
2 files changed, 15 insertions, 1 deletion
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index 6215185..bcb5ed2 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -17,4 +17,3 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
     def test_namespaced_public_doctype(self):
         # html.parser can't handle namespaced doctypes, so skip this one.
         pass
-
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 4e0b12e..39e26bf 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -48,6 +48,21 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         soup = BeautifulStoneSoup("<b />")
         self.assertEqual(u"<b/>", unicode(soup.b))
 
+    def test_real_xhtml_document(self):
+        """lxml strips the XML definition from an XHTML doc, which is fine."""
+        markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(
+            soup.encode("utf-8").replace(b"\n", b''),
+            markup.replace(b'\n', b'').replace(
+                b'<?xml version="1.0" encoding="utf-8"?>', b''))
+
+
 @skipIf(
     not LXML_PRESENT,
     "lxml seems not to be present, not testing its XML tree builder.")