summaryrefslogtreecommitdiff
path: root/bs4/tests/test_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests/test_lxml.py')
-rw-r--r--bs4/tests/test_lxml.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 4e0b12e..39e26bf 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -48,6 +48,21 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
soup = BeautifulStoneSoup("<b />")
self.assertEqual(u"<b/>", unicode(soup.b))
+ def test_real_xhtml_document(self):
+ """lxml strips the XML definition from an XHTML doc, which is fine."""
+ markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+ soup = self.soup(markup)
+ self.assertEqual(
+ soup.encode("utf-8").replace(b"\n", b''),
+ markup.replace(b'\n', b'').replace(
+ b'<?xml version="1.0" encoding="utf-8"?>', b''))
+
+
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its XML tree builder.")