From 09c9ca430e49449cc39cbeb7556230cb62df9b19 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sun, 13 Feb 2011 19:40:29 -0500
Subject: Added tests for namespaced doctypes.

---
 beautifulsoup/__init__.py             | 14 +++++++++-----
 beautifulsoup/builder/lxml_builder.py |  4 ++--
 beautifulsoup/element.py              | 10 ++++++++++
 tests/test_lxml.py                    | 33 +++++++++++++++++++++++++++++----
 4 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index 4a7e18b..ddf51f9 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -222,11 +222,15 @@ class BeautifulStoneSoup(Tag):
                     not self.parseOnlyThese.search(currentData)):
                 return
             o = containerClass(currentData)
-            o.setup(self.currentTag, self.previous)
-            if self.previous:
-                self.previous.next = o
-            self.previous = o
-            self.currentTag.contents.append(o)
+            self.object_was_parsed(o)
+
+    def object_was_parsed(self, o):
+        """Append o to currentTag and link it in after self.previous."""
+        o.setup(self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = o
+        self.previous = o  # o is now the most recently parsed object
+        self.currentTag.contents.append(o)
 
 
     def _popToTag(self, name, inclusivePop=True):
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 86ac183..9ced9f0 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -32,8 +32,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
 
     def doctype(self, name, pubid, system):
         self.soup.endData()
-        self.soup.handle_data(name)
-        self.soup.endData(Doctype)
+        doctype = Doctype.for_name_and_ids(name, pubid, system)
+        self.soup.object_was_parsed(doctype)
 
     def comment(self, content):
         "Handle comments as Comment objects."
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index b2e0e12..8749114 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -372,6 +372,16 @@ class Declaration(NavigableString):
 
 class Doctype(NavigableString):
 
+    @classmethod
+    def for_name_and_ids(cls, name, pub_id, system_id):
+        """Build a doctype from a name and optional public/system ids."""
+        value = name
+        if pub_id is not None:
+            value += ' PUBLIC "%s"' % pub_id
+        if system_id is not None:
+            value += (' SYSTEM "%s"' if pub_id is None else ' "%s"') % system_id
+        return cls(value)
+
     def decodeGivenEventualEncoding(self, eventualEncoding):
         return u'<!DOCTYPE ' + self + u'>'
 
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 8f36b41..9f002cb 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -4,7 +4,7 @@ import re
 
 from beautifulsoup import BeautifulSoup
 from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
-from beautifulsoup.element import Comment
+from beautifulsoup.element import Comment, Doctype
 from beautifulsoup.testing import SoupTest
 
 
@@ -201,11 +201,33 @@ class TestLXMLBuilder(SoupTest):
         markup = "<svg><![CDATA[foobar]]>"
         self.assertSoupEquals(markup, "<svg></svg>")
 
+    def test_namespaced_system_doctype(self):
+        doctype_str = '<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">'
+        markup = doctype_str + '<p>foo</p>'
+        soup = BeautifulSoup(markup)
+        doctype = soup.contents[0]
+        self.assertEquals(doctype.__class__, Doctype)
+        self.assertEquals(doctype, 'xsl:stylesheet SYSTEM "htmlent.dtd"')
+        self.assertEquals(str(soup)[:len(doctype_str)], doctype_str)
+        self.assertEquals(soup.p.contents[0], 'foo')
+
+    def test_namespaced_public_doctype(self):
+        doctype_str = '<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">'
+        markup = doctype_str + '<p>foo</p>'
+        soup = BeautifulSoup(markup)
+        doctype = soup.contents[0]
+        self.assertEquals(doctype.__class__, Doctype)
+        self.assertEquals(doctype, 'xsl:stylesheet PUBLIC "htmlent.dtd"')
+        self.assertEquals(str(soup)[:len(doctype_str)], doctype_str)
+        self.assertEquals(soup.p.contents[0], 'foo')
+
     # Tests below this line need work.
 
-    #def test_doctype(self):
-    #    xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"><html>foo</html></p>'
-    #    self.assertSoupEquals(xml)
+
+    def test_doctype(self):
+        doctype_str = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">'
+        markup = doctype_str + '<p>foo</p>'
+        self.assertSoupEquals(markup)
 
     def test_entities_converted_on_the_way_out(self):
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
@@ -274,6 +296,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
         markup = "<p>one<!DOCTYPE foobar>two</p>"
         self.assertSoupEquals(markup)
 
+    #def testJunkInDeclaration(self):
+    #    self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
+
     def test_cdata_where_it_doesnt_belong(self):
         #CDATA sections are ignored.
         markup = "<div><![CDATA[foo]]>"
-- 
cgit v1.2.3