From 09c9ca430e49449cc39cbeb7556230cb62df9b19 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Sun, 13 Feb 2011 19:40:29 -0500
Subject: Added tests for namespaced doctypes.
---
beautifulsoup/__init__.py | 14 +++++++++-----
beautifulsoup/builder/lxml_builder.py | 4 ++--
beautifulsoup/element.py | 10 ++++++++++
tests/test_lxml.py | 33 +++++++++++++++++++++++++++++----
4 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index 4a7e18b..ddf51f9 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -222,11 +222,15 @@ class BeautifulStoneSoup(Tag):
not self.parseOnlyThese.search(currentData)):
return
o = containerClass(currentData)
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
- self.currentTag.contents.append(o)
+ self.object_was_parsed(o)
+
+ def object_was_parsed(self, o):
+ """Add an object to the parse tree."""
+ o.setup(self.currentTag, self.previous)
+ if self.previous:
+ self.previous.next = o
+ self.previous = o
+ self.currentTag.contents.append(o)
def _popToTag(self, name, inclusivePop=True):
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 86ac183..9ced9f0 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -32,8 +32,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
def doctype(self, name, pubid, system):
self.soup.endData()
- self.soup.handle_data(name)
- self.soup.endData(Doctype)
+ doctype = Doctype.for_name_and_ids(name, pubid, system)
+ self.soup.object_was_parsed(doctype)
def comment(self, content):
"Handle comments as Comment objects."
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index b2e0e12..8749114 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -372,6 +372,16 @@ class Declaration(NavigableString):
class Doctype(NavigableString):
+ @classmethod
+ def for_name_and_ids(cls, name, pub_id, system_id):
+ value = name
+ if pub_id is not None:
+ value += ' PUBLIC "%s"' % pub_id
+ if system_id is not None:
+ value += ' SYSTEM "%s"' % system_id
+
+ return Doctype(value)
+
def decodeGivenEventualEncoding(self, eventualEncoding):
return u''
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 8f36b41..9f002cb 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -4,7 +4,7 @@ import re
from beautifulsoup import BeautifulSoup
from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
-from beautifulsoup.element import Comment
+from beautifulsoup.element import Comment, Doctype
from beautifulsoup.testing import SoupTest
@@ -201,11 +201,33 @@ class TestLXMLBuilder(SoupTest):
markup = "
'
- # self.assertSoupEquals(xml)
+
+ def test_doctype(self):
+ doctype_str = '
+ markup = doctype_str + 'foo
'
+ self.assertSoupEquals(xml)
def test_entities_converted_on_the_way_out(self):
text = "<<sacré bleu!>>
"
@@ -274,6 +296,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
markup = "onetwo
"
self.assertSoupEquals(markup)
+ #def testJunkInDeclaration(self):
+ # self.assertSoupEquals('a', 'a')
+
def test_cdata_where_it_doesnt_belong(self):
#CDATA sections are ignored.
markup = ""
--
cgit v1.2.3