From 09c9ca430e49449cc39cbeb7556230cb62df9b19 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 19:40:29 -0500 Subject: Added tests for namespaced doctypes. --- beautifulsoup/__init__.py | 14 +++++++++----- beautifulsoup/builder/lxml_builder.py | 4 ++-- beautifulsoup/element.py | 10 ++++++++++ tests/test_lxml.py | 33 +++++++++++++++++++++++++++++---- 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py index 4a7e18b..ddf51f9 100644 --- a/beautifulsoup/__init__.py +++ b/beautifulsoup/__init__.py @@ -222,11 +222,15 @@ class BeautifulStoneSoup(Tag): not self.parseOnlyThese.search(currentData)): return o = containerClass(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) + self.object_was_parsed(o) + + def object_was_parsed(self, o): + """Add an object to the parse tree.""" + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) def _popToTag(self, name, inclusivePop=True): diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py index 86ac183..9ced9f0 100644 --- a/beautifulsoup/builder/lxml_builder.py +++ b/beautifulsoup/builder/lxml_builder.py @@ -32,8 +32,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder): def doctype(self, name, pubid, system): self.soup.endData() - self.soup.handle_data(name) - self.soup.endData(Doctype) + doctype = Doctype.for_name_and_ids(name, pubid, system) + self.soup.object_was_parsed(doctype) def comment(self, content): "Handle comments as Comment objects." 
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py index b2e0e12..8749114 100644 --- a/beautifulsoup/element.py +++ b/beautifulsoup/element.py @@ -372,6 +372,16 @@ class Declaration(NavigableString): class Doctype(NavigableString): + @classmethod + def for_name_and_ids(cls, name, pub_id, system_id): + value = name + if pub_id is not None: + value += ' PUBLIC "%s"' % pub_id + if system_id is not None: + value += ' SYSTEM "%s"' % system_id + + return Doctype(value) + def decodeGivenEventualEncoding(self, eventualEncoding): return u'' diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 8f36b41..9f002cb 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -4,7 +4,7 @@ import re from beautifulsoup import BeautifulSoup from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder -from beautifulsoup.element import Comment +from beautifulsoup.element import Comment, Doctype from beautifulsoup.testing import SoupTest @@ -201,11 +201,33 @@ class TestLXMLBuilder(SoupTest): markup = "foobar" self.assertSoupEquals(markup, "") + def test_namespaced_system_doctype(self): + doctype_str = '<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">' + markup = doctype_str + '<p>foo</p>' + soup = BeautifulSoup(markup) + doctype = soup.contents[0] + self.assertEquals(doctype.__class__, Doctype) + self.assertEquals(doctype, 'xsl:stylesheet SYSTEM "htmlent.dtd"') + self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) + self.assertEquals(soup.p.contents[0], 'foo') + + def test_namespaced_public_doctype(self): + doctype_str = '<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">' + markup = doctype_str + '<p>foo</p>' + soup = BeautifulSoup(markup) + doctype = soup.contents[0] + self.assertEquals(doctype.__class__, Doctype) + self.assertEquals(doctype, 'xsl:stylesheet PUBLIC "htmlent.dtd"') + self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) + self.assertEquals(soup.p.contents[0], 'foo') + # Tests below this line need work. - #def test_doctype(self): - # xml = 'foo

' - # self.assertSoupEquals(xml) + + def test_doctype(self): + doctype_str = ' + markup = doctype_str + '

foo

' + self.assertSoupEquals(xml) def test_entities_converted_on_the_way_out(self): text = "

<<sacré bleu!>>

" @@ -274,6 +296,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): markup = "

onetwo

" self.assertSoupEquals(markup) + #def testJunkInDeclaration(self): + # self.assertSoupEquals('a', 'a') + def test_cdata_where_it_doesnt_belong(self): #CDATA sections are ignored. markup = "
" -- cgit v1.2.3