diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-13 20:00:33 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-13 20:00:33 -0500 |
commit | 5aa0e5ba8ec6a590c673db54c60ff2a76544f14f (patch) | |
tree | bf0637e79bc667a1b5ae08ca4794df414f169992 /tests/test_lxml.py | |
parent | 87a55b145f0a73e6fc9ede9a762d81d2527161b6 (diff) | |
parent | bc97bb3a83ee9fb4c8e31d11069ccf1cda61d4ff (diff) |
Fixed handling of doctypes and added tests for nonsensical declarations.
Diffstat (limited to 'tests/test_lxml.py')
-rw-r--r-- | tests/test_lxml.py | 43 |
1 file changed, 36 insertions, 7 deletions
diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 207d141..cba5522 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -4,7 +4,7 @@ import re from beautifulsoup import BeautifulSoup from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder -from beautifulsoup.element import Comment +from beautifulsoup.element import Comment, Doctype from beautifulsoup.testing import SoupTest @@ -196,15 +196,40 @@ class TestLXMLBuilder(SoupTest): soup = self.soup("<a> </a>") self.assertEquals(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) - # Tests below this line need work. + def test_cdata_where_its_ok(self): + # lxml strips CDATA sections, no matter where they occur. + markup = "<svg><![CDATA[foobar]]>" + self.assertSoupEquals(markup, "<svg></svg>") + + def _test_doctype(self, doctype_fragment): + """Run a battery of assertions on a given doctype string.""" + doctype_str = '<!DOCTYPE %s>' % doctype_fragment + markup = doctype_str + '<p>foo</p>' + soup = self.soup(markup) + doctype = soup.contents[0] + self.assertEquals(doctype.__class__, Doctype) + self.assertEquals(doctype, doctype_fragment) + self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) + + # Make sure that the doctype was correctly associated with the + # parse tree and that the rest of the document parsed. + self.assertEquals(soup.p.contents[0], 'foo') - #def test_doctype(self): - # xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"><html>foo</html></p>' - # self.assertSoupEquals(xml) + def test_doctype(self): + # Test a normal HTML doctype you'll commonly see in a real document. + self._test_doctype( + 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') + def test_namespaced_system_doctype(self): + # Test a namespaced doctype with a system id. + self._test_doctype('xsl:stylesheet SYSTEM "htmlent.dtd"') + + def test_namespaced_system_doctype(self): + # Test a namespaced doctype with a public id. 
+ self._test_doctype('xsl:stylesheet PUBLIC "htmlent.dtd"') + + # Tests below this line need work. - #def test_cdata(self): - # print self.soup("<div><![CDATA[foo]]></div>") def test_entities_converted_on_the_way_out(self): text = "<p><<sacré bleu!>></p>" @@ -273,6 +298,10 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): markup = "<p>one<!DOCTYPE foobar>two</p>" self.assertSoupEquals(markup) + def test_nonsensical_declaration(self): + # Declarations that don't make any sense are ignored. + self.assertSoupEquals('<! Foo = -8><p>a</p>', "<p>a</p>") + def test_cdata_where_it_doesnt_belong(self): #CDATA sections are ignored. markup = "<div><![CDATA[foo]]>" |