From 4191d5ff45015c6fac1db0bbdd7b3fcaff234424 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 18:04:03 -0500 Subject: Clarified lxml's behavior w/r/t CDATA sections. --- tests/test_lxml.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 207d141..8f36b41 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -196,16 +196,17 @@ class TestLXMLBuilder(SoupTest): soup = self.soup("  ") self.assertEquals(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) + def test_cdata_where_its_ok(self): + # lxml strips CDATA sections, no matter where they occur. + markup = "foobar" + self.assertSoupEquals(markup, "") + # Tests below this line need work. #def test_doctype(self): # xml = 'foo

' # self.assertSoupEquals(xml) - - #def test_cdata(self): - # print self.soup("
") - def test_entities_converted_on_the_way_out(self): text = "

<<sacré bleu!>>

" expected = u"<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>".encode("utf-8") -- cgit v1.2.3 From 09c9ca430e49449cc39cbeb7556230cb62df9b19 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 19:40:29 -0500 Subject: Added tests for namespaced doctypes. --- tests/test_lxml.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 8f36b41..9f002cb 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -4,7 +4,7 @@ import re from beautifulsoup import BeautifulSoup from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder -from beautifulsoup.element import Comment +from beautifulsoup.element import Comment, Doctype from beautifulsoup.testing import SoupTest @@ -201,11 +201,33 @@ class TestLXMLBuilder(SoupTest): markup = "foobar" self.assertSoupEquals(markup, "") + def test_namespaced_system_doctype(self): + doctype_str = '' + markup = doctype_str + '

foo

' + soup = BeautifulSoup(markup) + doctype = soup.contents[0] + self.assertEquals(doctype.__class__, Doctype) + self.assertEquals(doctype, 'xsl:stylesheet SYSTEM "htmlent.dtd"') + self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) + self.assertEquals(soup.p.contents[0], 'foo') + + def test_namespaced_public_doctype(self): + doctype_str = '' + markup = doctype_str + '

foo

' + soup = BeautifulSoup(markup) + doctype = soup.contents[0] + self.assertEquals(doctype.__class__, Doctype) + self.assertEquals(doctype, 'xsl:stylesheet PUBLIC "htmlent.dtd"') + self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) + self.assertEquals(soup.p.contents[0], 'foo') + # Tests below this line need work. - #def test_doctype(self): - # xml = 'foo

' - # self.assertSoupEquals(xml) + + def test_doctype(self): + doctype_str = ' + markup = doctype_str + '

foo

' + self.assertSoupEquals(xml) def test_entities_converted_on_the_way_out(self): text = "

<<sacré bleu!>>

" @@ -274,6 +296,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): markup = "

onetwo

" self.assertSoupEquals(markup) + #def testJunkInDeclaration(self): + # self.assertSoupEquals('a', 'a') + def test_cdata_where_it_doesnt_belong(self): #CDATA sections are ignored. markup = "
" -- cgit v1.2.3 From f5a7641d58754df92d0567291c79c7ebd29c2005 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 19:48:00 -0500 Subject: Got a variety of doctype tests working. --- tests/test_lxml.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 9f002cb..d3dbe49 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -201,33 +201,35 @@ class TestLXMLBuilder(SoupTest): markup = "foobar" self.assertSoupEquals(markup, "") - def test_namespaced_system_doctype(self): - doctype_str = '' + def _test_doctype(self, doctype_fragment): + """Run a battery of assertions on a given doctype string.""" + doctype_str = '' % doctype_fragment markup = doctype_str + '

foo

' soup = BeautifulSoup(markup) doctype = soup.contents[0] self.assertEquals(doctype.__class__, Doctype) - self.assertEquals(doctype, 'xsl:stylesheet SYSTEM "htmlent.dtd"') + self.assertEquals(doctype, doctype_fragment) self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) - self.assertEquals(soup.p.contents[0], 'foo') - def test_namespaced_public_doctype(self): - doctype_str = '' - markup = doctype_str + '

foo

' - soup = BeautifulSoup(markup) - doctype = soup.contents[0] - self.assertEquals(doctype.__class__, Doctype) - self.assertEquals(doctype, 'xsl:stylesheet PUBLIC "htmlent.dtd"') - self.assertEquals(str(soup)[:len(doctype_str)], doctype_str) + # Make sure that the doctype was correctly associated with the + # parse tree and that the rest of the document parsed. self.assertEquals(soup.p.contents[0], 'foo') - # Tests below this line need work. + def test_doctype(self): + # Test a normal HTML doctype you'll commonly see in a real document. + self._test_doctype( + 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') + def test_namespaced_system_doctype(self): + # Test a namespaced doctype with a system id. + self._test_doctype('xsl:stylesheet SYSTEM "htmlent.dtd"') + + def test_namespaced_system_doctype(self): + # Test a namespaced doctype with a public id. + self._test_doctype('xsl:stylesheet PUBLIC "htmlent.dtd"') + + # Tests below this line need work. - def test_doctype(self): - doctype_str = ' - markup = doctype_str + '

foo

' - self.assertSoupEquals(xml) def test_entities_converted_on_the_way_out(self): text = "

<<sacré bleu!>>

" -- cgit v1.2.3 From a011baa0e95e3d42d647cdf68164ba1c30314492 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 19:53:10 -0500 Subject: Got the doctype tests to work for html5lib. --- tests/test_lxml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index d3dbe49..028b956 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -205,7 +205,7 @@ class TestLXMLBuilder(SoupTest): """Run a battery of assertions on a given doctype string.""" doctype_str = '' % doctype_fragment markup = doctype_str + '

foo

' - soup = BeautifulSoup(markup) + soup = self.soup(markup) doctype = soup.contents[0] self.assertEquals(doctype.__class__, Doctype) self.assertEquals(doctype, doctype_fragment) -- cgit v1.2.3 From bc97bb3a83ee9fb4c8e31d11069ccf1cda61d4ff Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 19:59:44 -0500 Subject: Added tests of nonsensical declarations. --- tests/test_lxml.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 028b956..cba5522 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -298,8 +298,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): markup = "

onetwo

" self.assertSoupEquals(markup) - #def testJunkInDeclaration(self): - # self.assertSoupEquals('a', 'a') + def test_nonsensical_declaration(self): + # Declarations that don't make any sense are ignored. + self.assertSoupEquals('

a

', "

a

") def test_cdata_where_it_doesnt_belong(self): #CDATA sections are ignored. -- cgit v1.2.3