diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-20 12:30:00 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-20 12:30:00 -0500 |
commit | 85baeb52b43c5d61dd7cc274ca9c31972ae88bfc (patch) | |
tree | e33fd95286186514739d6faa0b0ca92aa0a5a31d | |
parent | 3cf25e3143a8765a0fd129a6ab368f617ee1653c (diff) |
lxml tests are once again run and pass when lxml is installed.
-rw-r--r-- | bs4/tests/test_htmlparser.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_lxml.py | 58 |
2 files changed, 55 insertions, 5 deletions
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py index 2eec428..474e92a 100644 --- a/bs4/tests/test_htmlparser.py +++ b/bs4/tests/test_htmlparser.py @@ -40,7 +40,7 @@ class TestHTMLParserTreeBuilder(SoupTest): def test_hex_entities_in_text(self): # XXX This tests a workaround for a bug in HTMLParser. - self.assertSoupEquals("ñ", u"\xf1") + self.assertSoupEquals("<p>ñ</p>", u"<p>\xf1</p>") def test_entities_in_attribute_values_converted_during_parsing(self): diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py index b9cc540..4d19e7f 100644 --- a/bs4/tests/test_lxml.py +++ b/bs4/tests/test_lxml.py @@ -30,6 +30,10 @@ class TestLXMLTreeBuilder(test_htmlparser.TestHTMLParserTreeBuilder): trees that make most of these tests pass. """ + @property + def default_builder(self): + return LXMLTreeBuilder() + def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. @@ -94,6 +98,35 @@ class TestLXMLTreeBuilder(test_htmlparser.TestHTMLParserTreeBuilder): # Test a namespaced doctype with a public id. self._test_doctype('xsl:stylesheet PUBLIC "htmlent.dtd"') + def test_entities_in_attribute_values_converted_during_parsing(self): + + # The numeric entity isn't recognized without the closing + # semicolon. + text = '<x t="piñata">' + expected = u"pi\N{LATIN SMALL LETTER N WITH TILDE}ata" + soup = self.soup(text) + self.assertEqual(soup.x['t'], expected) + + text = '<x t="piñata">' + expected = u"pi\N{LATIN SMALL LETTER N WITH TILDE}ata" + soup = self.soup(text) + self.assertEqual(soup.x['t'], u"pi\xf1ata") + + text = '<x t="piñata">' + soup = self.soup(text) + self.assertEqual(soup.x['t'], expected) + + text = '<x t="sacré bleu">' + soup = self.soup(text) + self.assertEqual( + soup.x['t'], + u"sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu") + + # This can cause valid HTML to become invalid. + valid_url = '<a href="http://example.org?a=1&b=2;3">foo</a>' + soup = self.soup(valid_url) + self.assertEqual(soup.a['href'], "http://example.org?a=1&b=2;3") + @skipIf( not LXML_PRESENT, @@ -101,6 +134,10 @@ class TestLXMLTreeBuilder(test_htmlparser.TestHTMLParserTreeBuilder): class TestLXMLTreeBuilderInvalidMarkup( test_htmlparser.TestHTMLParserTreeBuilderInvalidMarkup): + @property + def default_builder(self): + return LXMLTreeBuilder() + def test_attribute_value_never_got_closed(self): markup = '<a href="http://foo.com/</a> and blah and blah' soup = self.soup(markup) @@ -143,7 +180,7 @@ class TestLXMLTreeBuilderInvalidMarkup( '<p>foo</p>'), '<p>foo</p>') - def test_boolean_attribute_with_no_value_gets_empty_value(self): + def test_boolean_attribute_with_no_value(self): soup = self.soup("<table><td nowrap>foo</td></table>") self.assertEqual(soup.table.td['nowrap'], '') @@ -153,7 +190,7 @@ class TestLXMLTreeBuilderInvalidMarkup( self.assertSoupEquals(markup, "<div></div>") def test_empty_element_tag_with_contents(self): - self.assertSoupEquals("<br>foo</br>", "<br/>foo</br>") + self.assertSoupEquals("<br>foo</br>", "<br/>foo") def test_nonexistent_entity(self): soup = self.soup("<p>foo&#bar;baz</p>") @@ -189,11 +226,24 @@ class TestLXMLTreeBuilderInvalidMarkup( markup = '<b b="20" a="1" b="10" a="2" a="3" a="4"></b>' self.assertSoupEquals(markup, '<b a="1" b="20"></b>') + def test_entity_out_of_range(self): + # An entity that's out of range will be ignored. + soup = self.soup("<p>�</p>") + self.assertEqual(0, len(soup.p.contents)) + + soup = self.soup("<p>�</p>") + self.assertEqual(0, len(soup.p.contents)) + + soup = self.soup("<p>�</p>") + self.assertEqual(0, len(soup.p.contents)) + @skipIf( not LXML_PRESENT, "lxml seems not to be present, not testing it on encoding conversion.") class TestLXMLParserTreeBuilderEncodingConversion( test_htmlparser.TestHTMLParserTreeBuilderEncodingConversion): - # Re-run the lxml tests for HTMLParser - pass + + @property + def default_builder(self): + return LXMLTreeBuilder() |