from HTMLParser import HTMLParseError from bs4.builder import HTMLParserTreeBuilder from bs4.element import CData from test_lxml import ( TestLXMLBuilder, TestLXMLBuilderEncodingConversion, TestLXMLBuilderInvalidMarkup, ) class TestHTMLParserTreeBuilder(TestLXMLBuilder): """See `BuilderSmokeTest`.""" @property def default_builder(self): return HTMLParserTreeBuilder() def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. # # HTMLParser does not modify the bare string at all. self.assertSoupEquals("A bare string") def test_cdata_where_its_ok(self): # HTMLParser recognizes CDATA sections and passes them through. markup = "" self.assertSoupEquals(markup) soup = self.soup(markup) string = soup.svg.string self.assertEquals(string, "foobar") self.assertTrue(isinstance(string, CData)) # These are tests that could be 'fixed' by improving the # HTMLParserTreeBuilder, but I don't think it's worth it. Users # will have fewer headaches if they use one of the other tree # builders. def test_empty_element(self): # HTML's empty-element tags are not recognized as such # unless they are presented as empty-element tags. self.assertSoupEquals( "
A tag
", "A tag
") self.assertSoupEquals( "Foo
bar
Foo
bar
AT&T
" soup = self.soup(text) self.assertEquals(soup.p.string, "AT&T;") def test_literal_in_textarea(self): # Anything inside a