"""Tests to ensure that the lxml tree builder generates good trees.""" from beautifulsoup import BeautifulSoup from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder from beautifulsoup.element import Comment from beautifulsoup.testing import SoupTest class TestLXMLBuilder(SoupTest): """A smoke test for the LXML tree builders. Subclass this to test some other tree builder. Subclasses of this test ensure that all of Beautiful Soup's tree builders generate more or less the same trees. It's okay for trees to differ, especially when given invalid markup--just override the appropriate test method to demonstrate how one tree builder differs from the LXML builder. """ def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. # # In this case, lxml puts a
tag around the bare string. self.assertSoupEquals( "A bare string", "
A bare string
") def test_mixed_case_tags(self): # Mixed-case tags are folded to lowercase. self.assertSoupEquals( "A tag
", "A tag
") self.assertSoupEquals( "Foo
bar
Foo
bar
foobaz
" self.assertSoupEquals(markup) soup = self.soup(markup) comment = soup.find(text="foobar") self.assertEquals(comment.__class__, Comment) def test_nested_inline_elements(self): # Inline tags can be nested indefinitely. b_tag = "Inside a B tag" self.assertSoupEquals(b_tag) nested_b_tag = "A nested tag
" self.assertSoupEquals(nested_b_tag) double_nested_b_tag = "" self.assertSoupEquals(nested_b_tag) def test_nested_block_level_elements(self): soup = self.soup('') blockquote = soup.blockquote self.assertEqual(blockquote.p.b.string, 'Foo') self.assertEqual(blockquote.b.string, 'Foo') def test_collapsed_whitespace(self): """In most tags, whitespace is collapsed.""" self.assertSoupEquals("Foo
", "
") def test_preserved_whitespace_in_pre_and_textarea(self): """In
and