"""Tests to ensure that the lxml tree builder generates good trees.""" import re from beautifulsoup import BeautifulSoup from beautifulsoup.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML from beautifulsoup.element import Comment, Doctype, SoupStrainer from beautifulsoup.testing import SoupTest class TestLXMLBuilder(SoupTest): """A smoke test for the LXML tree builder. Subclass this to test some other HTML tree builder. Subclasses of this test ensure that all of Beautiful Soup's tree builders generate more or less the same trees. It's okay for trees to differ--just override the appropriate test method to demonstrate how one tree builder differs from the LXML builder. But in general, all HTML tree builders should generate trees that make most of these tests pass. """ def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. # # In this case, lxml puts a
tag around the bare string. self.assertSoupEquals( "A bare string", "
A bare string
") def test_mixed_case_tags(self): # Mixed-case tags are folded to lowercase. self.assertSoupEquals( "A tag
", "A tag
") self.assertSoupEquals( "Foo
bar
Foo
bar
", "
") def test_comment(self): # Comments are represented as Comment objects. markup = "foobaz
" self.assertSoupEquals(markup) soup = self.soup(markup) comment = soup.find(text="foobar") self.assertEquals(comment.__class__, Comment) def test_nested_inline_elements(self): # Inline tags can be nested indefinitely. b_tag = "Inside a B tag" self.assertSoupEquals(b_tag) nested_b_tag = "A nested tag
" self.assertSoupEquals(nested_b_tag) double_nested_b_tag = "" self.assertSoupEquals(nested_b_tag) def test_nested_block_level_elements(self): soup = self.soup('') blockquote = soup.blockquote self.assertEqual(blockquote.p.b.string, 'Foo') self.assertEqual(blockquote.b.string, 'Foo') # This is aFoo
Here's another table:"
'
| ')
def test_correctly_nested_tables(self):
markup = ('
Here's another table:"
'
| ')
self.assertSoupEquals(
markup,
'
Here\'s another table:'
'
|
Foo |
Bar |
Baz |
", "
") def test_preserved_whitespace_in_pre_and_textarea(self): """In
and