"""Helper classes for tests.""" import unittest from beautifulsoup import BeautifulSoup from beautifulsoup.element import SoupStrainer from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder class SoupTest(unittest.TestCase): def setUp(self): # LXMLTreeBuilder won't handle bad markup, but that's fine, # since all the parsing tests take place in parser-specific # test suites that override default_builder. self.default_builder = LXMLTreeBuilder() def soup(self, markup): """Build a Beautiful Soup object from markup.""" return BeautifulSoup(markup, builder=self.default_builder) def assertSoupEquals(self, to_parse, compare_parsed_to=None): builder = self.default_builder obj = BeautifulSoup(to_parse, builder=builder) if compare_parsed_to is None: compare_parsed_to = to_parse self.assertEquals( obj.decode(), builder.test_fragment_to_document(compare_parsed_to)) class BuilderSmokeTest(SoupTest): """A generic smoke test for tree builders. Subclasses of this test ensure that all of Beautiful Soup's tree builders generate more or less the same trees. It's okay for trees to differ, especially when given invalid markup--just override the appropriate test method to demonstrate how one tree builder differs from others. """ def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. self.assertSoupEquals("A bare string") def test_mixed_case_tags(self): # Mixed-case tags are folded to lowercase. self.assertSoupEquals( "", "") def test_self_closing(self): # HTML's self-closing tags are recognized as such. self.assertSoupEquals( "

A tag

", "

A tag

") self.assertSoupEquals( "

Foo
bar

", "

Foo
bar

") def test_nested_inline_elements(self): # Inline tags can be nested indefinitely. b_tag = "Inside a B tag" self.assertSoupEquals(b_tag) nested_b_tag = "

A nested tag

" self.assertSoupEquals(nested_b_tag) double_nested_b_tag = "

A doubly nested tag

" self.assertSoupEquals(nested_b_tag) def test_nested_block_level_elements(self): soup = self.soup('

Foo

') blockquote = soup.blockquote self.assertEqual(blockquote.p.b.string, 'Foo') self.assertEqual(blockquote.b.string, 'Foo') def test_collapsed_whitespace(self): """In most tags, whitespace is collapsed.""" self.assertSoupEquals("

", "

") def test_preserved_whitespace_in_pre_and_textarea(self): """In
 and ")

    def test_single_quote_attribute_values_become_double_quotes(self):
        # Parse the first literal and compare the decoded result with the
        # second literal, using the two-argument form of assertSoupEquals.
        # NOTE(review): both literals are empty strings here, so no
        # attribute quoting is exercised at all. The markup looks like it
        # was stripped at some point -- confirm against the upstream test
        # and restore it.
        self.assertSoupEquals("",
                              '')

    def test_attribute_values_with_nested_quotes_are_left_alone(self):
        # Called with a single argument, assertSoupEquals compares the
        # parsed result with the input markup itself (after the builder's
        # test_fragment_to_document wrapping).
        # NOTE(review): the literal below is just "a" -- it has no
        # attribute and no quotes, so the property named by this test is
        # not checked. Presumably the original markup was lost; verify and
        # restore it.
        text = """a"""
        self.assertSoupEquals(text)

    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
        # NOTE(review): this literal contains no "foo" element, yet the
        # lines below go through soup.foo and set its 'attr' item.
        # Presumably the markup was stripped from the literal (and from the
        # expected string at the end) -- restore both before relying on
        # this test.
        text = """a"""
        soup = self.soup(text)
        # Overwrite the attribute with a value that contains both a
        # double-quoted phrase and an apostrophe.
        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
        # Serialize the tag, feed that back through the parser, and compare
        # the result with the expected markup.
        self.assertSoupEquals(
            soup.foo.decode(),
            """a""")

    def test_ampersand_in_attribute_value_gets_quoted(self):
        # Parse the first literal and compare the decoded result with the
        # second literal.
        # NOTE(review): both literals are empty strings, so there is no
        # ampersand and no attribute value in play. The markup appears to
        # have been stripped -- confirm against the upstream test and
        # restore it.
        self.assertSoupEquals('',
                              '')


class BuilderInvalidMarkupSmokeTest(SoupTest):
    """Tests of invalid markup.

    These are very likely to give different results for different tree
    builders. It's not required that a tree builder handle invalid
    markup at all.
    """

    def test_unclosed_block_level_elements(self):
        # Unclosed block-level elements should be closed.
        self.assertSoupEquals(
            '

Foo

Bar', '

Foo

Bar

') def test_fake_self_closing_tag(self): # If a self-closing tag presents as a normal tag, the 'open' # tag is treated as an instance of the self-closing tag and # the 'close' tag is ignored. self.assertSoupEquals( "http://foo.com/", "http://foo.com/") def test_boolean_attribute_with_no_value_gets_empty_value(self): soup = self.soup("
foo
") self.assertEquals(soup.table.td['nowrap'], '')