summary refs log tree commit diff
path: root/beautifulsoup/testing.py
diff options
context:
space:
mode:
Diffstat (limited to 'beautifulsoup/testing.py')
-rw-r--r-- beautifulsoup/testing.py 139
1 files changed, 0 insertions, 139 deletions
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
index 9d0fa3a..74937d9 100644
--- a/beautifulsoup/testing.py
+++ b/beautifulsoup/testing.py
@@ -32,144 +32,5 @@ class SoupTest(unittest.TestCase):
-class BuilderSmokeTest(SoupTest):
- """A generic smoke test for tree builders.
-
- Subclasses of this test ensure that all of Beautiful Soup's tree
- builders generate more or less the same trees. It's okay for trees
- to differ, especially when given invalid markup--just override the
- appropriate test method to demonstrate how one tree builder
- differs from others.
- """
-
- def test_bare_string(self):
- # A bare string is turned into some kind of HTML document or
- # fragment recognizable as the original string.
- #
- # In this case, lxml puts a <p> tag around the bare string.
- self.assertSoupEquals(
- "A bare string", "<p>A bare string</p>")
-
- def test_mixed_case_tags(self):
- # Mixed-case tags are folded to lowercase.
- self.assertSoupEquals(
- "<a><B><Cd><EFG></efg></CD></b></A>",
- "<a><b><cd><efg></efg></cd></b></a>")
-
- def test_self_closing(self):
- # HTML's self-closing tags are recognized as such.
- self.assertSoupEquals(
- "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
-
- self.assertSoupEquals(
- "<p>Foo<br/>bar</p>", "<p>Foo<br />bar</p>")
-
- def test_comment(self):
- # Comments are represented as Comment objects.
- markup = "<p>foo<!--foobar-->baz</p>"
- self.assertSoupEquals(markup)
-
- soup = self.soup(markup)
- comment = soup.find(text="foobar")
- self.assertEquals(comment.__class__, Comment)
-
- def test_nested_inline_elements(self):
- # Inline tags can be nested indefinitely.
- b_tag = "<b>Inside a B tag</b>"
- self.assertSoupEquals(b_tag)
-
- nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
- self.assertSoupEquals(nested_b_tag)
-
- double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
- self.assertSoupEquals(nested_b_tag)
-
- def test_nested_block_level_elements(self):
- soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
- blockquote = soup.blockquote
- self.assertEqual(blockquote.p.b.string, 'Foo')
- self.assertEqual(blockquote.b.string, 'Foo')
-
- def test_collapsed_whitespace(self):
- """In most tags, whitespace is collapsed."""
- self.assertSoupEquals("<p> </p>", "<p> </p>")
-
- def test_preserved_whitespace_in_pre_and_textarea(self):
- """In <pre> and <textarea> tags, whitespace is preserved."""
- self.assertSoupEquals("<pre> </pre>")
- self.assertSoupEquals("<textarea> woo </textarea>")
-
- def test_single_quote_attribute_values_become_double_quotes(self):
- self.assertSoupEquals("<foo attr='bar'></foo>",
- '<foo attr="bar"></foo>')
-
- def test_attribute_values_with_nested_quotes_are_left_alone(self):
- text = """<foo attr='bar "brawls" happen'>a</foo>"""
- self.assertSoupEquals(text)
-
- def test_attribute_values_with_double_nested_quotes_get_quoted(self):
- text = """<foo attr='bar "brawls" happen'>a</foo>"""
- soup = self.soup(text)
- soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
- self.assertSoupEquals(
- soup.foo.decode(),
- """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""")
-
- def test_ampersand_in_attribute_value_gets_quoted(self):
- self.assertSoupEquals('<this is="really messed up & stuff"></this>',
- '<this is="really messed up &amp; stuff"></this>')
-
- def test_literal_in_textarea(self):
- # Anything inside a <textarea> is supposed to be treated as
- # the literal value of the field, (XXX citation needed).
- #
- # But, both lxml and html5lib do their best to parse the
- # contents of a <textarea> as HTML.
- text = '<textarea>Junk like <b> tags and <&<&amp;</textarea>'
- soup = BeautifulSoup(text)
- self.assertEquals(len(soup.textarea.contents), 2)
- self.assertEquals(soup.textarea.contents[0], u"Junk like ")
- self.assertEquals(soup.textarea.contents[1].name, 'b')
- self.assertEquals(soup.textarea.b.string, u" tags and ")
-
- def test_literal_in_script(self):
- # The contents of a <script> tag are treated as a literal string,
- # even if that string contains HTML.
- javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
- soup = BeautifulSoup('<script>%s</script>' % javascript)
- self.assertEquals(soup.script.string, javascript)
-
-
-class BuilderInvalidMarkupSmokeTest(SoupTest):
- """Tests of invalid markup.
-
- These are very likely to give different results for different tree
- builders. It's not required that a tree builder handle invalid
- markup at all.
- """
-
- def test_unclosed_block_level_elements(self):
- # Unclosed block-level elements should be closed.
- self.assertSoupEquals(
- '<blockquote><p><b>Foo</blockquote><p>Bar',
- '<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>')
-
- def test_fake_self_closing_tag(self):
- # If a self-closing tag presents as a normal tag, the 'open'
- # tag is treated as an instance of the self-closing tag and
- # the 'close' tag is ignored.
- self.assertSoupEquals(
- "<item><link>http://foo.com/</link></item>",
- "<item><link />http://foo.com/</item>")
-
- def test_boolean_attribute_with_no_value_gets_empty_value(self):
- soup = self.soup("<table><td nowrap>foo</td></table>")
- self.assertEquals(soup.table.td['nowrap'], '')
-
- def test_incorrectly_nested_tables(self):
- self.assertSoupEquals(
- '<table><tr><table><tr id="nested">',
- '<table><tr><table><tr id="nested"></tr></table></tr></table>')
-