Merge from trunk and added tests.

author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-23 10:21:30 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-23 10:21:30 -0500
commit: 0339d1f4a0f7306cb8c1ef4d7cd609b06e33eb27 (patch)
tree: 40b1d7453a284da936c40e5e53402942c9650b74 /bs4/tests/test_lxml.py
parent: 6e4b4dfffbd6e8d465aebd009108654003da338b (diff)
parent: a4ffd587fad7d2442a9ccdcdd0a8f6df347f39eb (diff)
1 files changed, 11 insertions, 223 deletions
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 4d19e7f..92b7389 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -12,238 +12,26 @@ from bs4 import BeautifulSoup
 from bs4.element import Comment, Doctype, SoupStrainer
 from bs4.testing import skipIf
 from bs4.tests import test_htmlparser
-from bs4.testing import skipIf
+from bs4.testing import (
+    HTMLTreeBuilderSmokeTest,
+    SoupTest,
+    skipIf,
+)
 
 @skipIf(
     not LXML_PRESENT,
     "lxml seems not to be present, not testing its tree builder.")
-class TestLXMLTreeBuilder(test_htmlparser.TestHTMLParserTreeBuilder):
-    """A smoke test for the LXML tree builder.
-
-    Subclass this to test some other HTML tree builder. Subclasses of
-    this test ensure that all of Beautiful Soup's tree builders
-    generate more or less the same trees.
-
-    It's okay for trees to differ--just override the appropriate test
-    method to demonstrate how one tree builder differs from the LXML
-    builder. But in general, all HTML tree builders should generate
-    trees that make most of these tests pass.
-    """
+class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
+    """See ``HTMLTreeBuilderSmokeTest``."""
 
     @property
     def default_builder(self):
         return LXMLTreeBuilder()
 
-    def test_bare_string(self):
-        # A bare string is turned into some kind of HTML document or
-        # fragment recognizable as the original string.
-        #
-        # In this case, lxml puts a <p> tag around the bare string.
-        self.assertSoupEquals(
-            "A bare string", "<p>A bare string</p>")
-
-    def test_cdata_where_its_ok(self):
-        # lxml strips CDATA sections, no matter where they occur.
-        markup = "<svg><![CDATA[foobar]]>"
-        self.assertSoupEquals(markup, "<svg></svg>")
-
-    def test_empty_element(self):
-        # HTML's empty-element tags are recognized as such.
-        self.assertSoupEquals(
-            "<p>A <meta> tag</p>", "<p>A <meta/> tag</p>")
-
+    def test_out_of_range_entity(self):
         self.assertSoupEquals(
-            "<p>Foo<br/>bar</p>", "<p>Foo<br/>bar</p>")
-
-    def test_naked_ampersands(self):
-        # Ampersands are left alone.
-        text = "<p>AT&T</p>"
-        soup = self.soup(text)
-        self.assertEqual(soup.p.string, "AT&T")
-
-        # Even if they're in attribute values.
-        invalid_url = '<a href="http://example.org?a=1&b=2;3">foo</a>'
-        soup = self.soup(invalid_url)
-        self.assertEqual(soup.a['href'], "http://example.org?a=1&b=2;3")
-
-    def test_literal_in_textarea(self):
-        # Anything inside a <textarea> is supposed to be treated as
-        # the literal value of the field, (XXX citation
-        # needed). html5lib does this correctly. But, lxml does its
-        # best to parse the contents of a <textarea> as HTML.
-        text = '<textarea>Junk like <b> tags and <&<&amp;</textarea>'
-        soup = self.soup(text)
-        self.assertEqual(len(soup.textarea.contents), 2)
-        self.assertEqual(soup.textarea.contents[0], u"Junk like ")
-        self.assertEqual(soup.textarea.contents[1].name, 'b')
-        self.assertEqual(soup.textarea.b.string, u" tags and ")
-
-    def test_literal_in_script(self):
-        # The contents of a <script> tag are treated as a literal string,
-        # even if that string contains HTML.
-        javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
-        soup = self.soup('<script>%s</script>' % javascript)
-        self.assertEqual(soup.script.string, javascript)
-
-    def test_doctype(self):
-        # Test a normal HTML doctype you'll commonly see in a real document.
-        self._test_doctype(
-            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
-
-    def test_namespaced_system_doctype(self):
-        # Test a namespaced doctype with a system id.
-        self._test_doctype('xsl:stylesheet SYSTEM "htmlent.dtd"')
-
-    def test_namespaced_public_doctype(self):
-        # Test a namespaced doctype with a public id.
-        self._test_doctype('xsl:stylesheet PUBLIC "htmlent.dtd"')
-
-    def test_entities_in_attribute_values_converted_during_parsing(self):
-
-        # The numeric entity isn't recognized without the closing
-        # semicolon.
-        text = '<x t="pi&#241ata">'
-        expected = u"pi\N{LATIN SMALL LETTER N WITH TILDE}ata"
-        soup = self.soup(text)
-        self.assertEqual(soup.x['t'], expected)
-
-        text = '<x t="pi&#241;ata">'
-        expected = u"pi\N{LATIN SMALL LETTER N WITH TILDE}ata"
-        soup = self.soup(text)
-        self.assertEqual(soup.x['t'], u"pi\xf1ata")
-
-        text = '<x t="pi&#xf1;ata">'
-        soup = self.soup(text)
-        self.assertEqual(soup.x['t'], expected)
-
-        text = '<x t="sacr&eacute; bleu">'
-        soup = self.soup(text)
-        self.assertEqual(
-            soup.x['t'],
-            u"sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu")
-
-        # This can cause valid HTML to become invalid.
-        valid_url = '<a href="http://example.org?a=1&amp;b=2;3">foo</a>'
-        soup = self.soup(valid_url)
-        self.assertEqual(soup.a['href'], "http://example.org?a=1&b=2;3")
-
-
-@skipIf(
-    not LXML_PRESENT,
-    "lxml seems not to be present, not testing it on invalid markup.")
-class TestLXMLTreeBuilderInvalidMarkup(
-    test_htmlparser.TestHTMLParserTreeBuilderInvalidMarkup):
-
-    @property
-    def default_builder(self):
-        return LXMLTreeBuilder()
-
-    def test_attribute_value_never_got_closed(self):
-        markup = '<a href="http://foo.com/</a> and blah and blah'
-        soup = self.soup(markup)
-        self.assertEqual(
-            soup.a['href'], "http://foo.com/</a> and blah and blah")
-
-    def test_attribute_value_was_closed_by_subsequent_tag(self):
-        markup = """<a href="foo</a>, </a><a href="bar">baz</a>"""
-        soup = self.soup(markup)
-        # The string between the first and second quotes was interpreted
-        # as the value of the 'href' attribute.
-        self.assertEqual(soup.a['href'], 'foo</a>, </a><a href=')
-
-        #The string after the second quote (bar"), was treated as an
-        #empty attribute called bar.
-        self.assertEqual(soup.a['bar'], '')
-        self.assertEqual(soup.a.string, "baz")
-
-    def test_document_starts_with_bogus_declaration(self):
-        soup = self.soup('<! Foo ><p>a</p>')
-        # The declaration is ignored altogether.
-        self.assertEqual(soup.encode(), b"<html><body><p>a</p></body></html>")
-
-    def test_incomplete_declaration(self):
-        # An incomplete declaration will screw up the rest of the document.
-        self.assertSoupEquals('a<!b <p>c', '<p>a</p>')
-
-    def test_nonsensical_declaration(self):
-        # Declarations that don't make any sense are ignored.
-        self.assertSoupEquals('<! Foo = -8><p>a</p>', "<p>a</p>")
-
-    def test_unquoted_attribute_value(self):
-        soup = self.soup('<a style={height:21px;}></a>')
-        self.assertEqual(soup.a['style'], '{height:21px;}')
-
-    def test_whitespace_in_doctype(self):
-        # A declaration that has extra whitespace is ignored.
+            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
         self.assertSoupEquals(
-            ('<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">'
-             '<p>foo</p>'),
-            '<p>foo</p>')
-
-    def test_boolean_attribute_with_no_value(self):
-        soup = self.soup("<table><td nowrap>foo</td></table>")
-        self.assertEqual(soup.table.td['nowrap'], '')
-
-    def test_cdata_where_it_doesnt_belong(self):
-        #CDATA sections are ignored.
-        markup = "<div><![CDATA[foo]]>"
-        self.assertSoupEquals(markup, "<div></div>")
-
-    def test_empty_element_tag_with_contents(self):
-        self.assertSoupEquals("<br>foo</br>", "<br/>foo")
-
-    def test_nonexistent_entity(self):
-        soup = self.soup("<p>foo&#bar;baz</p>")
-        self.assertEqual(soup.p.string, "foobar;baz")
-
-        # Compare a real entity.
-        soup = self.soup("<p>foo&#100;baz</p>")
-        self.assertEqual(soup.p.string, "foodbaz")
-
-        # Also compare html5lib, which preserves the &# before the
-        # entity name.
-
-    def test_entity_was_not_finished(self):
-        soup = self.soup("<p>&lt;Hello&gt")
-        # Compare html5lib, which completes the entity.
-        self.assertEqual(soup.p.string, "<Hello&gt")
-
-    def test_fake_self_closing_tag(self):
-        # If a self-closing tag presents as a normal tag, the 'open'
-        # tag is treated as an instance of the self-closing tag and
-        # the 'close' tag is ignored.
+            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
         self.assertSoupEquals(
-            "<item><link>http://foo.com/</link></item>",
-            "<item><link/>http://foo.com/</item>")
-
-    def test_paragraphs_containing_block_display_elements(self):
-        markup = self.soup("<p>this is the definition:"
-                           "<dl><dt>first case</dt>")
-        # The <p> tag is closed before the <dl> tag begins.
-        self.assertEqual(markup.p.contents, ["this is the definition:"])
-
-    def test_multiple_values_for_the_same_attribute(self):
-        markup = '<b b="20" a="1" b="10" a="2" a="3" a="4"></b>'
-        self.assertSoupEquals(markup, '<b a="1" b="20"></b>')
-
-    def test_entity_out_of_range(self):
-        # An entity that's out of range will be ignored.
-        soup = self.soup("<p>&#10000000000000;</p>")
-        self.assertEqual(0, len(soup.p.contents))
-
-        soup = self.soup("<p>&#x1000000000000;</p>")
-        self.assertEqual(0, len(soup.p.contents))
-
-        soup = self.soup("<p>&#1000000000;</p>")
-        self.assertEqual(0, len(soup.p.contents))
-
-
-@skipIf(
-    not LXML_PRESENT,
-    "lxml seems not to be present, not testing it on encoding conversion.")
-class TestLXMLParserTreeBuilderEncodingConversion(
-    test_htmlparser.TestHTMLParserTreeBuilderEncodingConversion):
-
-    @property
-    def default_builder(self):
-        return LXMLTreeBuilder()
+            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
author	Leonard Richardson <leonard.richardson@canonical.com>	2012-02-23 10:21:30 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2012-02-23 10:21:30 -0500
commit	0339d1f4a0f7306cb8c1ef4d7cd609b06e33eb27 (patch)
tree	40b1d7453a284da936c40e5e53402942c9650b74 /bs4/tests/test_lxml.py
parent	6e4b4dfffbd6e8d465aebd009108654003da338b (diff)
parent	a4ffd587fad7d2442a9ccdcdd0a8f6df347f39eb (diff)