From ab7ed77ab3560f6d574d577befc7a1f593e45327 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Mon, 20 Feb 2012 11:43:46 -0500 Subject: Changed the class structure so that the default parser test class uses html.parser. --- bs4/tests/test_lxml.py | 584 +++++++------------------------------------------ 1 file changed, 84 insertions(+), 500 deletions(-) (limited to 'bs4/tests/test_lxml.py') diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py index 3603528..b9cc540 100644 --- a/bs4/tests/test_lxml.py +++ b/bs4/tests/test_lxml.py @@ -2,13 +2,22 @@ import re +try: + from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML + LXML_PRESENT = True +except ImportError, e: + LXML_PRESENT = False + from bs4 import BeautifulSoup -from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML from bs4.element import Comment, Doctype, SoupStrainer -from bs4.testing import SoupTest - - -class TestLXMLBuilder(SoupTest): +from bs4.testing import skipIf +from bs4.tests import test_htmlparser +from bs4.testing import skipIf + +@skipIf( + not LXML_PRESENT, + "lxml seems not to be present, not testing its tree builder.") +class TestLXMLTreeBuilder(test_htmlparser.TestHTMLParserTreeBuilder): """A smoke test for the LXML tree builder. Subclass this to test some other HTML tree builder. Subclasses of @@ -29,11 +38,10 @@ class TestLXMLBuilder(SoupTest): self.assertSoupEquals( "A bare string", "

A bare string

") - def test_mixed_case_tags(self): - # Mixed-case tags are folded to lowercase. - self.assertSoupEquals( - "", - "") + def test_cdata_where_its_ok(self): + # lxml strips CDATA sections, no matter where they occur. + markup = "foobar" + self.assertSoupEquals(markup, "") def test_empty_element(self): # HTML's empty-element tags are recognized as such. @@ -43,93 +51,16 @@ class TestLXMLBuilder(SoupTest): self.assertSoupEquals( "

Foo
bar

", "

Foo
bar

") - def test_empty_tag_thats_not_an_empty_element_tag(self): - # A tag that is empty but not an HTML empty-element tag - # is not presented as an empty-element tag. - self.assertSoupEquals("

", "

") - - def test_comment(self): - # Comments are represented as Comment objects. - markup = "

foobaz

" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEqual(comment.__class__, Comment) - - def test_nested_inline_elements(self): - # Inline tags can be nested indefinitely. - b_tag = "Inside a B tag" - self.assertSoupEquals(b_tag) - - nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) - - double_nested_b_tag = "

A doubly nested tag

" - self.assertSoupEquals(nested_b_tag) - - def test_nested_block_level_elements(self): - soup = self.soup('

Foo

') - blockquote = soup.blockquote - self.assertEqual(blockquote.p.b.string, 'Foo') - self.assertEqual(blockquote.b.string, 'Foo') - - # This is a <table> tag containing another
<table> tag in one of its - # cells. - TABLE_MARKUP_1 = ('
' - '' - "') - - def test_correctly_nested_tables(self): - markup = ('
Here's another table:" - '' - '' - '
foo
' - '' - "') - - self.assertSoupEquals( - markup, - '
Here's another table:" - '' - '' - '
foo
Here\'s another table:' - '
foo
' - '
') - - self.assertSoupEquals( - "" - "" - "
Foo
Bar
Baz
") - - def test_collapsed_whitespace(self): - """In most tags, whitespace is collapsed.""" - self.assertSoupEquals("

", "

") - - def test_preserved_whitespace_in_pre_and_textarea(self): - """In
 and ")
-
-    def test_single_quote_attribute_values_become_double_quotes(self):
-        self.assertSoupEquals("",
-                              '')
-
-    def test_attribute_values_with_nested_quotes_are_left_alone(self):
-        text = """a"""
-        self.assertSoupEquals(text)
-
-    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
-        text = """a"""
+    def test_naked_ampersands(self):
+        # Ampersands are left alone.
+        text = "

AT&T

" soup = self.soup(text) - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - self.assertSoupEquals( - soup.foo.decode(), - """a""") + self.assertEqual(soup.p.string, "AT&T") - def test_ampersand_in_attribute_value_gets_quoted(self): - self.assertSoupEquals('', - '') + # Even if they're in attribute values. + invalid_url = 'foo' + soup = self.soup(invalid_url) + self.assertEqual(soup.a['href'], "http://example.org?a=1&b=2;3") def test_literal_in_textarea(self): # Anything inside a