From 5d4b8cc6d288a705cf87c2f2c26036b94d825aa9 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 28 Jan 2011 13:30:40 -0500 Subject: Refactored enough to get all of the tests to pass with test discovery, even though there are still some underlying problems. --- beautifulsoup/testing.py | 13 ++++++----- tests/test_html5lib.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ tests/test_lxml.py | 24 ++++++++++++++++++++ 3 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 tests/test_html5lib.py create mode 100644 tests/test_lxml.py diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py index 20d087e..eea14f0 100644 --- a/beautifulsoup/testing.py +++ b/beautifulsoup/testing.py @@ -7,11 +7,9 @@ from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder class SoupTest(unittest.TestCase): - def setUp(self): - # LXMLTreeBuilder won't handle bad markup, but that's fine, - # since all the parsing tests take place in parser-specific - # test suites that override default_builder. - self.default_builder = LXMLTreeBuilder() + @property + def default_builder(self): + return LXMLTreeBuilder() def soup(self, markup, **kwargs): """Build a Beautiful Soup object from markup.""" @@ -47,7 +45,10 @@ class BuilderSmokeTest(SoupTest): def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. - self.assertSoupEquals("A bare string") + # + # In this case, lxml puts a

<p> tag around the bare string. + self.assertSoupEquals( + "A bare string", "<p>A bare string</p>
") def test_mixed_case_tags(self): # Mixed-case tags are folded to lowercase. diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py new file mode 100644 index 0000000..417e87b --- /dev/null +++ b/tests/test_html5lib.py @@ -0,0 +1,57 @@ +from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder +from beautifulsoup.testing import ( + BuilderInvalidMarkupSmokeTest, + BuilderSmokeTest, +) + + +class TestHTML5Builder(BuilderSmokeTest): + """See `BuilderSmokeTest`.""" + + @property + def default_builder(self): + return HTML5TreeBuilder() + + def test_bare_string(self): + # A bare string is turned into some kind of HTML document or + # fragment recognizable as the original string. + # + # In this case, lxml puts a

<p> tag around the bare string. + self.assertSoupEquals( + "A bare string", "A bare string") + + def test_collapsed_whitespace(self): + """Whitespace is preserved even in tags that don't require it.""" + self.assertSoupEquals("

") + self.assertSoupEquals(" ") + + +class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): + """See `BuilderInvalidMarkupSmokeTest`.""" + + @property + def default_builder(self): + return HTML5TreeBuilder() + + def test_unclosed_block_level_elements(self): + # The unclosed tag is closed so that the block-level tag + # can be closed, and another tag is inserted after the + # next block-level tag begins. + self.assertSoupEquals( + '

Foo

Bar', + '

Foo

Bar

') + + def test_incorrectly_nested_tables(self): + self.assertSoupEquals( + '
', + ('
' + '
')) + + def test_foo(self): + isolatin = """Sacr\xe9 bleu!""" + soup = self.soup(isolatin) + + utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) + utf8 = utf8.replace("\xe9", "\xc3\xa9") + + print soup diff --git a/tests/test_lxml.py b/tests/test_lxml.py new file mode 100644 index 0000000..7fe6870 --- /dev/null +++ b/tests/test_lxml.py @@ -0,0 +1,24 @@ +"""Tests to ensure that the lxml tree builder generates good trees.""" + +from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder +from beautifulsoup.testing import ( + BuilderInvalidMarkupSmokeTest, + BuilderSmokeTest, +) + +class TestLXMLBuilder(BuilderSmokeTest): + """See `BuilderSmokeTest`.""" + + def test_foo(self): + isolatin = """Sacr\xe9 bleu!""" + soup = self.soup(isolatin) + + utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) + utf8 = utf8.replace("\xe9", "\xc3\xa9") + + print soup + + +class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): + """See `BuilderInvalidMarkupSmokeTest`.""" + -- cgit v1.2.3