summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- beautifulsoup/testing.py 13
-rw-r--r-- tests/test_html5lib.py 57
-rw-r--r-- tests/test_lxml.py 24
3 files changed, 88 insertions, 6 deletions
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
index 20d087e..eea14f0 100644
--- a/beautifulsoup/testing.py
+++ b/beautifulsoup/testing.py
@@ -7,11 +7,9 @@ from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
class SoupTest(unittest.TestCase):
- def setUp(self):
- # LXMLTreeBuilder won't handle bad markup, but that's fine,
- # since all the parsing tests take place in parser-specific
- # test suites that override default_builder.
- self.default_builder = LXMLTreeBuilder()
+ @property
+ def default_builder(self):
+ return LXMLTreeBuilder()
def soup(self, markup, **kwargs):
"""Build a Beautiful Soup object from markup."""
@@ -47,7 +45,10 @@ class BuilderSmokeTest(SoupTest):
def test_bare_string(self):
# A bare string is turned into some kind of HTML document or
# fragment recognizable as the original string.
- self.assertSoupEquals("A bare string")
+ #
+ # In this case, lxml puts a <p> tag around the bare string.
+ self.assertSoupEquals(
+ "A bare string", "<p>A bare string</p>")
def test_mixed_case_tags(self):
# Mixed-case tags are folded to lowercase.
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
new file mode 100644
index 0000000..417e87b
--- /dev/null
+++ b/tests/test_html5lib.py
@@ -0,0 +1,57 @@
+from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
+from beautifulsoup.testing import (
+ BuilderInvalidMarkupSmokeTest,
+ BuilderSmokeTest,
+)
+
+
+class TestHTML5Builder(BuilderSmokeTest):
+ """See `BuilderSmokeTest`."""
+
+ @property
+ def default_builder(self):
+ return HTML5TreeBuilder()
+
+ def test_bare_string(self):
+ # A bare string is turned into some kind of HTML document or
+ # fragment recognizable as the original string.
+ #
+ # In this case, html5lib does not put a <p> tag around the bare string.
+ self.assertSoupEquals(
+ "A bare string", "A bare string")
+
+ def test_collapsed_whitespace(self):
+ """Whitespace is preserved even in tags that don't require it."""
+ self.assertSoupEquals("<p> </p>")
+ self.assertSoupEquals("<b> </b>")
+
+
+class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
+ """See `BuilderInvalidMarkupSmokeTest`."""
+
+ @property
+ def default_builder(self):
+ return HTML5TreeBuilder()
+
+ def test_unclosed_block_level_elements(self):
+ # The unclosed <b> tag is closed so that the block-level tag
+ # can be closed, and another <b> tag is inserted after the
+ # next block-level tag begins.
+ self.assertSoupEquals(
+ '<blockquote><p><b>Foo</blockquote><p>Bar',
+ '<blockquote><p><b>Foo</b></p></blockquote><p><b>Bar</b></p>')
+
+ def test_incorrectly_nested_tables(self):
+ self.assertSoupEquals(
+ '<table><tr><table><tr id="nested">',
+ ('<table><tbody><tr></tr></tbody></table>'
+ '<table><tbody><tr id="nested"></tr></tbody></table>'))
+
+ def test_foo(self):
+ isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
+ soup = self.soup(isolatin)
+
+ utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
+ utf8 = utf8.replace("\xe9", "\xc3\xa9")
+
+ print soup
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
new file mode 100644
index 0000000..7fe6870
--- /dev/null
+++ b/tests/test_lxml.py
@@ -0,0 +1,24 @@
+"""Tests to ensure that the lxml tree builder generates good trees."""
+
+from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
+from beautifulsoup.testing import (
+ BuilderInvalidMarkupSmokeTest,
+ BuilderSmokeTest,
+)
+
+class TestLXMLBuilder(BuilderSmokeTest):
+ """See `BuilderSmokeTest`."""
+
+ def test_foo(self):
+ isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
+ soup = self.soup(isolatin)
+
+ utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
+ utf8 = utf8.replace("\xe9", "\xc3\xa9")
+
+ print soup
+
+
+class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
+ """See `BuilderInvalidMarkupSmokeTest`."""
+