summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2012-01-20 13:56:02 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2012-01-20 13:56:02 -0500
commit 45fd6b551dccc85f01061f26642614ceebf39843 (patch)
tree 1451f49eff5a8a5b4c13037f8616bee199985fe7
parent f7e2daa88f5d9001e171d65b122a3bdc7052f955 (diff)
Got the test suite to pass on Python 3.2 (skipping the html5lib stuff, which doesn't seem to have Python 3 support yet.)
-rw-r--r-- README.txt 17
-rw-r--r-- bs4/builder/_htmlparser.py 12
-rw-r--r-- bs4/tests/test_builder_registry.py 131
-rw-r--r-- bs4/tests/test_html5lib.py 17
-rw-r--r-- bs4/tests/test_tree.py 2
-rwxr-xr-x convert-python3.2 8
-rwxr-xr-x test 5
7 files changed, 183 insertions, 9 deletions
diff --git a/README.txt b/README.txt
index 5c99381..585487e 100644
--- a/README.txt
+++ b/README.txt
@@ -194,3 +194,20 @@ string:
The ['lxml', 'xml'] tree builder sets .is_xml to True; the other tree
builders set it to False. If you want to parse XHTML with an HTML
parser, you can set it manually.
+
+= Running the unit tests =
+
+Here's how to run the tests on Python 2.7:
+
+ $ cd bs4
+ $ python2.7 -m unittest discover
+
+Here's how to do it with Python 3.2:
+
+ $ ./convert-python3.2
+ $ cd python3.2/bs4
+ $ python3.2 -m unittest discover
+
+
+
+
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c293d9e..f9476cd 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -4,7 +4,12 @@ __all__ = [
'HTMLParserTreeBuilder',
]
-from HTMLParser import HTMLParser
+try:
+ from html.parser import HTMLParser
+ CONSTRUCTOR_TAKES_STRICT = True
+except ImportError, e:
+ from HTMLParser import HTMLParser
+ CONSTRUCTOR_TAKES_STRICT = False
from bs4.element import (
CData,
Comment,
@@ -28,6 +33,11 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
is_xml = False
features = [HTML, STRICT, HTMLPARSER]
+ def __init__(self, *args, **kwargs):
+ if CONSTRUCTOR_TAKES_STRICT:
+ kwargs['strict'] = True
+ return super(HTMLParserTreeBuilder, self).__init__(*args, **kwargs)
+
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
"""
diff --git a/bs4/tests/test_builder_registry.py b/bs4/tests/test_builder_registry.py
new file mode 100644
index 0000000..17caace
--- /dev/null
+++ b/bs4/tests/test_builder_registry.py
@@ -0,0 +1,131 @@
+"""Tests of the builder registry."""
+
+import unittest
+
+from bs4 import BeautifulSoup
+from bs4.builder import (
+ builder_registry as registry,
+ HTMLParserTreeBuilder,
+ LXMLTreeBuilderForXML,
+ LXMLTreeBuilder,
+ TreeBuilderRegistry,
+)
+
+try:
+ from bs4.builder import (
+ HTML5TreeBuilder,
+ )
+except ImportError:
+ HTML5LIB_PRESENT = False
+
+
+class BuiltInRegistryTest(unittest.TestCase):
+ """Test the built-in registry with the default builders registered."""
+
+ def test_combination(self):
+ self.assertEquals(registry.lookup('fast', 'html'),
+ LXMLTreeBuilder)
+ self.assertEquals(registry.lookup('permissive', 'xml'),
+ LXMLTreeBuilderForXML)
+ self.assertEquals(registry.lookup('strict', 'html'),
+ HTMLParserTreeBuilder)
+ if HTML5LIB_PRESENT:
+ self.assertEquals(registry.lookup('permissive', 'html'),
+ HTML5TreeBuilder)
+
+ def test_lookup_by_markup_type(self):
+ if HTML5LIB_PRESENT:
+ self.assertEquals(registry.lookup('html'), HTML5TreeBuilder)
+ else:
+ self.assertEquals(registry.lookup('html'), LXMLTreeBuilder)
+ self.assertEquals(registry.lookup('xml'), LXMLTreeBuilderForXML)
+
+ def test_named_library(self):
+ self.assertEquals(registry.lookup('lxml', 'xml'),
+ LXMLTreeBuilderForXML)
+ self.assertEquals(registry.lookup('lxml', 'html'),
+ LXMLTreeBuilder)
+ if HTML5LIB_PRESENT:
+ self.assertEquals(registry.lookup('html5lib'),
+ HTML5TreeBuilder)
+
+ self.assertEquals(registry.lookup('html.parser'),
+ HTMLParserTreeBuilder)
+
+ def test_unimplemented_combinations(self):
+ self.assertEquals(registry.lookup('fast', 'permissive', 'html'),
+ None)
+
+ def test_beautifulsoup_constructor_does_lookup(self):
+ # You can pass in a string.
+ BeautifulSoup("", features="html")
+ # Or a list of strings.
+ BeautifulSoup("", features=["html", "fast"])
+
+ # You'll get an exception if BS can't find an appropriate
+ # builder.
+ self.assertRaises(ValueError, BeautifulSoup,
+ "", features="no-such-feature")
+
+class RegistryTest(unittest.TestCase):
+ """Test the TreeBuilderRegistry class in general."""
+
+ def setUp(self):
+ self.registry = TreeBuilderRegistry()
+
+ def builder_for_features(self, *feature_list):
+ cls = type('Builder_' + '_'.join(feature_list),
+ (object,), {'features' : feature_list})
+
+ self.registry.register(cls)
+ return cls
+
+ def test_register_with_no_features(self):
+ builder = self.builder_for_features()
+
+ # Since the builder advertises no features, you can't find it
+ # by looking up features.
+ self.assertEquals(self.registry.lookup('foo'), None)
+
+ # But you can find it by doing a lookup with no features, if
+ # this happens to be the only registered builder.
+ self.assertEquals(self.registry.lookup(), builder)
+
+ def test_register_with_features_makes_lookup_succeed(self):
+ builder = self.builder_for_features('foo', 'bar')
+ self.assertEquals(self.registry.lookup('foo'), builder)
+ self.assertEquals(self.registry.lookup('bar'), builder)
+
+ def test_lookup_fails_when_no_builder_implements_feature(self):
+ builder = self.builder_for_features('foo', 'bar')
+ self.assertEquals(self.registry.lookup('baz'), None)
+
+ def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
+ builder1 = self.builder_for_features('foo')
+ builder2 = self.builder_for_features('bar')
+ self.assertEquals(self.registry.lookup(), builder2)
+
+ def test_lookup_fails_when_no_tree_builders_registered(self):
+ self.assertEquals(self.registry.lookup(), None)
+
+ def test_lookup_gets_most_recent_builder_supporting_all_features(self):
+ has_one = self.builder_for_features('foo')
+ has_the_other = self.builder_for_features('bar')
+ has_both_early = self.builder_for_features('foo', 'bar', 'baz')
+ has_both_late = self.builder_for_features('foo', 'bar', 'quux')
+ lacks_one = self.builder_for_features('bar')
+ has_the_other = self.builder_for_features('foo')
+
+ # There are two builders featuring 'foo' and 'bar', but
+ # the one that also features 'quux' was registered later.
+ self.assertEquals(self.registry.lookup('foo', 'bar'),
+ has_both_late)
+
+ # There is only one builder featuring 'foo', 'bar', and 'baz'.
+ self.assertEquals(self.registry.lookup('foo', 'bar', 'baz'),
+ has_both_early)
+
+ def test_lookup_fails_when_cannot_reconcile_requested_features(self):
+ builder1 = self.builder_for_features('foo', 'bar')
+ builder2 = self.builder_for_features('foo', 'baz')
+ self.assertEquals(self.registry.lookup('bar', 'baz'), None)
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 4d8dcc0..85cedbf 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -1,11 +1,19 @@
-from bs4.builder import HTML5TreeBuilder
+try:
+ from bs4.builder import HTML5TreeBuilder
+ HTML5LIB_PRESENT = True
+except ImportError, e:
+ HTML5LIB_PRESENT = False
from bs4.element import Comment, SoupStrainer
from test_lxml import (
TestLXMLBuilder,
TestLXMLBuilderInvalidMarkup,
TestLXMLBuilderEncodingConversion,
)
+import unittest
+@unittest.skipIf(
+ not HTML5LIB_PRESENT,
+ "html5lib seems not to be present, not testing its tree builder.")
class TestHTML5Builder(TestLXMLBuilder):
"""See `BuilderSmokeTest`."""
@@ -73,7 +81,9 @@ class TestHTML5Builder(TestLXMLBuilder):
# get a CData object.
self.assertSoupEquals(markup, "<svg><!--[CDATA[foobar]]--></svg>")
-
+@unittest.skipIf(
+ not HTML5LIB_PRESENT,
+ "html5lib seems not to be present, not testing it on invalid markup.")
class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
"""See `BuilderInvalidMarkupSmokeTest`."""
@@ -210,6 +220,9 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+@unittest.skipIf(
+ not HTML5LIB_PRESENT,
+ "html5lib seems not to be present, not testing encoding conversion.")
class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
@property
def default_builder(self):
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 68677ca..865ac68 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -546,7 +546,7 @@ class TestTreeModification(SoupTest):
self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
def test_new_tag_creation(self):
- builder = builder_registry.lookup('html5lib')()
+ builder = builder_registry.lookup('html')()
soup = self.soup("<body></body>", builder=builder)
a = Tag(soup, builder, 'a')
ol = Tag(soup, builder, 'ol')
diff --git a/convert-python3.2 b/convert-python3.2
new file mode 100755
index 0000000..a083124
--- /dev/null
+++ b/convert-python3.2
@@ -0,0 +1,8 @@
+#!/bin/sh
+#
+# The Python 2 source is the definitive source. This script uses 2to3-3.2 to
+# create a new python3.2/bs4 source tree that works under Python 3.
+rm -rf python3.2
+mkdir python3.2
+cp -r bs4 python3.2
+2to3-3.2 -w python3.2
\ No newline at end of file
diff --git a/test b/test
deleted file mode 100755
index 420e5ef..0000000
--- a/test
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-# Run all the tests in /tests/.
-# Don't run the test superclasses in /beautifulsoup/.
-unit2 discover -t . -s bs4.tests