Got the test suite to pass on Python 3.2 (skipping the html5lib stuff, which doesn't seem to have Python 3 support yet.)

author: Leonard Richardson <leonard.richardson@canonical.com> 2012-01-20 13:56:02 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-01-20 13:56:02 -0500
commit: 45fd6b551dccc85f01061f26642614ceebf39843 (patch)
tree: 1451f49eff5a8a5b4c13037f8616bee199985fe7
parent: f7e2daa88f5d9001e171d65b122a3bdc7052f955 (diff)
7 files changed, 183 insertions, 9 deletions
diff --git a/README.txt b/README.txt
index 5c99381..585487e 100644
--- a/README.txt
+++ b/README.txt
@@ -194,3 +194,20 @@ string:
 The ['lxml', 'xml'] tree builder sets .is_xml to True; the other tree
 builders set it to False. If you want to parse XHTML with an HTML
 parser, you can set it manually.
+
+= Running the unit tests =
+
+Here's how to run the tests on Python 2.7:
+
+ $ cd bs4
+ $ python2.7 -m unittest discover
+
+Here's how to do it with Python 3.2:
+
+ $ ./convert-python3.2
+ $ cd python3.2/bs4
+ $ python3.2 -m unittest discover
+
+
+
+
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c293d9e..f9476cd 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -4,7 +4,12 @@ __all__ = [
     'HTMLParserTreeBuilder',
     ]
 
-from HTMLParser import HTMLParser
+try:
+    from html.parser import HTMLParser
+    CONSTRUCTOR_TAKES_STRICT = True
+except ImportError, e:
+    from HTMLParser import HTMLParser
+    CONSTRUCTOR_TAKES_STRICT = False
 from bs4.element import (
     CData,
     Comment,
@@ -28,6 +33,11 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
     is_xml = False
     features = [HTML, STRICT, HTMLPARSER]
 
+    def __init__(self, *args, **kwargs):
+        if CONSTRUCTOR_TAKES_STRICT:
+            kwargs['strict'] = True
+        return super(HTMLParserTreeBuilder, self).__init__(*args, **kwargs)
+
     def prepare_markup(self, markup, user_specified_encoding=None,
                        document_declared_encoding=None):
         """
diff --git a/bs4/tests/test_builder_registry.py b/bs4/tests/test_builder_registry.py
new file mode 100644
index 0000000..17caace
--- /dev/null
+++ b/bs4/tests/test_builder_registry.py
@@ -0,0 +1,131 @@
+"""Tests of the builder registry."""
+
+import unittest
+
+from bs4 import BeautifulSoup
+from bs4.builder import (
+    builder_registry as registry,
+    HTMLParserTreeBuilder,
+    LXMLTreeBuilderForXML,
+    LXMLTreeBuilder,
+    TreeBuilderRegistry,
+)
+
+try:
+    from bs4.builder import HTML5TreeBuilder
+    HTML5LIB_PRESENT = True
+except ImportError:
+    # html5lib is optional; tests that need it check this flag first.
+    HTML5LIB_PRESENT = False
+
+
+class BuiltInRegistryTest(unittest.TestCase):
+    """Test the built-in registry with the default builders registered."""
+
+    def test_combination(self):
+        self.assertEquals(registry.lookup('fast', 'html'),
+                          LXMLTreeBuilder)
+        self.assertEquals(registry.lookup('permissive', 'xml'),
+                          LXMLTreeBuilderForXML)
+        self.assertEquals(registry.lookup('strict', 'html'),
+                          HTMLParserTreeBuilder)
+        if HTML5LIB_PRESENT:
+            self.assertEquals(registry.lookup('permissive', 'html'),
+                              HTML5TreeBuilder)
+
+    def test_lookup_by_markup_type(self):
+        if HTML5LIB_PRESENT:
+            self.assertEquals(registry.lookup('html'), HTML5TreeBuilder)
+        else:
+            self.assertEquals(registry.lookup('html'), LXMLTreeBuilder)
+        self.assertEquals(registry.lookup('xml'), LXMLTreeBuilderForXML)
+
+    def test_named_library(self):
+        self.assertEquals(registry.lookup('lxml', 'xml'),
+                          LXMLTreeBuilderForXML)
+        self.assertEquals(registry.lookup('lxml', 'html'),
+                          LXMLTreeBuilder)
+        if HTML5LIB_PRESENT:
+            self.assertEquals(registry.lookup('html5lib'),
+                              HTML5TreeBuilder)
+
+        self.assertEquals(registry.lookup('html.parser'),
+                          HTMLParserTreeBuilder)
+
+    def test_unimplemented_combinations(self):
+        self.assertEquals(registry.lookup('fast', 'permissive', 'html'),
+                          None)
+
+    def test_beautifulsoup_constructor_does_lookup(self):
+        # You can pass in a string.
+        BeautifulSoup("", features="html")
+        # Or a list of strings.
+        BeautifulSoup("", features=["html", "fast"])
+
+        # You'll get an exception if BS can't find an appropriate
+        # builder.
+        self.assertRaises(ValueError, BeautifulSoup,
+                          "", features="no-such-feature")
+
+class RegistryTest(unittest.TestCase):
+    """Test the TreeBuilderRegistry class in general."""
+
+    def setUp(self):
+        self.registry = TreeBuilderRegistry()
+
+    def builder_for_features(self, *feature_list):
+        cls = type('Builder_' + '_'.join(feature_list),
+                   (object,), {'features' : feature_list})
+
+        self.registry.register(cls)
+        return cls
+
+    def test_register_with_no_features(self):
+        builder = self.builder_for_features()
+
+        # Since the builder advertises no features, you can't find it
+        # by looking up features.
+        self.assertEquals(self.registry.lookup('foo'), None)
+
+        # But you can find it by doing a lookup with no features, if
+        # this happens to be the only registered builder.
+        self.assertEquals(self.registry.lookup(), builder)
+
+    def test_register_with_features_makes_lookup_succeed(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEquals(self.registry.lookup('foo'), builder)
+        self.assertEquals(self.registry.lookup('bar'), builder)
+
+    def test_lookup_fails_when_no_builder_implements_feature(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEquals(self.registry.lookup('baz'), None)
+
+    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
+        builder1 = self.builder_for_features('foo')
+        builder2 = self.builder_for_features('bar')
+        self.assertEquals(self.registry.lookup(), builder2)
+
+    def test_lookup_fails_when_no_tree_builders_registered(self):
+        self.assertEquals(self.registry.lookup(), None)
+
+    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
+        has_one = self.builder_for_features('foo')
+        has_the_other = self.builder_for_features('bar')
+        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
+        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
+        lacks_one = self.builder_for_features('bar')
+        has_the_other = self.builder_for_features('foo')
+
+        # There are two builders featuring 'foo' and 'bar', but
+        # the one that also features 'quux' was registered later.
+        self.assertEquals(self.registry.lookup('foo', 'bar'),
+                          has_both_late)
+
+        # There is only one builder featuring 'foo', 'bar', and 'baz'.
+        self.assertEquals(self.registry.lookup('foo', 'bar', 'baz'),
+                          has_both_early)
+
+    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
+        builder1 = self.builder_for_features('foo', 'bar')
+        builder2 = self.builder_for_features('foo', 'baz')
+        self.assertEquals(self.registry.lookup('bar', 'baz'), None)
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 4d8dcc0..85cedbf 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -1,11 +1,19 @@
-from bs4.builder import HTML5TreeBuilder
+try:
+    from bs4.builder import HTML5TreeBuilder
+    HTML5LIB_PRESENT = True
+except ImportError, e:
+    HTML5LIB_PRESENT = False
 from bs4.element import Comment, SoupStrainer
 from test_lxml import (
     TestLXMLBuilder,
     TestLXMLBuilderInvalidMarkup,
     TestLXMLBuilderEncodingConversion,
     )
+import unittest
 
+@unittest.skipIf(
+    not HTML5LIB_PRESENT,
+    "html5lib seems not to be present, not testing its tree builder.")
 class TestHTML5Builder(TestLXMLBuilder):
     """See `BuilderSmokeTest`."""
 
@@ -73,7 +81,9 @@ class TestHTML5Builder(TestLXMLBuilder):
         # get a CData object.
         self.assertSoupEquals(markup, "<svg><!--[CDATA[foobar]]--></svg>")
 
-
+@unittest.skipIf(
+    not HTML5LIB_PRESENT,
+    "html5lib seems not to be present, not testing it on invalid markup.")
 class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
     """See `BuilderInvalidMarkupSmokeTest`."""
 
@@ -210,6 +220,9 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
         self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
 
 
+@unittest.skipIf(
+    not HTML5LIB_PRESENT,
+    "html5lib seems not to be present, not testing encoding conversion.")
 class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
     @property
     def default_builder(self):
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 68677ca..865ac68 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -546,7 +546,7 @@ class TestTreeModification(SoupTest):
         self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
 
     def test_new_tag_creation(self):
-        builder = builder_registry.lookup('html5lib')()
+        builder = builder_registry.lookup('html')()
         soup = self.soup("<body></body>", builder=builder)
         a = Tag(soup, builder, 'a')
         ol = Tag(soup, builder, 'ol')
diff --git a/convert-python3.2 b/convert-python3.2
new file mode 100755
index 0000000..a083124
--- /dev/null
+++ b/convert-python3.2
@@ -0,0 +1,8 @@
+#!/bin/sh
+#
+# The Python 2 source is the definitive source. This script uses 2to3-3.2 to
+# create a new python3.2/bs4 source tree that works under Python 3.
+rm -rf python3.2
+mkdir python3.2
+cp -r bs4 python3.2
+2to3-3.2 -w python3.2
+\ No newline at end of file
diff --git a/test b/test
deleted file mode 100755
index 420e5ef..0000000
--- a/test
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-# Run all the tests in /tests/.
-# Don't run the test superclasses in /beautifulsoup/.
-unit2 discover -t . -s bs4.tests
author	Leonard Richardson <leonard.richardson@canonical.com>	2012-01-20 13:56:02 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2012-01-20 13:56:02 -0500
commit	45fd6b551dccc85f01061f26642614ceebf39843 (patch)
tree	1451f49eff5a8a5b4c13037f8616bee199985fe7
parent	f7e2daa88f5d9001e171d65b122a3bdc7052f955 (diff)