Minor late-night tweaks.

author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-26 23:47:06 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-26 23:47:06 -0500
commit: dc5682014c7360e723d4861d32ee933eea8fcd5d (patch)
tree: 021886f2b0e2158d87d33417b8b687dfb1cee923
parent: ace32031ac6c9787ee46c5ab19e6f71b99cd26d3 (diff)
parent: d9f49a66e4a7dfd93823f2396796ed6c55f69648 (diff)
5 files changed, 23 insertions, 21 deletions
diff --git a/TODO b/TODO
index a799bbb..a6f444f 100644
--- a/TODO
+++ b/TODO
@@ -1,11 +1,6 @@
 Bare ampersands should be converted to HTML entities upon output.
 
-It should also be possible to, on output, convert to HTML entities any
-Unicode characters found in htmlentitydefs.codepoint2name. (This
-algorithm would allow me to simplify Unicode, Dammit--convert
-everything to Unicode, and then convert to entities upon output, not
-treating smart quotes differently from any other Unicode character
-that can be represented as an entity.)
+Add namespace support.
 
 XML handling:
 
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index 53130e0..518e95f 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -112,7 +112,7 @@ class BeautifulSoup(Tag):
         if builder is None:
             if isinstance(features, basestring):
                 features = [features]
-            if len(features) == 0:
+            if features is None or len(features) == 0:
                 features = self.DEFAULT_BUILDER_FEATURES
             builder_class = builder_registry.lookup(*features)
             if builder_class is None:
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py
index 31dfa95..4483118 100644
--- a/beautifulsoup/dammit.py
+++ b/beautifulsoup/dammit.py
@@ -31,6 +31,8 @@ except ImportError:
 
 class EntitySubstitution(object):
 
+    """Substitute XML or HTML entities for the corresponding characters."""
+
     def _populate_class_variables():
         lookup = {}
         characters = []
@@ -61,17 +63,20 @@ class EntitySubstitution(object):
                                            "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                            ")")
 
-    def _substitute_html_entity(self, matchobj):
-        entity = self.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
+    @classmethod
+    def _substitute_html_entity(cls, matchobj):
+        entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
         return "&%s;" % entity
 
-    def _substitute_xml_entity(self, matchobj):
+    @classmethod
+    def _substitute_xml_entity(cls, matchobj):
         """Used with a regular expression to substitute the
         appropriate XML entity for an XML special character."""
-        entity = self.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
+        entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
         return "&%s;" % entity
 
-    def substitute_xml(self, value, make_quoted_attribute=False):
+    @classmethod
+    def substitute_xml(cls, value, make_quoted_attribute=False):
         """Substitute XML entities for special XML characters.
 
         :param value: A string to be substituted. The less-than sign will
@@ -117,14 +122,15 @@ class EntitySubstitution(object):
 
         # Escape angle brackets, and ampersands that aren't part of
         # entities.
-        value = self.BARE_AMPERSAND_OR_BRACKET.sub(
-            self._substitute_xml_entity, value)
+        value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
+            cls._substitute_xml_entity, value)
         if make_quoted_attribute:
             return quote_with + value + quote_with
         else:
             return value
 
-    def substitute_html(self, s):
+    @classmethod
+    def substitute_html(cls, s):
         """Replace certain Unicode characters with named HTML entities.
 
         This differs from data.encode(encoding, 'xmlcharrefreplace')
@@ -135,8 +141,8 @@ class EntitySubstitution(object):
         character with "&eacute;" will make it more readable to some
         people.
         """
-        return self.CHARACTER_TO_HTML_ENTITY_RE.sub(
-            self._substitute_html_entity, s)
+        return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
+            cls._substitute_html_entity, s)
 
 
 class UnicodeDammit:
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index 6af27a8..61ed4ab 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -11,7 +11,7 @@ from util import isList
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 
 
-class PageElement(EntitySubstitution):
+class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
 
@@ -363,7 +363,7 @@ class NavigableString(unicode, PageElement):
 
     def output_ready(self, substitute_html_entities=False):
         if substitute_html_entities:
-            output = self.substitute_html(self)
+            output = EntitySubstitution.substitute_html(self)
         else:
             output = self
         return self.PREFIX + output + self.SUFFIX
@@ -580,7 +580,8 @@ class Tag(PageElement):
                         and '%SOUP-ENCODING%' in val):
                         val = self.substituteEncoding(val, eventual_encoding)
 
-                    decoded = key + '=' + self.substitute_xml(val, True)
+                    decoded = (key + '='
+                               + EntitySubstitution.substitute_xml(val, True))
                 attrs.append(decoded)
         close = ''
         closeTag = ''
diff --git a/tests/test_soup.py b/tests/test_soup.py
index c4d9c2c..690db39 100644
--- a/tests/test_soup.py
+++ b/tests/test_soup.py
@@ -19,7 +19,7 @@ class TestSelectiveParsing(SoupTest):
 class TestEntitySubstitution(unittest.TestCase):
     """Standalone tests of the EntitySubstitution class."""
     def setUp(self):
-        self.sub = EntitySubstitution()
+        self.sub = EntitySubstitution
 
     def test_simple_html_substitution(self):
         # Unicode characters corresponding to named HTML entites
author	Leonard Richardson <leonard.richardson@canonical.com>	2011-02-26 23:47:06 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2011-02-26 23:47:06 -0500
commit	dc5682014c7360e723d4861d32ee933eea8fcd5d (patch)
tree	021886f2b0e2158d87d33417b8b687dfb1cee923
parent	ace32031ac6c9787ee46c5ab19e6f71b99cd26d3 (diff)
parent	d9f49a66e4a7dfd93823f2396796ed6c55f69648 (diff)