summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 09:46:51 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 09:46:51 -0500
commit: de5a4f116b14d61da2698036cbf426a83b1b2cac (patch)
tree: cc65252a2d3b0a333c56081081a2ef41792027a6
parent: 4ccc7c021d86ab648925072b9d4d3682587c5be8 (diff)
Still trying to get html5lib to rewrite the META tag.
-rw-r--r-- beautifulsoup/builder/__init__.py 12
-rw-r--r-- beautifulsoup/element.py 6
2 files changed, 8 insertions, 10 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index eb92e6b..cf5e6c6 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -107,8 +107,6 @@ class HTMLTreeBuilder(TreeBuilder):
if tag.name != 'meta':
return False
- httpEquiv = None
- contentType = None
http_equiv = tag.get('http-equiv')
content = tag.get('content')
@@ -127,15 +125,15 @@ class HTMLTreeBuilder(TreeBuilder):
# explicitly and it worked. Rewrite the meta tag.
def rewrite(match):
return match.group(1) + "%SOUP-ENCODING%"
- newAttr = self.CHARSET_RE.sub(rewrite, content)
- tag['content'] = newAttr
+ tag['content'] = self.CHARSET_RE.sub(rewrite, content)
return True
else:
# This is our first pass through the document.
# Go through it again with the encoding information.
- newCharset = match.group(3)
- if newCharset and newCharset != self.soup.originalEncoding:
- self.soup.declaredHTMLEncoding = newCharset
+ new_charset = match.group(3)
+ if (new_charset is not None
+ and new_charset != self.soup.originalEncoding):
+ self.soup.declaredHTMLEncoding = new_charset
self.soup._feed(self.soup.declaredHTMLEncoding)
raise StopParsing
pass
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index 6e2bada..5793d59 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -9,7 +9,7 @@ from util import isString, isList
DEFAULT_OUTPUT_ENCODING = "utf-8"
-class Entities:
+class Entities(object):
"""A mixin class that knows about XML entities."""
HTML_ENTITIES = "html"
@@ -31,7 +31,7 @@ class Entities:
XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-class PageElement:
+class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
@@ -765,7 +765,7 @@ class Tag(PageElement, Entities):
# Next, a couple classes to represent queries and their results.
-class SoupStrainer:
+class SoupStrainer(object):
"""Encapsulates a number of ways of matching a markup element (tag or
text)."""