diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 09:46:51 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 09:46:51 -0500 |
commit | de5a4f116b14d61da2698036cbf426a83b1b2cac (patch) | |
tree | cc65252a2d3b0a333c56081081a2ef41792027a6 | |
parent | 4ccc7c021d86ab648925072b9d4d3682587c5be8 (diff) |
Still trying to get html5lib to rewrite the META tag.
-rw-r--r-- | beautifulsoup/builder/__init__.py | 12 | ||||
-rw-r--r-- | beautifulsoup/element.py | 6 |
2 files changed, 8 insertions, 10 deletions
```diff
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index eb92e6b..cf5e6c6 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -107,8 +107,6 @@ class HTMLTreeBuilder(TreeBuilder):
         if tag.name != 'meta':
             return False
 
-        httpEquiv = None
-        contentType = None
         http_equiv = tag.get('http-equiv')
         content = tag.get('content')
 
@@ -127,15 +125,15 @@ class HTMLTreeBuilder(TreeBuilder):
                     # explicitly and it worked. Rewrite the meta tag.
                     def rewrite(match):
                         return match.group(1) + "%SOUP-ENCODING%"
-                    newAttr = self.CHARSET_RE.sub(rewrite, content)
-                    tag['content'] = newAttr
+                    tag['content'] = self.CHARSET_RE.sub(rewrite, content)
                     return True
                 else:
                     # This is our first pass through the document.
                     # Go through it again with the encoding information.
-                    newCharset = match.group(3)
-                    if newCharset and newCharset != self.soup.originalEncoding:
-                        self.soup.declaredHTMLEncoding = newCharset
+                    new_charset = match.group(3)
+                    if (new_charset is not None
+                        and new_charset != self.soup.originalEncoding):
+                        self.soup.declaredHTMLEncoding = new_charset
                         self.soup._feed(self.soup.declaredHTMLEncoding)
                         raise StopParsing
                     pass
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index 6e2bada..5793d59 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -9,7 +9,7 @@ from util import isString, isList
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 
-class Entities:
+class Entities(object):
     """A mixin class that knows about XML entities."""
 
     HTML_ENTITIES = "html"
@@ -31,7 +31,7 @@ class Entities:
     XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
 
 
-class PageElement:
+class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
 
@@ -765,7 +765,7 @@ class Tag(PageElement, Entities):
 
 
 # Next, a couple classes to represent queries and their results.
-class SoupStrainer:
+class SoupStrainer(object):
     """Encapsulates a number of ways of matching a markup element (tag or
     text)."""
 
```