diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 09:46:51 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 09:46:51 -0500 |
commit | de5a4f116b14d61da2698036cbf426a83b1b2cac (patch) | |
tree | cc65252a2d3b0a333c56081081a2ef41792027a6 /beautifulsoup/builder/__init__.py | |
parent | 4ccc7c021d86ab648925072b9d4d3682587c5be8 (diff) |
Still trying to get html5lib to rewrite the META tag.
Diffstat (limited to 'beautifulsoup/builder/__init__.py')
-rw-r--r-- | beautifulsoup/builder/__init__.py | 12 |
1 files changed, 5 insertions, 7 deletions
```diff
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index eb92e6b..cf5e6c6 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -107,8 +107,6 @@ class HTMLTreeBuilder(TreeBuilder):
         if tag.name != 'meta':
             return False
 
-        httpEquiv = None
-        contentType = None
         http_equiv = tag.get('http-equiv')
         content = tag.get('content')
 
@@ -127,15 +125,15 @@ class HTMLTreeBuilder(TreeBuilder):
                     # explicitly and it worked. Rewrite the meta tag.
                     def rewrite(match):
                         return match.group(1) + "%SOUP-ENCODING%"
-                    newAttr = self.CHARSET_RE.sub(rewrite, content)
-                    tag['content'] = newAttr
+                    tag['content'] = self.CHARSET_RE.sub(rewrite, content)
                     return True
                 else:
                     # This is our first pass through the document.
                     # Go through it again with the encoding information.
-                    newCharset = match.group(3)
-                    if newCharset and newCharset != self.soup.originalEncoding:
-                        self.soup.declaredHTMLEncoding = newCharset
+                    new_charset = match.group(3)
+                    if (new_charset is not None
+                        and new_charset != self.soup.originalEncoding):
+                        self.soup.declaredHTMLEncoding = new_charset
                         self.soup._feed(self.soup.declaredHTMLEncoding)
                         raise StopParsing
                     pass
```