summary | refs | log | tree | commit | diff
path: root/src/beautifulsoup/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/beautifulsoup/__init__.py')
-rw-r--r-- src/beautifulsoup/__init__.py 31
1 files changed, 20 insertions, 11 deletions
diff --git a/src/beautifulsoup/__init__.py b/src/beautifulsoup/__init__.py
index 79bb657..8817164 100644
--- a/src/beautifulsoup/__init__.py
+++ b/src/beautifulsoup/__init__.py
@@ -299,20 +299,26 @@ class BeautifulStoneSoup(Tag):
def handleSpecialMetaTag(self, attrs):
"""Beautiful Soup can detect a charset included in a META tag,
try to convert the document to that charset, and re-parse the
- document from the beginning."""
+ document from the beginning. Neither lxml nor html5lib does
+ this, so the feature is still here."""
httpEquiv = None
contentType = None
contentTypeIndex = None
tagNeedsEncodingSubstitution = False
- for i in range(0, len(attrs)):
- key, value = attrs[i]
- key = key.lower()
- if key == 'http-equiv':
- httpEquiv = value
- elif key == 'content':
- contentType = value
- contentTypeIndex = i
+ if isinstance(attrs, dict):
+ httpEquiv = attrs.get('http-equiv')
+ contentType = attrs.get('content')
+ else:
+ # XXX do we need this?
+ for i in range(0, len(attrs)):
+ key, value = attrs[i]
+ key = key.lower()
+ if key == 'http-equiv':
+ httpEquiv = value
+ elif key == 'content':
+ contentType = value
+ contentTypeIndex = i
if httpEquiv and contentType: # It's an interesting meta tag.
match = self.CHARSET_RE.search(contentType)
@@ -327,8 +333,11 @@ class BeautifulStoneSoup(Tag):
def rewrite(match):
return match.group(1) + "%SOUP-ENCODING%"
newAttr = self.CHARSET_RE.sub(rewrite, contentType)
- attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
- newAttr)
+ if isinstance(attrs, dict):
+ attrs['content'] = newAttr
+ else:
+ attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
+ newAttr)
tagNeedsEncodingSubstitution = True
else:
# This is our first pass through the document.