summary | refs | log | tree | commit | diff
path: root/bs4/builder
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-01-20 16:18:45 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-01-20 16:18:45 -0500
commit: 703ee4a184e491be056ae5c4c7549e004be12622 (patch)
tree: 4dd26ef0757cae50fa9bfeb4a3a216a9319785a6 /bs4/builder
parent: df26dc64d868875d7cd8ca550f1a174d68dd7c67 (diff)
Made it easier to convert BS3 code to BS4.
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/_htmlparser.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index f9476cd..53374f0 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -65,7 +65,13 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
self.soup.handle_data(data)
def handle_charref(self, name):
- self.handle_data(unichr(int(name)))
+ # XXX workaround for a bug in HTMLParser. Remove this once
+ # it's fixed.
+ if name.startswith('x'):
+ data = unichr(int(name.lstrip('x'), 16))
+ else:
+ data = unichr(int(name))
+ self.handle_data(data)
def handle_entityref(self, name):
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)