summary refs log tree commit diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index f9476cd..53374f0 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -65,7 +65,13 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
self.soup.handle_data(data)
def handle_charref(self, name):
- self.handle_data(unichr(int(name)))
+ # XXX workaround for a bug in HTMLParser (hex refs arrive as e.g. 'x41').
+ # Remove once fixed. NOTE(review): an uppercase 'X' prefix is not handled.
+ if name.startswith('x'):
+ data = unichr(int(name.lstrip('x'), 16))
+ else:
+ data = unichr(int(name))
+ self.handle_data(data)
def handle_entityref(self, name):
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)