From 703ee4a184e491be056ae5c4c7549e004be12622 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 20 Jan 2012 16:18:45 -0500
Subject: Made it easier to convert BS3 code to BS4.

---
 bs4/builder/_htmlparser.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'bs4/builder/_htmlparser.py')

diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index f9476cd..53374f0 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -65,7 +65,13 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
         self.soup.handle_data(data)
 
     def handle_charref(self, name):
-        self.handle_data(unichr(int(name)))
+        # XXX workaround for a bug in HTMLParser. Remove this once
+        # it's fixed.
+        if name.startswith('x'):
+            data = unichr(int(name.lstrip('x'), 16))
+        else:
+            data = unichr(int(name))
+        self.handle_data(data)
 
     def handle_entityref(self, name):
         character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
-- 
cgit v1.2.3