diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-01-20 16:18:45 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-01-20 16:18:45 -0500 |
commit | 703ee4a184e491be056ae5c4c7549e004be12622 (patch) | |
tree | 4dd26ef0757cae50fa9bfeb4a3a216a9319785a6 /bs4/builder | |
parent | df26dc64d868875d7cd8ca550f1a174d68dd7c67 (diff) |
Made it easier to convert BS3 code to BS4.
Diffstat (limited to 'bs4/builder')
-rw-r--r-- | bs4/builder/_htmlparser.py | 8 |
1 files changed, 7 insertions, 1 deletions
```diff
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index f9476cd..53374f0 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -65,7 +65,13 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
         self.soup.handle_data(data)
 
     def handle_charref(self, name):
-        self.handle_data(unichr(int(name)))
+        # XXX workaround for a bug in HTMLParser. Remove this once
+        # it's fixed.
+        if name.startswith('x'):
+            data = unichr(int(name.lstrip('x'), 16))
+        else:
+            data = unichr(int(name))
+        self.handle_data(data)
 
     def handle_entityref(self, name):
         character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
```