summaryrefslogtreecommitdiff
path: root/bs4/builder
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2012-02-20 11:43:46 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2012-02-20 11:43:46 -0500
commit ab7ed77ab3560f6d574d577befc7a1f593e45327 (patch)
tree 5f6427c8057bcf28ac6a1f9ff3fec92ed9056cb2 /bs4/builder
parent 0a53ebe4c61ecf78b19fd5a5fe0ae2a66654dd18 (diff)
Changed the class structure so that the default parser test class uses html.parser.
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/_htmlparser.py 10
1 files changed, 8 insertions, 2 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c785eed..0ec878b 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -80,9 +80,15 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
# XXX workaround for a bug in HTMLParser. Remove this once
# it's fixed.
if name.startswith('x'):
- data = unichr(int(name.lstrip('x'), 16))
+ real_name = int(name.lstrip('x'), 16)
else:
- data = unichr(int(name))
+ real_name = int(name)
+
+ try:
+ data = unichr(real_name)
+ except (ValueError, OverflowError), e:
+ data = u"\N{REPLACEMENT CHARACTER}"
+
self.handle_data(data)
def handle_entityref(self, name):