summary refs log tree commit diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 10
1 files changed, 8 insertions, 2 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c785eed..0ec878b 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -80,9 +80,15 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
# XXX workaround for a bug in HTMLParser. Remove this once
# it's fixed.
if name.startswith('x'):
- data = unichr(int(name.lstrip('x'), 16))
+ real_name = int(name.lstrip('x'), 16)
else:
- data = unichr(int(name))
+ real_name = int(name)
+
+ try:
+ data = unichr(real_name)
+ except (ValueError, OverflowError), e:
+ data = u"\N{REPLACEMENT CHARACTER}"
+
self.handle_data(data)
def handle_entityref(self, name):