diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-20 11:43:46 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-20 11:43:46 -0500 |
commit | ab7ed77ab3560f6d574d577befc7a1f593e45327 (patch) | |
tree | 5f6427c8057bcf28ac6a1f9ff3fec92ed9056cb2 /bs4/builder | |
parent | 0a53ebe4c61ecf78b19fd5a5fe0ae2a66654dd18 (diff) |
Changed the class structure so that the default parser test class uses html.parser.
Diffstat (limited to 'bs4/builder')
-rw-r--r-- | bs4/builder/_htmlparser.py | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index c785eed..0ec878b 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -80,9 +80,15 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder): # XXX workaround for a bug in HTMLParser. Remove this once # it's fixed. if name.startswith('x'): - data = unichr(int(name.lstrip('x'), 16)) + real_name = int(name.lstrip('x'), 16) else: - data = unichr(int(name)) + real_name = int(name) + + try: + data = unichr(real_name) + except (ValueError, OverflowError), e: + data = u"\N{REPLACEMENT CHARACTER}" + self.handle_data(data) def handle_entityref(self, name): |