summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 4
-rw-r--r-- bs4/builder/_htmlparser.py 11
2 files changed, 10 insertions, 5 deletions
diff --git a/NEWS.txt b/NEWS.txt
index e9ec6aa..2f0121e 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -10,6 +10,10 @@
* Added a warning when you instantiate a BeautifulSoup object without
explicitly naming a parser. [bug=1398866]
+* In Python 3.4 and above, set the new convert_charrefs argument to
+ the html.parser constructor to avoid a warning and future
+ failures. Patch by Stefano Revera. [bug=1375721]
+
* Added a Chinese translation of the documentation by Delong .w.
= 4.3.2 (20131002) =
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 3e78c65..bf231f1 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -19,10 +19,8 @@ import warnings
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
-CONSTRUCTOR_TAKES_STRICT = (
- major > 3
- or (major == 3 and minor > 2)
- or (major == 3 and minor == 2 and release >= 3))
+CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
+CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from bs4.element import (
CData,
@@ -63,7 +61,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_charref(self, name):
# XXX workaround for a bug in HTMLParser. Remove this once
- # it's fixed.
+ # it's fixed in all supported versions.
+ # http://bugs.python.org/issue13633
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
@@ -134,6 +133,8 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT:
kwargs['strict'] = False
+ if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
+ kwargs['convert_charrefs'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,