summary refs log tree commit diff
path: root/bs4/builder
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/__init__.py 2
-rw-r--r-- bs4/builder/_html5lib.py 2
-rw-r--r-- bs4/builder/_htmlparser.py 10
-rw-r--r-- bs4/builder/_lxml.py 5
4 files changed, 11 insertions, 8 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 2728606..067623e 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -119,7 +119,7 @@ class TreeBuilder(object):
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
- return markup, None, None
+ return markup, None, None, False
def test_fragment_to_document(self, fragment):
"""Wrap an HTML fragment to make it look like a document.
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 4b80870..9897675 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -29,7 +29,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
def prepare_markup(self, markup, user_specified_encoding):
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding
- return markup, None, None
+ return markup, None, None, False
# These methods are defined by Beautiful Soup.
def feed(self, markup):
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index edd0bfb..c785eed 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -51,16 +51,18 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
"""
- :return: A 3-tuple (markup, original encoding, encoding
- declared within markup).
+ :return: A 4-tuple (markup, original encoding, encoding
+ declared within markup, whether any characters had to be
+ replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
- return markup, None, None
+ return markup, None, None, False
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
return (dammit.markup, dammit.original_encoding,
- dammit.declared_html_encoding)
+ dammit.declared_html_encoding,
+ dammit.contains_replacement_characters)
def feed(self, markup):
super(HTMLParserTreeBuilder, self).feed(markup)
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 7219e49..cc3cb86 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -50,12 +50,13 @@ class LXMLTreeBuilderForXML(TreeBuilder):
declared within markup).
"""
if isinstance(markup, unicode):
- return markup, None, None
+ return markup, None, None, False
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
return (dammit.markup, dammit.original_encoding,
- dammit.declared_html_encoding)
+ dammit.declared_html_encoding,
+ dammit.contains_replacement_characters)
def feed(self, markup):
self.parser.feed(markup)