diff options
Diffstat (limited to 'bs4/builder')
-rw-r--r-- | bs4/builder/__init__.py | 2 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 2 | ||||
-rw-r--r-- | bs4/builder/_htmlparser.py | 10 | ||||
-rw-r--r-- | bs4/builder/_lxml.py | 5 |
4 files changed, 11 insertions, 8 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 2728606..067623e 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -119,7 +119,7 @@ class TreeBuilder(object):
 
     def prepare_markup(self, markup, user_specified_encoding=None,
                        document_declared_encoding=None):
-        return markup, None, None
+        return markup, None, None, False
 
     def test_fragment_to_document(self, fragment):
         """Wrap an HTML fragment to make it look like a document.
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 4b80870..9897675 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -29,7 +29,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
     def prepare_markup(self, markup, user_specified_encoding):
         # Store the user-specified encoding for use later on.
         self.user_specified_encoding = user_specified_encoding
-        return markup, None, None
+        return markup, None, None, False
 
     # These methods are defined by Beautiful Soup.
     def feed(self, markup):
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index edd0bfb..c785eed 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -51,16 +51,18 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
     def prepare_markup(self, markup, user_specified_encoding=None,
                        document_declared_encoding=None):
         """
-        :return: A 3-tuple (markup, original encoding, encoding
-        declared within markup).
+        :return: A 4-tuple (markup, original encoding, encoding
+        declared within markup, whether any characters had to be
+        replaced with REPLACEMENT CHARACTER).
         """
         if isinstance(markup, unicode):
-            return markup, None, None
+            return markup, None, None, False
 
         try_encodings = [user_specified_encoding, document_declared_encoding]
         dammit = UnicodeDammit(markup, try_encodings, is_html=True)
         return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding)
+                dammit.declared_html_encoding,
+                dammit.contains_replacement_characters)
 
     def feed(self, markup):
         super(HTMLParserTreeBuilder, self).feed(markup)
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 7219e49..cc3cb86 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -50,12 +50,13 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         declared within markup).
         """
         if isinstance(markup, unicode):
-            return markup, None, None
+            return markup, None, None, False
 
         try_encodings = [user_specified_encoding, document_declared_encoding]
         dammit = UnicodeDammit(markup, try_encodings, is_html=True)
         return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding)
+                dammit.declared_html_encoding,
+                dammit.contains_replacement_characters)
 
     def feed(self, markup):
         self.parser.feed(markup)