summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 7
-rw-r--r-- bs4/__init__.py 6
-rw-r--r-- bs4/tests/test_pageelement.py 3
-rw-r--r-- bs4/tests/test_soup.py 42
4 files changed, 53 insertions, 5 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0ed74b5..f6a7258 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,12 @@
+= 4.12.2 (20230407)
+
+* Fixed an unhandled exception in BeautifulSoup.decode_contents
+ and methods that call it. [bug=2015545]
+
= 4.12.1 (20230405)
NOTE: the following things are likely to be dropped in the next
-release of Beautiful Soup:
+feature release of Beautiful Soup:
Official support for Python 3.6.
Inclusion of unit tests and test data in the wheel file.
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 18d380b..3d2ab09 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.12.1"
+__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@@ -776,7 +776,7 @@ class BeautifulSoup(Tag):
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
- formatter="minimal"):
+ formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree
as an HTML or XML document.
@@ -803,7 +803,7 @@ class BeautifulSoup(Tag):
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
- indent_level, eventual_encoding, formatter)
+ indent_level, eventual_encoding, formatter, iterator)
# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index e12df79..24f9385 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -49,7 +49,7 @@ class TestEncoding(SoupTest):
assert "\N{SNOWMAN}".encode("utf8") == soup.b.encode_contents(
encoding="utf8"
)
-
+
def test_encode_deeply_nested_document(self):
# This test verifies that encoding a string doesn't involve
# any recursive function calls. If it did, this test would
@@ -63,6 +63,7 @@ class TestEncoding(SoupTest):
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
+ soup.renderContents()
assert "\N{SNOWMAN}".encode("utf8") == soup.b.renderContents()
def test_repr(self):
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 64b8cf1..28013b8 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -24,6 +24,7 @@ from bs4.builder import (
from bs4.element import (
Comment,
SoupStrainer,
+ PYTHON_SPECIFIC_ENCODINGS,
Tag,
NavigableString,
)
@@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
assert [] == soup.string_container_stack
+class TestOutput(SoupTest):
+
+ @pytest.mark.parametrize(
+ "eventual_encoding,actual_encoding", [
+ ("utf-8", "utf-8"),
+ ("utf-16", "utf-16"),
+ ]
+ )
+ def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
+ # Most of the time, calling decode() on an XML document will
+ # give you a document declaration that mentions the encoding
+ # you intend to use when encoding the document as a
+ # bytestring.
+ soup = self.soup("<tag></tag>")
+ soup.is_xml = True
+ assert (f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
+ == soup.decode(eventual_encoding=eventual_encoding))
+
+ @pytest.mark.parametrize(
+ "eventual_encoding", [x for x in PYTHON_SPECIFIC_ENCODINGS] + [None]
+ )
+ def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
+ # But if you pass a Python internal encoding into decode(), or
+ # omit the eventual_encoding altogether, the document
+ # declaration won't mention any particular encoding.
+ soup = BeautifulSoup("<tag></tag>", "html.parser")
+ soup.is_xml = True
+ assert (f'<?xml version="1.0"?>\n<tag></tag>'
+ == soup.decode(eventual_encoding=eventual_encoding))
+
+ def test(self):
+ # BeautifulSoup subclasses Tag and extends the decode() method.
+ # Make sure the other Tag methods which call decode() call
+ # it correctly.
+ soup = self.soup("<tag></tag>")
+ assert b"<tag></tag>" == soup.encode(encoding="utf-8")
+ assert b"<tag></tag>" == soup.encode_contents(encoding="utf-8")
+ assert "<tag></tag>" == soup.decode_contents()
+ assert "<tag>\n</tag>\n" == soup.prettify()
+
+
class TestWarnings(SoupTest):
# Note that some of the tests in this class create BeautifulSoup
# objects directly rather than using self.soup(). That's