Fixed an unhandled exception in BeautifulSoup.decode_contents and methods that call it. [bug=2015545]
author: Leonard Richardson <leonardr@segfault.org> 2023-04-07 10:31:56 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2023-04-07 10:31:56 -0400
commit: 67336dd4cd781a7d27716a5bcaae939e80a7bc24 (patch)
tree: 0a345409186c2e6cb0ab312e18feb4bbb16c206b
parent: fb8179d217dfb11e81c28076fc3bf14bdf9a0038 (diff)
4 files changed, 53 insertions, 5 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0ed74b5..f6a7258 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,12 @@
+= 4.12.2 (20230407)
+
+* Fixed an unhandled exception in BeautifulSoup.decode_contents
+  and methods that call it. [bug=2015545]
+
 = 4.12.1 (20230405)
 
 NOTE: the following things are likely to be dropped in the next
-release of Beautiful Soup:
+feature release of Beautiful Soup:
 
  Official support for Python 3.6.
  Inclusion of unit tests and test data in the wheel file.
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 18d380b..3d2ab09 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.12.1"
+__version__ = "4.12.2"
 __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
@@ -776,7 +776,7 @@ class BeautifulSoup(Tag):
        
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
         """Returns a string or Unicode representation of the parse tree
             as an HTML or XML document.
 
@@ -803,7 +803,7 @@ class BeautifulSoup(Tag):
         else:
             indent_level = 0
         return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
 
 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index e12df79..24f9385 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -49,7 +49,7 @@ class TestEncoding(SoupTest):
         assert "\N{SNOWMAN}".encode("utf8") == soup.b.encode_contents(
             encoding="utf8"
         )
-
+        
     def test_encode_deeply_nested_document(self):
         # This test verifies that encoding a string doesn't involve
         # any recursive function calls. If it did, this test would
@@ -63,6 +63,7 @@ class TestEncoding(SoupTest):
     def test_deprecated_renderContents(self):
         html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
+        soup.renderContents()
         assert "\N{SNOWMAN}".encode("utf8") == soup.b.renderContents()
 
     def test_repr(self):
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 64b8cf1..28013b8 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -24,6 +24,7 @@ from bs4.builder import (
 from bs4.element import (
     Comment,
     SoupStrainer,
+    PYTHON_SPECIFIC_ENCODINGS,
     Tag,
     NavigableString,
 )
@@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
         assert [] == soup.string_container_stack
 
 
+class TestOutput(SoupTest):
+
+    @pytest.mark.parametrize(
+        "eventual_encoding,actual_encoding", [
+            ("utf-8", "utf-8"),
+            ("utf-16", "utf-16"),
+        ]
+    )
+    def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
+        # Most of the time, calling decode() on an XML document will
+        # give you a document declaration that mentions the encoding
+        # you intend to use when encoding the document as a
+        # bytestring.
+        soup = self.soup("<tag></tag>")
+        soup.is_xml = True
+        assert (f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
+                == soup.decode(eventual_encoding=eventual_encoding))
+
+    @pytest.mark.parametrize(
+        "eventual_encoding", [x for x in PYTHON_SPECIFIC_ENCODINGS] + [None]
+    )
+    def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
+        # But if you pass a Python internal encoding into decode(), or
+        # omit the eventual_encoding altogether, the document
+        # declaration won't mention any particular encoding.
+        soup = BeautifulSoup("<tag></tag>", "html.parser")
+        soup.is_xml = True
+        assert (f'<?xml version="1.0"?>\n<tag></tag>'
+                == soup.decode(eventual_encoding=eventual_encoding))
+
+    def test(self):
+        # BeautifulSoup subclasses Tag and extends the decode() method.
+        # Make sure the other Tag methods which call decode() call
+        # it correctly.
+        soup = self.soup("<tag></tag>")
+        assert b"<tag></tag>" == soup.encode(encoding="utf-8")
+        assert b"<tag></tag>" == soup.encode_contents(encoding="utf-8")
+        assert "<tag></tag>" == soup.decode_contents()
+        assert "<tag>\n</tag>\n" == soup.prettify()
+
+        
 class TestWarnings(SoupTest):
     # Note that some of the tests in this class create BeautifulSoup
     # objects directly rather than using self.soup(). That's
author	Leonard Richardson <leonardr@segfault.org>	2023-04-07 10:31:56 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2023-04-07 10:31:56 -0400
commit	67336dd4cd781a7d27716a5bcaae939e80a7bc24 (patch)
tree	0a345409186c2e6cb0ab312e18feb4bbb16c206b
parent	fb8179d217dfb11e81c28076fc3bf14bdf9a0038 (diff)