summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2023-03-24 14:50:39 -0400
committer Leonard Richardson <leonardr@segfault.org> 2023-03-24 14:50:39 -0400
commit f834cd013865febdff3952b01bdd09b406c8ca66 (patch)
tree 4368dc72e39574fddeffd701b40b7f8cd9ffff3c
parent 2236d4acae21d9c5595924902134e5072648c29c (diff)
Added a test just to verify that you can encode a document more deeply nested than the Python recursion limit.
-rw-r--r-- CHANGELOG 9
-rw-r--r-- bs4/tests/test_pageelement.py 11
2 files changed, 20 insertions, 0 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0983360..74619a1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,15 @@ Python 2 was revision 70f546b1e689a70e2f103795efce6d261a3dadf7.
= 4.12.1 (Unreleased)
+* Rewrote the code for converting a Beautiful Soup tree to a
+ string, so that it no longer makes recursive function calls. This
+ makes it possible to output documents that have more nested
+ tags than there are levels in the Python interpreter stack.
+ [bug=1471755]
+
+* Tag.prettify() will now consistently end prettified markup with
+ a newline. This is a side effect of the work done for bug #1471755.
+
* Added unit tests for fuzz test cases created by third
parties. Most of these tests are skipped since they either point
out problems in code outside of Beautiful Soup, or problems with
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index a0476e4..d98c577 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -2,6 +2,7 @@
import copy
import pickle
import pytest
+import sys
from bs4 import BeautifulSoup
from bs4.element import (
@@ -49,6 +50,16 @@ class TestEncoding(SoupTest):
encoding="utf8"
)
+ def test_encode_deeply_nested_document(self):
+ # This test verifies that encoding a string doesn't involve
+ # any recursive function calls. If it did, this test would
+ # overflow the Python interpreter stack.
+ limit = sys.getrecursionlimit() + 1
+ markup = "<span>" * limit
+ soup = self.soup(markup)
+ encoded = soup.encode()
+ assert limit == encoded.count(b"<span>")
+
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)