Merge branch 'remove-recursion-on-output'

author: Leonard Richardson <leonardr@segfault.org> 2023-03-24 14:52:35 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2023-03-24 14:52:35 -0400
commit: 8944fe70574914cabfc9e6fb6eb048d71be39fb1 (patch)
tree: e3f7cda44701144f59fbcbc702d27085363350bd /bs4/tests/test_pageelement.py
parent: 347cefbe76cc70bd99dc6b5f0274189cdb94bbb9 (diff)
parent: f834cd013865febdff3952b01bdd09b406c8ca66 (diff)
1 files changed, 36 insertions, 1 deletions
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index a94280f..d98c577 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -2,6 +2,7 @@
 import copy
 import pickle
 import pytest
+import sys
 
 from bs4 import BeautifulSoup
 from bs4.element import (
@@ -49,6 +50,16 @@ class TestEncoding(SoupTest):
             encoding="utf8"
         )
 
+    def test_encode_deeply_nested_document(self):
+        # This test verifies that encoding a string doesn't involve
+        # any recursive function calls. If it did, this test would
+        # overflow the Python interpreter stack.
+        limit = sys.getrecursionlimit() + 1
+        markup = "<span>" * limit
+        soup = self.soup(markup)
+        encoded = soup.encode()
+        assert limit == encoded.count(b"<span>")
+
     def test_deprecated_renderContents(self):
         html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
@@ -156,7 +167,31 @@ class TestFormatters(SoupTest):
         soup = self.soup("<div>  foo  <pre>  \tbar\n  \n  </pre>  baz  <textarea> eee\nfff\t</textarea></div>")
         # Everything outside the <pre> tag is reformatted, but everything
         # inside is left alone.
-        assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
+        assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
+
+    def test_prettify_handles_nested_string_literal_tags(self):
+        # Most of this markup is inside a <pre> tag, so prettify()
+        # only does three things to it:
+        # 1. Add a newline and a space between the <div> and the <pre>
+        # 2. Add a newline after the </pre>
+        # 3. Add a newline at the end.
+        #
+        # The contents of the <pre> tag are left completely alone.  In
+        # particular, we don't start adding whitespace again once we
+        # encounter the first </pre> tag, because we know it's not
+        # the one that put us into string literal mode.
+        markup = """<div><pre><code>some
+<script><pre>code</pre></script> for you 
+</code></pre></div>"""
+
+        expect = """<div>
+ <pre><code>some
+<script><pre>code</pre></script> for you 
+</code></pre>
+</div>
+"""
+        soup = self.soup(markup)
+        assert expect == soup.div.prettify()
 
     def test_prettify_accepts_formatter_function(self):
         soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
author	Leonard Richardson <leonardr@segfault.org>	2023-03-24 14:52:35 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2023-03-24 14:52:35 -0400
commit	8944fe70574914cabfc9e6fb6eb048d71be39fb1 (patch)
tree	e3f7cda44701144f59fbcbc702d27085363350bd /bs4/tests/test_pageelement.py
parent	347cefbe76cc70bd99dc6b5f0274189cdb94bbb9 (diff)
parent	f834cd013865febdff3952b01bdd09b406c8ca66 (diff)