summary | refs | log | tree | commit | diff
path: root/bs4/tests/test_pageelement.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2023-03-24 14:52:35 -0400
committer Leonard Richardson <leonardr@segfault.org> 2023-03-24 14:52:35 -0400
commit 8944fe70574914cabfc9e6fb6eb048d71be39fb1 (patch)
tree e3f7cda44701144f59fbcbc702d27085363350bd /bs4/tests/test_pageelement.py
parent 347cefbe76cc70bd99dc6b5f0274189cdb94bbb9 (diff)
parent f834cd013865febdff3952b01bdd09b406c8ca66 (diff)
Merge branch 'remove-recursion-on-output'
Diffstat (limited to 'bs4/tests/test_pageelement.py')
-rw-r--r-- bs4/tests/test_pageelement.py 37
1 files changed, 36 insertions, 1 deletions
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index a94280f..d98c577 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -2,6 +2,7 @@
import copy
import pickle
import pytest
+import sys
from bs4 import BeautifulSoup
from bs4.element import (
@@ -49,6 +50,16 @@ class TestEncoding(SoupTest):
encoding="utf8"
)
+ def test_encode_deeply_nested_document(self):
+ # This test verifies that encoding a string doesn't involve
+ # any recursive function calls. If it did, this test would
+ # overflow the Python interpreter stack.
+ limit = sys.getrecursionlimit() + 1
+ markup = "<span>" * limit
+ soup = self.soup(markup)
+ encoded = soup.encode()
+ assert limit == encoded.count(b"<span>")
+
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
@@ -156,7 +167,31 @@ class TestFormatters(SoupTest):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
# Everything outside the <pre> tag is reformatted, but everything
# inside is left alone.
- assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
+ assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
+
+ def test_prettify_handles_nested_string_literal_tags(self):
+ # Most of this markup is inside a <pre> tag, so prettify()
+ # only does three things to it:
+ # 1. Add a newline and a space between the <div> and the <pre>
+ # 2. Add a newline after the </pre>
+ # 3. Add a newline at the end.
+ #
+ # The contents of the <pre> tag are left completely alone. In
+ # particular, we don't start adding whitespace again once we
+ # encounter the first </pre> tag, because we know it's not
+ # the one that put us into string literal mode.
+ markup = """<div><pre><code>some
+<script><pre>code</pre></script> for you
+</code></pre></div>"""
+
+ expect = """<div>
+ <pre><code>some
+<script><pre>code</pre></script> for you
+</code></pre>
+</div>
+"""
+ soup = self.soup(markup)
+ assert expect == soup.div.prettify()
def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')