summaryrefslogtreecommitdiff
path: root/bs4/tests/test_pageelement.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests/test_pageelement.py')
-rw-r--r--bs4/tests/test_pageelement.py37
1 files changed, 36 insertions, 1 deletions
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index a94280f..d98c577 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -2,6 +2,7 @@
import copy
import pickle
import pytest
+import sys
from bs4 import BeautifulSoup
from bs4.element import (
@@ -49,6 +50,16 @@ class TestEncoding(SoupTest):
encoding="utf8"
)
+ def test_encode_deeply_nested_document(self):
+ # This test verifies that encoding a string doesn't involve
+ # any recursive function calls. If it did, this test would
+ # overflow the Python interpreter stack.
+ limit = sys.getrecursionlimit() + 1
+ markup = "<span>" * limit
+ soup = self.soup(markup)
+ encoded = soup.encode()
+ assert limit == encoded.count(b"<span>")
+
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
@@ -156,7 +167,31 @@ class TestFormatters(SoupTest):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
# Everything outside the <pre> tag is reformatted, but everything
# inside is left alone.
- assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
+ assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
+
+ def test_prettify_handles_nested_string_literal_tags(self):
+ # Most of this markup is inside a <pre> tag, so prettify()
+ # only does three things to it:
+ # 1. Add a newline and a space between the <div> and the <pre>
+ # 2. Add a newline after the </pre>
+ # 3. Add a newline at the end.
+ #
+ # The contents of the <pre> tag are left completely alone. In
+ # particular, we don't start adding whitespace again once we
+ # encounter the first </pre> tag, because we know it's not
+ # the one that put us into string literal mode.
+ markup = """<div><pre><code>some
+<script><pre>code</pre></script> for you
+</code></pre></div>"""
+
+ expect = """<div>
+ <pre><code>some
+<script><pre>code</pre></script> for you
+</code></pre>
+</div>
+"""
+ soup = self.soup(markup)
+ assert expect == soup.div.prettify()
def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')