summary | refs | log | tree | commit | diff
path: root/bs4/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests')
-rw-r--r-- bs4/tests/__init__.py | 6
-rw-r--r-- bs4/tests/test_formatter.py | 20
-rw-r--r-- bs4/tests/test_pageelement.py | 37
3 files changed, 49 insertions, 14 deletions
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
index d8b3b9b..dbb1593 100644
--- a/bs4/tests/__init__.py
+++ b/bs4/tests/__init__.py
@@ -551,8 +551,8 @@ Hello, world!
"""Whitespace must be preserved in <pre> and <textarea> tags,
even if that would mean not prettifying the markup.
"""
- pre_markup = "<pre> </pre>"
- textarea_markup = "<textarea> woo\nwoo </textarea>"
+ pre_markup = "<pre>a z</pre>\n"
+ textarea_markup = "<textarea> woo\nwoo </textarea>\n"
self.assert_soup(pre_markup)
self.assert_soup(textarea_markup)
@@ -563,7 +563,7 @@ Hello, world!
assert soup.textarea.prettify() == textarea_markup
soup = self.soup("<textarea></textarea>")
- assert soup.textarea.prettify() == "<textarea></textarea>"
+ assert soup.textarea.prettify() == "<textarea></textarea>\n"
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""
diff --git a/bs4/tests/test_formatter.py b/bs4/tests/test_formatter.py
index 84d4e3b..528b16d 100644
--- a/bs4/tests/test_formatter.py
+++ b/bs4/tests/test_formatter.py
@@ -80,20 +80,20 @@ class TestFormatter(SoupTest):
@pytest.mark.parametrize(
"indent,expect",
[
- (None, '<a>\n<b>\ntext\n</b>\n</a>'),
- (-1, '<a>\n<b>\ntext\n</b>\n</a>'),
- (0, '<a>\n<b>\ntext\n</b>\n</a>'),
- ("", '<a>\n<b>\ntext\n</b>\n</a>'),
+ (None, '<a>\n<b>\ntext\n</b>\n</a>\n'),
+ (-1, '<a>\n<b>\ntext\n</b>\n</a>\n'),
+ (0, '<a>\n<b>\ntext\n</b>\n</a>\n'),
+ ("", '<a>\n<b>\ntext\n</b>\n</a>\n'),
- (1, '<a>\n <b>\n text\n </b>\n</a>'),
- (2, '<a>\n <b>\n text\n </b>\n</a>'),
+ (1, '<a>\n <b>\n text\n </b>\n</a>\n'),
+ (2, '<a>\n <b>\n text\n </b>\n</a>\n'),
- ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
- ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),
+ ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'),
+ ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'),
# Some invalid inputs -- the default behavior is used.
- (object(), '<a>\n <b>\n text\n </b>\n</a>'),
- (b'bytes', '<a>\n <b>\n text\n </b>\n</a>'),
+ (object(), '<a>\n <b>\n text\n </b>\n</a>\n'),
+ (b'bytes', '<a>\n <b>\n text\n </b>\n</a>\n'),
]
)
def test_indent(self, indent, expect):
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index a94280f..d98c577 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -2,6 +2,7 @@
import copy
import pickle
import pytest
+import sys
from bs4 import BeautifulSoup
from bs4.element import (
@@ -49,6 +50,16 @@ class TestEncoding(SoupTest):
encoding="utf8"
)
+ def test_encode_deeply_nested_document(self):
+ # This test verifies that encoding a string doesn't involve
+ # any recursive function calls. If it did, this test would
+ # overflow the Python interpreter stack.
+ limit = sys.getrecursionlimit() + 1
+ markup = "<span>" * limit
+ soup = self.soup(markup)
+ encoded = soup.encode()
+ assert limit == encoded.count(b"<span>")
+
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
@@ -156,7 +167,31 @@ class TestFormatters(SoupTest):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
# Everything outside the <pre> tag is reformatted, but everything
# inside is left alone.
- assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
+ assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
+
+ def test_prettify_handles_nested_string_literal_tags(self):
+ # Most of this markup is inside a <pre> tag, so prettify()
+ # only does three things to it:
+ # 1. Add a newline and a space between the <div> and the <pre>
+ # 2. Add a newline after the </pre>
+ # 3. Add a newline at the end.
+ #
+ # The contents of the <pre> tag are left completely alone. In
+ # particular, we don't start adding whitespace again once we
+ # encounter the first </pre> tag, because we know it's not
+ # the one that put us into string literal mode.
+ markup = """<div><pre><code>some
+<script><pre>code</pre></script> for you
+</code></pre></div>"""
+
+ expect = """<div>
+ <pre><code>some
+<script><pre>code</pre></script> for you
+</code></pre>
+</div>
+"""
+ soup = self.soup(markup)
+ assert expect == soup.div.prettify()
def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')