diff options
Diffstat (limited to 'bs4/tests')
-rw-r--r-- | bs4/tests/__init__.py | 6 | ||||
-rw-r--r-- | bs4/tests/test_formatter.py | 20 | ||||
-rw-r--r-- | bs4/tests/test_pageelement.py | 37 |
3 files changed, 49 insertions, 14 deletions
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py index d8b3b9b..dbb1593 100644 --- a/bs4/tests/__init__.py +++ b/bs4/tests/__init__.py @@ -551,8 +551,8 @@ Hello, world! """Whitespace must be preserved in <pre> and <textarea> tags, even if that would mean not prettifying the markup. """ - pre_markup = "<pre> </pre>" - textarea_markup = "<textarea> woo\nwoo </textarea>" + pre_markup = "<pre>a z</pre>\n" + textarea_markup = "<textarea> woo\nwoo </textarea>\n" self.assert_soup(pre_markup) self.assert_soup(textarea_markup) @@ -563,7 +563,7 @@ Hello, world! assert soup.textarea.prettify() == textarea_markup soup = self.soup("<textarea></textarea>") - assert soup.textarea.prettify() == "<textarea></textarea>" + assert soup.textarea.prettify() == "<textarea></textarea>\n" def test_nested_inline_elements(self): """Inline elements can be nested indefinitely.""" diff --git a/bs4/tests/test_formatter.py b/bs4/tests/test_formatter.py index 84d4e3b..528b16d 100644 --- a/bs4/tests/test_formatter.py +++ b/bs4/tests/test_formatter.py @@ -80,20 +80,20 @@ class TestFormatter(SoupTest): @pytest.mark.parametrize( "indent,expect", [ - (None, '<a>\n<b>\ntext\n</b>\n</a>'), - (-1, '<a>\n<b>\ntext\n</b>\n</a>'), - (0, '<a>\n<b>\ntext\n</b>\n</a>'), - ("", '<a>\n<b>\ntext\n</b>\n</a>'), + (None, '<a>\n<b>\ntext\n</b>\n</a>\n'), + (-1, '<a>\n<b>\ntext\n</b>\n</a>\n'), + (0, '<a>\n<b>\ntext\n</b>\n</a>\n'), + ("", '<a>\n<b>\ntext\n</b>\n</a>\n'), - (1, '<a>\n <b>\n text\n </b>\n</a>'), - (2, '<a>\n <b>\n text\n </b>\n</a>'), + (1, '<a>\n <b>\n text\n </b>\n</a>\n'), + (2, '<a>\n <b>\n text\n </b>\n</a>\n'), - ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'), - ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'), + ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'), + ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'), # Some invalid inputs -- the default behavior is used. - (object(), '<a>\n <b>\n text\n </b>\n</a>'), - (b'bytes', '<a>\n <b>\n text\n </b>\n</a>'), + (object(), '<a>\n <b>\n text\n </b>\n</a>\n'), + (b'bytes', '<a>\n <b>\n text\n </b>\n</a>\n'), ] ) def test_indent(self, indent, expect): diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py index a94280f..d98c577 100644 --- a/bs4/tests/test_pageelement.py +++ b/bs4/tests/test_pageelement.py @@ -2,6 +2,7 @@ import copy import pickle import pytest +import sys from bs4 import BeautifulSoup from bs4.element import ( @@ -49,6 +50,16 @@ class TestEncoding(SoupTest): encoding="utf8" ) + def test_encode_deeply_nested_document(self): + # This test verifies that encoding a string doesn't involve + # any recursive function calls. If it did, this test would + # overflow the Python interpreter stack. + limit = sys.getrecursionlimit() + 1 + markup = "<span>" * limit + soup = self.soup(markup) + encoded = soup.encode() + assert limit == encoded.count(b"<span>") + def test_deprecated_renderContents(self): html = "<b>\N{SNOWMAN}</b>" soup = self.soup(html) @@ -156,7 +167,31 @@ class TestFormatters(SoupTest): soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>") # Everything outside the <pre> tag is reformatted, but everything # inside is left alone. - assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify() + assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify() + + def test_prettify_handles_nested_string_literal_tags(self): + # Most of this markup is inside a <pre> tag, so prettify() + # only does three things to it: + # 1. Add a newline and a space between the <div> and the <pre> + # 2. Add a newline after the </pre> + # 3. Add a newline at the end. + # + # The contents of the <pre> tag are left completely alone. In + # particular, we don't start adding whitespace again once we + # encounter the first </pre> tag, because we know it's not + # the one that put us into string literal mode. + markup = """<div><pre><code>some +<script><pre>code</pre></script> for you +</code></pre></div>""" + + expect = """<div> + <pre><code>some +<script><pre>code</pre></script> for you +</code></pre> +</div> +""" + soup = self.soup(markup) + assert expect == soup.div.prettify() def test_prettify_accepts_formatter_function(self): soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser') |