diff options
author | Leonard Richardson <leonardr@segfault.org> | 2016-07-16 12:21:15 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2016-07-16 12:21:15 -0400 |
commit | 682233e4c9e9fa11b5b84055e0bb272f5e941194 (patch) | |
tree | 67eed54264943be78b4a115922bae09453f61be2 | |
parent | adcfa8e5ec199c41f5b22041dbfeb852aa034434 (diff) |
The contents of <textarea> tags will no longer be modified when the
tree is prettified. [bug=1555829]
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/builder/__init__.py | 3 | ||||
-rw-r--r-- | bs4/element.py | 16 | ||||
-rw-r--r-- | bs4/testing.py | 19 |
4 files changed, 34 insertions, 7 deletions
```diff
diff --git a/NEWS.txt b/NEWS.txt
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -2,6 +2,9 @@
 
 * Corrected handling of XML processing instructions. [bug=1504393]
 
+* The contents of <textarea> tags will no longer be modified when the
+  tree is prettified. [bug=1555829]
+
 * Fixed a Python 3 ByteWarning when a URL was passed in as though it
   were markup. Thanks to James Salter for a patch and test.
   [bug=1533762]
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index f444edd..601979b 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -7,6 +7,7 @@ import sys
 from bs4.element import (
     CharsetMetaAttributeValue,
     ContentMetaAttributeValue,
+    HTMLAwareEntitySubstitution,
     whitespace_re
     )
 
@@ -230,7 +231,7 @@ class HTMLTreeBuilder(TreeBuilder):
     Such as which tags are empty-element tags.
     """
 
-    preserve_whitespace_tags = set(['pre', 'textarea'])
+    preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
     empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
                               'spacer', 'link', 'frame', 'base'])
 
diff --git a/bs4/element.py b/bs4/element.py
index 9e2bfec..955f0fc 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -101,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
 
     preformatted_tags = set(["pre"])
 
+    preserve_whitespace_tags = set(['pre', 'textarea'])
+
     @classmethod
     def _substitute_if_appropriate(cls, ns, f):
         if (isinstance(ns, NavigableString)
@@ -1065,10 +1067,18 @@ class Tag(PageElement):
 
     def _should_pretty_print(self, indent_level):
         """Should this tag be pretty-printed?"""
+        if self.builder:
+            preserve_whitespace_tags = self.builder.preserve_whitespace_tags
+        else:
+            if self._is_xml:
+                preserve_whitespace_tags = []
+            else:
+                preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
+
         return (
-            indent_level is not None and
-            (self.name not in HTMLAwareEntitySubstitution.preformatted_tags
-             or self._is_xml))
+            indent_level is not None
+            and self.name not in preserve_whitespace_tags
+        )
 
     def decode(self, indent_level=None,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
diff --git a/bs4/testing.py b/bs4/testing.py
index 1e2cc9c..676d7b3 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -217,9 +217,22 @@ Hello, world!
         self.assertEqual(comment, baz.previous_element)
 
     def test_preserved_whitespace_in_pre_and_textarea(self):
-        """Whitespace must be preserved in <pre> and <textarea> tags."""
-        self.assertSoupEquals("<pre> </pre>")
-        self.assertSoupEquals("<textarea> woo </textarea>")
+        """Whitespace must be preserved in <pre> and <textarea> tags,
+        even if that would mean not prettifying the markup.
+        """
+        pre_markup = "<pre> </pre>"
+        textarea_markup = "<textarea> woo\nwoo </textarea>"
+        self.assertSoupEquals(pre_markup)
+        self.assertSoupEquals(textarea_markup)
+
+        soup = self.soup(pre_markup)
+        self.assertEqual(soup.pre.prettify(), pre_markup)
+
+        soup = self.soup(textarea_markup)
+        self.assertEqual(soup.textarea.prettify(), textarea_markup)
+
+        soup = self.soup("<textarea></textarea>")
+        self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
 
     def test_nested_inline_elements(self):
         """Inline elements can be nested indefinitely."""
```