summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/builder/__init__.py 3
-rw-r--r-- bs4/element.py 16
-rw-r--r-- bs4/testing.py 19
4 files changed, 34 insertions, 7 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 691cf1f..472538a 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -2,6 +2,9 @@
* Corrected handling of XML processing instructions. [bug=1504393]
+* The contents of <textarea> tags will no longer be modified when the
+ tree is prettified. [bug=1555829]
+
* Fixed a Python 3 ByteWarning when a URL was passed in as though it
were markup. Thanks to James Salter for a patch and
test. [bug=1533762]
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index f444edd..601979b 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -7,6 +7,7 @@ import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
+ HTMLAwareEntitySubstitution,
whitespace_re
)
@@ -230,7 +231,7 @@ class HTMLTreeBuilder(TreeBuilder):
Such as which tags are empty-element tags.
"""
- preserve_whitespace_tags = set(['pre', 'textarea'])
+ preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base'])
diff --git a/bs4/element.py b/bs4/element.py
index 9e2bfec..955f0fc 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -101,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
preformatted_tags = set(["pre"])
+ preserve_whitespace_tags = set(['pre', 'textarea'])
+
@classmethod
def _substitute_if_appropriate(cls, ns, f):
if (isinstance(ns, NavigableString)
@@ -1065,10 +1067,18 @@ class Tag(PageElement):
def _should_pretty_print(self, indent_level):
"""Should this tag be pretty-printed?"""
+ if self.builder:
+ preserve_whitespace_tags = self.builder.preserve_whitespace_tags
+ else:
+ if self._is_xml:
+ preserve_whitespace_tags = []
+ else:
+ preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
+
return (
- indent_level is not None and
- (self.name not in HTMLAwareEntitySubstitution.preformatted_tags
- or self._is_xml))
+ indent_level is not None
+ and self.name not in preserve_whitespace_tags
+ )
def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
diff --git a/bs4/testing.py b/bs4/testing.py
index 1e2cc9c..676d7b3 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -217,9 +217,22 @@ Hello, world!
self.assertEqual(comment, baz.previous_element)
def test_preserved_whitespace_in_pre_and_textarea(self):
- """Whitespace must be preserved in <pre> and <textarea> tags."""
- self.assertSoupEquals("<pre> </pre>")
- self.assertSoupEquals("<textarea> woo </textarea>")
+ """Whitespace must be preserved in <pre> and <textarea> tags,
+ even if that would mean not prettifying the markup.
+ """
+ pre_markup = "<pre> </pre>"
+ textarea_markup = "<textarea> woo\nwoo </textarea>"
+ self.assertSoupEquals(pre_markup)
+ self.assertSoupEquals(textarea_markup)
+
+ soup = self.soup(pre_markup)
+ self.assertEqual(soup.pre.prettify(), pre_markup)
+
+ soup = self.soup(textarea_markup)
+ self.assertEqual(soup.textarea.prettify(), textarea_markup)
+
+ soup = self.soup("<textarea></textarea>")
+ self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""