summary | refs | log | tree | commit | diff
path: root/bs4/builder/__init__.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2019-07-14 17:09:58 -0400
committer Leonard Richardson <leonardr@segfault.org> 2019-07-14 17:09:58 -0400
commit 0df054db08ef3286482694ee0c9aa85b5313dfd2 (patch)
tree d1b38991f1148abccb0862484d87d760654cd18f /bs4/builder/__init__.py
parent 519afbe269b671e15a1f1d2aecfe4fc579b61efc (diff)
Give the Formatter class more control over formatting decisions.
Diffstat (limited to 'bs4/builder/__init__.py')
-rw-r--r-- bs4/builder/__init__.py 18
1 files changed, 12 insertions, 6 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index c5e6e84..e087f07 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -7,7 +7,6 @@ import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
- HTMLAwareEntitySubstitution,
nonwhitespace_re
)
@@ -90,7 +89,6 @@ class TreeBuilder(object):
is_xml = False
picklable = False
- preserve_whitespace_tags = set()
empty_element_tags = None # A tag will be considered an empty-element
# tag when and only when it has no contents.
@@ -98,9 +96,11 @@ class TreeBuilder(object):
# comma-separated list of CDATA, rather than a single CDATA.
DEFAULT_CDATA_LIST_ATTRIBUTES = {}
+ DEFAULT_PRESERVE_WHITESPACE_TAGS = set()
+
USE_DEFAULT = object()
- def __init__(self, multi_valued_attributes=USE_DEFAULT):
+ def __init__(self, multi_valued_attributes=USE_DEFAULT, preserve_whitespace_tags=USE_DEFAULT):
"""Constructor.
:param multi_valued_attributes: If this is set to None, the
@@ -110,14 +110,19 @@ class TreeBuilder(object):
for an example.
Internally, these are called "CDATA list attributes", but that
- probably doesn't make sense to an end-use, so the argument ame
+ probably doesn't make sense to an end-user, so the argument name
is `multi_valued_attributes`.
+
+ :param preserve_whitespace_tags:
"""
self.soup = None
if multi_valued_attributes is self.USE_DEFAULT:
multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
self.cdata_list_attributes = multi_valued_attributes
-
+ if preserve_whitespace_tags is self.USE_DEFAULT:
+ preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
+ self.preserve_whitespace_tags = preserve_whitespace_tags
+
def initialize_soup(self, soup):
"""The BeautifulSoup object has been initialized and is now
being associated with the TreeBuilder.
@@ -253,7 +258,6 @@ class HTMLTreeBuilder(TreeBuilder):
Such as which tags are empty-element tags.
"""
- preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
empty_element_tags = set([
# These are from HTML5.
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
@@ -292,6 +296,8 @@ class HTMLTreeBuilder(TreeBuilder):
"output" : ["for"],
}
+ DEFAULT_PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
+
def set_up_substitutions(self, tag):
# We are only interested in <meta> tags
if tag.name != 'meta':