diff options
author | Leonard Richardson <leonardr@segfault.org> | 2019-07-14 17:09:58 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2019-07-14 17:09:58 -0400 |
commit | 0df054db08ef3286482694ee0c9aa85b5313dfd2 (patch) | |
tree | d1b38991f1148abccb0862484d87d760654cd18f /bs4/builder/__init__.py | |
parent | 519afbe269b671e15a1f1d2aecfe4fc579b61efc (diff) |
Give the Formatter class more control over formatting decisions.
Diffstat (limited to 'bs4/builder/__init__.py')
-rw-r--r-- | bs4/builder/__init__.py | 18 |
1 file changed, 12 insertions, 6 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index c5e6e84..e087f07 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -7,7 +7,6 @@ import sys from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, - HTMLAwareEntitySubstitution, nonwhitespace_re ) @@ -90,7 +89,6 @@ class TreeBuilder(object): is_xml = False picklable = False - preserve_whitespace_tags = set() empty_element_tags = None # A tag will be considered an empty-element # tag when and only when it has no contents. @@ -98,9 +96,11 @@ class TreeBuilder(object): # comma-separated list of CDATA, rather than a single CDATA. DEFAULT_CDATA_LIST_ATTRIBUTES = {} + DEFAULT_PRESERVE_WHITESPACE_TAGS = set() + USE_DEFAULT = object() - def __init__(self, multi_valued_attributes=USE_DEFAULT): + def __init__(self, multi_valued_attributes=USE_DEFAULT, preserve_whitespace_tags=USE_DEFAULT): """Constructor. :param multi_valued_attributes: If this is set to None, the @@ -110,14 +110,19 @@ class TreeBuilder(object): for an example. Internally, these are called "CDATA list attributes", but that - probably doesn't make sense to an end-use, so the argument ame + probably doesn't make sense to an end-user, so the argument name is `multi_valued_attributes`. + + :param preserve_whitespace_tags: """ self.soup = None if multi_valued_attributes is self.USE_DEFAULT: multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES self.cdata_list_attributes = multi_valued_attributes - + if preserve_whitespace_tags is self.USE_DEFAULT: + preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS + self.preserve_whitespace_tags = preserve_whitespace_tags + def initialize_soup(self, soup): """The BeautifulSoup object has been initialized and is now being associated with the TreeBuilder. @@ -253,7 +258,6 @@ class HTMLTreeBuilder(TreeBuilder): Such as which tags are empty-element tags. 
""" - preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags empty_element_tags = set([ # These are from HTML5. 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', @@ -292,6 +296,8 @@ class HTMLTreeBuilder(TreeBuilder): "output" : ["for"], } + DEFAULT_PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + def set_up_substitutions(self, tag): # We are only interested in <meta> tags if tag.name != 'meta': |