summary | refs | log | tree | commit | diff
path: root/bs4/builder
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/__init__.py 14
-rw-r--r-- bs4/builder/_html5lib.py 2
-rw-r--r-- bs4/builder/_htmlparser.py 11
-rw-r--r-- bs4/builder/_lxml.py 3
4 files changed, 19 insertions, 11 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 4207750..9dad920 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -96,11 +96,15 @@ class TreeBuilder(object):
# A value for these tag/attribute combinations is a space- or
# comma-separated list of CDATA, rather than a single CDATA.
- cdata_list_attributes = {}
+ DEFAULT_CDATA_LIST_ATTRIBUTES = {}
-
- def __init__(self):
+ USE_DEFAULT = object()
+
+ def __init__(self, cdata_list_attributes=USE_DEFAULT):
self.soup = None
+ if cdata_list_attributes is self.USE_DEFAULT:
+ cdata_list_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
+ self.cdata_list_attributes = cdata_list_attributes
def initialize_soup(self, soup):
"""The BeautifulSoup object has been initialized and is now
@@ -131,7 +135,7 @@ class TreeBuilder(object):
if self.empty_element_tags is None:
return True
return tag_name in self.empty_element_tags
-
+
def feed(self, markup):
raise NotImplementedError()
@@ -259,7 +263,7 @@ class HTMLTreeBuilder(TreeBuilder):
# encounter one of these attributes, we will parse its value into
# a list of values if possible. Upon output, the list will be
# converted back into a string.
- cdata_list_attributes = {
+ DEFAULT_CDATA_LIST_ATTRIBUTES = {
"*" : ['class', 'accesskey', 'dropzone'],
"a" : ['rel', 'rev'],
"link" : ['rel', 'rev'],
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 6fa8593..6892a93 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -199,7 +199,7 @@ class AttrList(object):
def __setitem__(self, name, value):
# If this attribute is a multi-valued attribute for this element,
# turn its value into a list.
- list_attr = HTML5TreeBuilder.cdata_list_attributes
+ list_attr = self.element.cdata_list_attributes
if (name in list_attr['*']
or (self.element.name in list_attr
and name in list_attr[self.element.name])):
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ff09ca3..56b8b91 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -214,12 +214,15 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
NAME = HTMLPARSER
features = [NAME, HTML, STRICT]
- def __init__(self, *args, **kwargs):
+ def __init__(self, parser_args=None, parser_kwargs=None, **kwargs):
+ super(HTMLParserTreeBuilder, self).__init__(**kwargs)
+ parser_args = parser_args or []
+ parser_kwargs = parser_kwargs or {}
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
- kwargs['strict'] = False
+ parser_kwargs['strict'] = False
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
- kwargs['convert_charrefs'] = False
- self.parser_args = (args, kwargs)
+ parser_kwargs['convert_charrefs'] = False
+ self.parser_args = (parser_args, parser_kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None, exclude_encodings=None):
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index b7e172c..27cadcb 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -94,7 +94,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
parser = parser(target=self, strip_cdata=False, encoding=encoding)
return parser
- def __init__(self, parser=None, empty_element_tags=None):
+ def __init__(self, parser=None, empty_element_tags=None, **kwargs):
# TODO: Issue a warning if parser is present but not a
# callable, since that means there's no way to create new
# parsers for different encodings.
@@ -103,6 +103,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.empty_element_tags = set(empty_element_tags)
self.soup = None
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
+ super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
def _getNsTag(self, tag):
# Split the namespace URL out of a fully-qualified lxml tag