summary | refs | log | tree | commit | diff
path: root/bs4/builder/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-15 14:27:21 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-15 14:27:21 -0500
commit: a06152365c336f41bdb5fb9513b9316740c1564a (patch)
tree: 72f7f6993624453d8f4a8ccc21ef88ef93cee59a /bs4/builder/__init__.py
parent: be0c08585f54ec709740ff4352006bf3e605b8f2 (diff)
Some cdata-list attributes are only cdata lists for certain tags.
Diffstat (limited to 'bs4/builder/__init__.py')
-rw-r--r-- bs4/builder/__init__.py 24
1 files changed, 19 insertions, 5 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 067623e..4e31572 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -82,9 +82,9 @@ class TreeBuilder(object):
empty_element_tags = None # A tag will be considered an empty-element
# tag when and only when it has no contents.
- # A value for these attributes is a space- or comma-separated list
- # of CDATA, rather than a single CDATA.
- cdata_list_attributes = None
+ # A value for these tag/attribute combinations is a space- or
+ # comma-separated list of CDATA, rather than a single CDATA.
+ cdata_list_attributes = {}
def __init__(self):
@@ -201,8 +201,22 @@ class HTMLTreeBuilder(TreeBuilder):
# encounter one of these attributes, we will parse its value into
# a list of values if possible. Upon output, the list will be
# converted back into a string.
- cdata_list_attributes = set(
- ['class', 'rel', 'rev', 'archive', 'accept-charset', 'headers'])
+ cdata_list_attributes = {
+ "*" : ['class', 'accesskey', 'dropzone'],
+ "a" : ['rel', 'rev'],
+ "link" : ['rel', 'rev'],
+ "td" : ["headers"],
+ "th" : ["headers"],
+ "td" : ["headers"],
+ "form" : ["accept-charset"],
+ "object" : ["archive"],
+
+ # These are HTML5 specific, as are *.accesskey and *.dropzone above.
+ "area" : ["rel"],
+ "icon" : ["sizes"],
+ "iframe" : ["sandbox"],
+ "output" : ["for"],
+ }
# Used by set_up_substitutions to detect the charset in a META tag
CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)