diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-15 14:27:21 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-15 14:27:21 -0500 |
commit | a06152365c336f41bdb5fb9513b9316740c1564a (patch) | |
tree | 72f7f6993624453d8f4a8ccc21ef88ef93cee59a /bs4/builder/__init__.py | |
parent | be0c08585f54ec709740ff4352006bf3e605b8f2 (diff) |
Some cdata-list attributes are only cdata lists for certain tags.
Diffstat (limited to 'bs4/builder/__init__.py')
-rw-r--r-- | bs4/builder/__init__.py | 24 |
1 file changed, 19 insertions, 5 deletions
```diff
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 067623e..4e31572 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -82,9 +82,9 @@ class TreeBuilder(object):
     empty_element_tags = None # A tag will be considered an empty-element
                               # tag when and only when it has no contents.
 
-    # A value for these attributes is a space- or comma-separated list
-    # of CDATA, rather than a single CDATA.
-    cdata_list_attributes = None
+    # A value for these tag/attribute combinations is a space- or
+    # comma-separated list of CDATA, rather than a single CDATA.
+    cdata_list_attributes = {}
 
 
     def __init__(self):
@@ -201,8 +201,22 @@ class HTMLTreeBuilder(TreeBuilder):
     # encounter one of these attributes, we will parse its value into
     # a list of values if possible. Upon output, the list will be
     # converted back into a string.
-    cdata_list_attributes = set(
-        ['class', 'rel', 'rev', 'archive', 'accept-charset', 'headers'])
+    cdata_list_attributes = {
+        "*" : ['class', 'accesskey', 'dropzone'],
+        "a" : ['rel', 'rev'],
+        "link" : ['rel', 'rev'],
+        "td" : ["headers"],
+        "th" : ["headers"],
+        "td" : ["headers"],
+        "form" : ["accept-charset"],
+        "object" : ["archive"],
+
+        # These are HTML5 specific, as are *.accesskey and *.dropzone above.
+        "area" : ["rel"],
+        "icon" : ["sizes"],
+        "iframe" : ["sandbox"],
+        "output" : ["for"],
+        }
 
     # Used by set_up_substitutions to detect the charset in a META tag
     CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
```