diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-09-28 19:53:43 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-09-28 19:53:43 -0400 |
commit | 31cf0828b3b339c6136de1193fb6619d5c9c12ea (patch) | |
tree | b37c80349cd9ea0b6167e568d53a3f7dc89ab8d3 | |
parent | 85049f49a4571c4419440e72e8faed062516858d (diff) |
Fixed a parse bug with the html5lib tree-builder. Thanks to Roel
Kramer for the patch. [bug=1483781]
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 5 | ||||
-rw-r--r-- | bs4/tests/test_html5lib.py | 7 |
3 files changed, 14 insertions, 1 deletion
@@ -4,6 +4,9 @@
   removed. Thanks to Eric Weiser for the patch and John Wiseman for a
   test. [bug=1481520]
 
+* Fixed a parse bug with the html5lib tree-builder. Thanks to Roel
+  Kramer for the patch. [bug=1483781]
+
 * Improved the implementation of CSS selector grouping. Thanks to
   Orangain for the patch. [bug=1484543]
 
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index ab5793c..8725a65 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -120,7 +120,10 @@ class AttrList(object):
         if (name in list_attr['*']
             or (self.element.name in list_attr
                 and name in list_attr[self.element.name])):
-            value = whitespace_re.split(value)
+            # A node that is being cloned may have already undergone
+            # this procedure.
+            if not isinstance(value, list):
+                value = whitespace_re.split(value)
         self.element[name] = value
     def items(self):
         return list(self.attrs.items())
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 9a2bacf..65536c2 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -89,3 +89,10 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
         markup = b"""<?PITarget PIContent?>"""
         soup = self.soup(markup)
         assert str(soup).startswith("<!--?PITarget PIContent?-->")
+
+    def test_cloned_multivalue_node(self):
+        markup = b"""<a class="my_class"><p></a>"""
+        soup = self.soup(markup)
+        a1, a2 = soup.find_all('a')
+        self.assertEqual(a1, a2)
+        assert a1 is not a2