summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2015-09-28 19:53:43 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2015-09-28 19:53:43 -0400
commit: 31cf0828b3b339c6136de1193fb6619d5c9c12ea (patch)
tree: b37c80349cd9ea0b6167e568d53a3f7dc89ab8d3
parent: 85049f49a4571c4419440e72e8faed062516858d (diff)
Fixed a parse bug with the html5lib tree-builder. Thanks to Roel
Kramer for the patch. [bug=1483781]
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/builder/_html5lib.py 5
-rw-r--r-- bs4/tests/test_html5lib.py 7
3 files changed, 14 insertions, 1 deletion
diff --git a/NEWS.txt b/NEWS.txt
index 59f3b44..3726c57 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -4,6 +4,9 @@
removed. Thanks to Eric Weiser for the patch and John Wiseman for a
test. [bug=1481520]
+* Fixed a parse bug with the html5lib tree-builder. Thanks to Roel
+ Kramer for the patch. [bug=1483781]
+
* Improved the implementation of CSS selector grouping. Thanks to
Orangain for the patch. [bug=1484543]
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index ab5793c..8725a65 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -120,7 +120,10 @@ class AttrList(object):
if (name in list_attr['*']
or (self.element.name in list_attr
and name in list_attr[self.element.name])):
- value = whitespace_re.split(value)
+ # A node that is being cloned may have already undergone
+ # this procedure.
+ if not isinstance(value, list):
+ value = whitespace_re.split(value)
self.element[name] = value
def items(self):
return list(self.attrs.items())
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 9a2bacf..65536c2 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -89,3 +89,10 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
markup = b"""<?PITarget PIContent?>"""
soup = self.soup(markup)
assert str(soup).startswith("<!--?PITarget PIContent?-->")
+
+ def test_cloned_multivalue_node(self):
+ markup = b"""<a class="my_class"><p></a>"""
+ soup = self.soup(markup)
+ a1, a2 = soup.find_all('a')
+ self.assertEqual(a1, a2)
+ assert a1 is not a2