summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2012-04-26 10:08:45 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2012-04-26 10:08:45 -0400
commit e82a52cacd936b66d9f6290136278af00ea1428c (patch)
tree bf118c8d2bbc701c8143eb8c584cfa2c8c3d4116
parent 3ff7bde5d320fbec4c16e7f245c345e8455ca887 (diff)
The test suite now passes when lxml is not installed, whether or not html5lib is installed. [bug=987004]
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/builder/__init__.py 20
-rw-r--r-- bs4/builder/_html5lib.py 15
-rw-r--r-- bs4/element.py 16
-rw-r--r-- bs4/testing.py 5
5 files changed, 43 insertions, 16 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 72c388b..12922c9 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -7,6 +7,9 @@
definitions ending with two question marks instead of
one. [bug=984258]
+* The test suite now passes when lxml is not installed, whether or not
+ html5lib is installed. [bug=987004]
+
* Print a warning on HTMLParseErrors to let people know they should
install a better parser library.
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 9f4f59e..4c22b86 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -1,8 +1,10 @@
from collections import defaultdict
+import itertools
import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
+ whitespace_re
)
__all__ = [
@@ -140,6 +142,24 @@ class TreeBuilder(object):
def set_up_substitutions(self, tag):
return False
+ def _replace_cdata_list_attribute_values(self, tag_name, attrs):
+ """Replaces class="foo bar" with class=["foo", "bar"]
+
+ Modifies its input in place.
+ """
+ if self.cdata_list_attributes:
+ universal = self.cdata_list_attributes.get('*', [])
+ tag_specific = self.cdata_list_attributes.get(
+ tag_name.lower(), [])
+ for cdata_list_attr in itertools.chain(universal, tag_specific):
+ if cdata_list_attr in dict(attrs):
+ # Basically, we have a "class" attribute whose
+ # value is a whitespace-separated list of CSS
+ # classes. Split it into a list.
+ value = attrs[cdata_list_attr]
+ values = whitespace_re.split(value)
+ attrs[cdata_list_attr] = values
+ return attrs
class SAXTreeBuilder(TreeBuilder):
"""A Beautiful Soup treebuilder that listens for SAX events."""
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 2d6fe31..6001e38 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -139,11 +139,20 @@ class Element(html5lib.treebuilders._base.Node):
return AttrList(self.element)
def setAttributes(self, attributes):
- if attributes is not None and attributes != {}:
+ if attributes is not None and len(attributes) > 0:
+
+ converted_attributes = []
for name, value in list(attributes.items()):
if isinstance(name, tuple):
- name = NamespacedAttribute(*name)
- self.element[name] = value
+ new_name = NamespacedAttribute(*name)
+ del attributes[name]
+ attributes[new_name] = value
+
+ self.soup.builder._replace_cdata_list_attribute_values(
+ self.name, attributes)
+ for name, value in attributes.items():
+ self.element[name] = value
+
# The attributes may contain variables that need substitution.
# Call set_up_substitutions manually.
#
diff --git a/bs4/element.py b/bs4/element.py
index aa9a3e9..282193e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,5 +1,4 @@
import collections
-import itertools
import re
import sys
import warnings
@@ -735,20 +734,11 @@ class Tag(PageElement):
self.prefix = prefix
if attrs is None:
attrs = {}
+ elif builder.cdata_list_attributes:
+ attrs = builder._replace_cdata_list_attribute_values(
+ self.name, attrs)
else:
attrs = dict(attrs)
- if builder.cdata_list_attributes:
- universal = builder.cdata_list_attributes.get('*', [])
- tag_specific = builder.cdata_list_attributes.get(
- self.name.lower(), [])
- for cdata_list_attr in itertools.chain(universal, tag_specific):
- if cdata_list_attr in attrs:
- # Basically, we have a "class" attribute whose
- # value is a whitespace-separated list of CSS
- # classes. Split it into a list.
- value = attrs[cdata_list_attr]
- values = whitespace_re.split(value)
- attrs[cdata_list_attr] = values
self.attrs = attrs
self.contents = []
self.setup(parent, previous)
diff --git a/bs4/testing.py b/bs4/testing.py
index 94c87c9..b004c18 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -238,6 +238,11 @@ class HTMLTreeBuilderSmokeTest(object):
self.assertEqual(
'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
+ def test_multivalued_attribute_value_becomes_list(self):
+ markup = b'<a class="foo bar">'
+ soup = self.soup(markup)
+ self.assertEqual(['foo', 'bar'], soup.a['class'])
+
#
# Generally speaking, tests below this point are more tests of
# Beautiful Soup than tests of the tree builders. But parsers are