summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com>, 2013-06-03 08:58:59 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com>, 2013-06-03 08:58:59 -0400
commit: c8545a7b00a77d2f606620ef69b36b35291d8174 (patch)
tree: 2d4ee494b03ca80718fc77bc7b0556dde72b080d
parent: b25f977fdae354fa15de789116e54e95d84d3d28 (diff)
Improved performance of _replace_cdata_list_attribute_values, and greatly reduced the number of times it is called.
-rw-r--r-- bs4/builder/__init__.py | 18
-rw-r--r-- bs4/diagnose.py | 2
-rw-r--r-- bs4/element.py | 2
3 files changed, 12 insertions, 10 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index e59dae2..740f5f2 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -147,16 +147,18 @@ class TreeBuilder(object):
Modifies its input in place.
"""
+ if not attrs:
+ return attrs
if self.cdata_list_attributes:
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
- tag_name.lower(), [])
- for cdata_list_attr in itertools.chain(universal, tag_specific):
- if cdata_list_attr in attrs:
- # Basically, we have a "class" attribute whose
- # value is a whitespace-separated list of CSS
- # classes. Split it into a list.
- value = attrs[cdata_list_attr]
+ tag_name.lower(), None)
+ for attr in attrs.keys():
+ if attr in universal or (tag_specific and attr in tag_specific):
+ # We have a "class"-type attribute whose string
+ # value is a whitespace-separated list of
+ # values. Split it into a list.
+ value = attrs[attr]
if isinstance(value, basestring):
values = whitespace_re.split(value)
else:
@@ -167,7 +169,7 @@ class TreeBuilder(object):
# leave the value alone rather than trying to
# split it again.
values = value
- attrs[cdata_list_attr] = values
+ attrs[attr] = values
return attrs
class SAXTreeBuilder(TreeBuilder):
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index b6eaa8d..c5a0c06 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -192,7 +192,7 @@ def profile(num_elements=100000, parser="lxml"):
stats.strip_dirs()
cumulative = stats.sort_stats("cumulative")
total = stats.sort_stats("time")
- import pdb; pdb.set_trace()
+ cumulative.print_stats(50)
if __name__ == '__main__':
#diagnose(sys.stdin.read())
diff --git a/bs4/element.py b/bs4/element.py
index f6864f2..7b63b30 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -746,7 +746,7 @@ class Tag(PageElement):
self.prefix = prefix
if attrs is None:
attrs = {}
- elif builder.cdata_list_attributes:
+ elif attrs and builder.cdata_list_attributes:
attrs = builder._replace_cdata_list_attribute_values(
self.name, attrs)
else: