summaryrefslogtreecommitdiff
path: root/bs4/builder/_lxml.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2012-08-16 10:26:57 -0400
committerLeonard Richardson <leonardr@segfault.org>2012-08-16 10:26:57 -0400
commitf125bdd8f8370223329d1736c1f21224622e0b50 (patch)
treefb76c396fea1b5a19495239fed51c941522f7b0a /bs4/builder/_lxml.py
parentdc93ed59b82a0be368796a16f8f57b0a62f9a944 (diff)
Use namespace prefixes for namespaced attribute names, instead of
the fully-qualified names given by the lxml parser. [bug=1037597]
Diffstat (limited to 'bs4/builder/_lxml.py')
-rw-r--r--bs4/builder/_lxml.py30
1 files changed, 25 insertions, 5 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c78fdff..f6b91ff 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
+
+ if self.nsmaps is not None and len(self.nsmaps) > 0:
+ # Namespaces are in play. Find any attributes that came in
+ # from lxml with namespaces attached to their names, and
+ # turn them into NamespacedAttribute objects.
+ new_attrs = {}
+ for attr, value in attrs.items():
+ namespace, attr = self._getNsTag(attr)
+ if namespace is None:
+ new_attrs[attr] = value
+ else:
+ nsprefix = self._prefix_for_namespace(namespace)
+ attr = NamespacedAttribute(nsprefix, attr, namespace)
+ new_attrs[attr] = value
+ attrs = new_attrs
+
namespace, name = self._getNsTag(name)
- if namespace is not None:
- for inverted_nsmap in reversed(self.nsmaps):
- if inverted_nsmap is not None and namespace in inverted_nsmap:
- nsprefix = inverted_nsmap[namespace]
- break
+ nsprefix = self._prefix_for_namespace(namespace)
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
+ def _prefix_for_namespace(self, namespace):
+ """Find the currently active prefix for the given namespace."""
+ if namespace is None:
+ return None
+ for inverted_nsmap in reversed(self.nsmaps):
+ if inverted_nsmap is not None and namespace in inverted_nsmap:
+ return inverted_nsmap[namespace]
+
def end(self, name):
self.soup.endData()
completed_tag = self.soup.tagStack[-1]