diff options
author | Leonard Richardson <leonardr@segfault.org> | 2012-08-16 10:26:57 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2012-08-16 10:26:57 -0400 |
commit | f125bdd8f8370223329d1736c1f21224622e0b50 (patch) | |
tree | fb76c396fea1b5a19495239fed51c941522f7b0a /bs4/builder/_lxml.py | |
parent | dc93ed59b82a0be368796a16f8f57b0a62f9a944 (diff) |
Use namespace prefixes for namespaced attribute names, instead of
the fully-qualified names given by the lxml parser. [bug=1037597]
Diffstat (limited to 'bs4/builder/_lxml.py')
-rw-r--r-- | bs4/builder/_lxml.py | 30 |
1 files changed, 25 insertions, 5 deletions
```diff
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c78fdff..f6b91ff 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                 attribute = NamespacedAttribute(
                     "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                 attrs[attribute] = namespace
+
+        if self.nsmaps is not None and len(self.nsmaps) > 0:
+            # Namespaces are in play. Find any attributes that came in
+            # from lxml with namespaces attached to their names, and
+            # turn then into NamespacedAttribute objects.
+            new_attrs = {}
+            for attr, value in attrs.items():
+                namespace, attr = self._getNsTag(attr)
+                if namespace is None:
+                    new_attrs[attr] = value
+                else:
+                    nsprefix = self._prefix_for_namespace(namespace)
+                    attr = NamespacedAttribute(nsprefix, attr, namespace)
+                    new_attrs[attr] = value
+            attrs = new_attrs
+
         namespace, name = self._getNsTag(name)
-        if namespace is not None:
-            for inverted_nsmap in reversed(self.nsmaps):
-                if inverted_nsmap is not None and namespace in inverted_nsmap:
-                    nsprefix = inverted_nsmap[namespace]
-                    break
+        nsprefix = self._prefix_for_namespace(namespace)
         self.soup.handle_starttag(name, namespace, nsprefix, attrs)
 
+    def _prefix_for_namespace(self, namespace):
+        """Find the currently active prefix for the given namespace."""
+        if namespace is None:
+            return None
+        for inverted_nsmap in reversed(self.nsmaps):
+            if inverted_nsmap is not None and namespace in inverted_nsmap:
+                return inverted_nsmap[namespace]
+
     def end(self, name):
         self.soup.endData()
         completed_tag = self.soup.tagStack[-1]
```