From f125bdd8f8370223329d1736c1f21224622e0b50 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 16 Aug 2012 10:26:57 -0400 Subject: Use namespace prefixes for namespaced attribute names, instead of the fully-qualified names given by the lxml parser. [bug=1037597] --- bs4/builder/_lxml.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'bs4/builder/_lxml.py') diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py index c78fdff..f6b91ff 100644 --- a/bs4/builder/_lxml.py +++ b/bs4/builder/_lxml.py @@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder): attribute = NamespacedAttribute( "xmlns", prefix, "http://www.w3.org/2000/xmlns/") attrs[attribute] = namespace + + if self.nsmaps is not None and len(self.nsmaps) > 0: + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn them into NamespacedAttribute objects. + new_attrs = {} + for attr, value in attrs.items(): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + namespace, name = self._getNsTag(name) - if namespace is not None: - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - nsprefix = inverted_nsmap[namespace] - break + nsprefix = self._prefix_for_namespace(namespace) self.soup.handle_starttag(name, namespace, nsprefix, attrs) + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + def end(self, name): self.soup.endData() completed_tag = self.soup.tagStack[-1] -- cgit v1.2.3