summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/builder/_lxml.py 30
-rw-r--r-- bs4/testing.py 5
3 files changed, 33 insertions, 5 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 043d79e..9378f51 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,5 +1,8 @@
= 4.1.2 (Unreleased) =
+* Use namespace prefixes for namespaced attribute names, instead of
+ the fully-qualified names given by the lxml parser. [bug=1037597]
+
* When sniffing encodings, if the cchardet library is installed, use
it instead of chardet. It's much faster. [bug=1020748]
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c78fdff..f6b91ff 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
+
+ if self.nsmaps is not None and len(self.nsmaps) > 0:
+ # Namespaces are in play. Find any attributes that came in
+ # from lxml with namespaces attached to their names, and
+ # turn them into NamespacedAttribute objects.
+ new_attrs = {}
+ for attr, value in attrs.items():
+ namespace, attr = self._getNsTag(attr)
+ if namespace is None:
+ new_attrs[attr] = value
+ else:
+ nsprefix = self._prefix_for_namespace(namespace)
+ attr = NamespacedAttribute(nsprefix, attr, namespace)
+ new_attrs[attr] = value
+ attrs = new_attrs
+
namespace, name = self._getNsTag(name)
- if namespace is not None:
- for inverted_nsmap in reversed(self.nsmaps):
- if inverted_nsmap is not None and namespace in inverted_nsmap:
- nsprefix = inverted_nsmap[namespace]
- break
+ nsprefix = self._prefix_for_namespace(namespace)
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
+ def _prefix_for_namespace(self, namespace):
+ """Find the currently active prefix for the given namespace."""
+ if namespace is None:
+ return None
+ for inverted_nsmap in reversed(self.nsmaps):
+ if inverted_nsmap is not None and namespace in inverted_nsmap:
+ return inverted_nsmap[namespace]
+
def end(self, name):
self.soup.endData()
completed_tag = self.soup.tagStack[-1]
diff --git a/bs4/testing.py b/bs4/testing.py
index 4fec24b..30e74f4 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -490,6 +490,11 @@ class XMLTreeBuilderSmokeTest(object):
soup = self.soup(markup)
self.assertEqual(unicode(soup.p), markup)
+ def test_namespaced_attributes(self):
+ markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
+ soup = self.soup(markup)
+ self.assertEqual(unicode(soup.foo), markup)
+
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""