summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bs4/__init__.py 2
-rw-r--r-- bs4/builder/_lxml.py 34
-rw-r--r-- bs4/element.py 7
3 files changed, 36 insertions, 7 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 7007796..2dd0521 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -281,7 +281,7 @@ class BeautifulSoup(Tag):
don't call handle_endtag.
"""
- #print "Start tag %s: %s" % (name, attrs)
+ # print "Start tag %s: %s" % (name, attrs)
self.endData()
if (self.parse_only and len(self.tagStack) <= 1
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index ad566e6..7fccb8e 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -42,6 +42,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
parser = parser(target=self, strip_cdata=False)
self.parser = parser
self.soup = None
+ self.nsmaps = []
+
+ def _getNsTag(self, tag):
+     """Split a fully-qualified lxml tag name into (namespace, local name).
+
+     lxml spells a namespaced tag as ``{namespace-url}local``. Adapted
+     from lxml's src/lxml/sax.py.
+
+     :param tag: Tag name as reported by lxml.
+     :return: A 2-tuple ``(namespace, local_name)``. ``namespace`` is
+      None when the name is not of the ``{...}local`` form.
+     """
+     # startswith() rather than tag[0], so an empty name cannot raise
+     # IndexError.
+     if tag.startswith('{'):
+         namespace, closing_brace, local_name = tag[1:].partition('}')
+         # Without a closing brace there is nothing to split off. Fall
+         # through so the caller always gets exactly two items to unpack.
+         if closing_brace:
+             return (namespace, local_name)
+     return (None, tag)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
@@ -63,16 +72,31 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.parser.close()
def close(self):
- pass
-
- def start(self, name, attrs):
- # XXX namespace
- self.soup.handle_starttag(name, None, None, attrs)
+     """Parser-target hook: discard the namespace maps collected so far."""
+     # The stack this patch creates in __init__ is self.nsmaps; no
+     # self.namespaces attribute is defined in the visible code, so
+     # clearing that name would raise AttributeError.
+     # Slice deletion empties the list in place and also works on
+     # Python 2, where list.clear() does not exist.
+     del self.nsmaps[:]
+
+ def start(self, name, attrs, nsmap=None):
+     """Handle an element start event and forward it to the soup.
+
+     :param name: Tag name, possibly in lxml's ``{namespace}local`` form.
+     :param attrs: Attributes, passed through to the soup unchanged.
+     :param nsmap: Optional mapping of prefix -> namespace URL declared
+      on this element. Empty or None when it declares none.
+     """
+     # Push exactly one entry per element so end() can pop once per
+     # element. Maps are stored inverted (URL -> prefix) so the prefix
+     # for a namespace is a direct lookup.
+     if nsmap:
+         inverted_nsmap = dict(
+             (value, key) for key, value in nsmap.items())
+         self.nsmaps.append(inverted_nsmap)
+     else:
+         self.nsmaps.append(None)
+
+     namespace, name = self._getNsTag(name)
+     nsprefix = None
+     if namespace is not None:
+         # Search from the innermost element outwards so the nearest
+         # declaration of the namespace wins.
+         for inverted_nsmap in reversed(self.nsmaps):
+             if inverted_nsmap is not None and namespace in inverted_nsmap:
+                 nsprefix = inverted_nsmap[namespace]
+                 break
+     self.soup.handle_starttag(name, namespace, nsprefix, attrs)
def end(self, name):
+ # Handle an element end event: call endData() and handle_endtag() on the
+ # soup, then drop this element's entry from the namespace-map stack.
self.soup.endData()
+ # NOTE(review): completed_tag is not read in the lines visible here.
completed_tag = self.soup.tagStack[-1]
self.soup.handle_endtag(name)
+ # Balances the single entry that start() appends for every element.
+ self.nsmaps.pop()
def pi(self, target, data):
pass
diff --git a/bs4/element.py b/bs4/element.py
index 73f225e..ab30951 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -805,6 +805,10 @@ class Tag(PageElement):
else:
closeTag = '</%s>' % self.name
+ prefix = ''
+ if self.nsprefix:
+ prefix = self.nsprefix + ":"
+
pretty_print = (indent_level is not None)
if pretty_print:
space = (' ' * (indent_level - 1))
@@ -825,7 +829,8 @@ class Tag(PageElement):
attribute_string = ' ' + ' '.join(attrs)
if pretty_print:
s.append(space)
- s.append('<%s%s%s>' % (self.name, attribute_string, close))
+ s.append('<%s%s%s%s>' % (
+ prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)