Fixed handling of the closing of namespaced tags.

author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-23 12:23:12 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-23 12:23:12 -0500
commit: fcefebe15290b9ff44934efa73fb07c70ebf5171 (patch)
tree: c0b3ae8837a96975e1b88f3e2e9befc07a72e70c
parent: b7749c50a2c96ccf6982cfa1ca02d883e31e0af9 (diff)
7 files changed, 51 insertions, 8 deletions
diff --git a/NEWS.txt b/NEWS.txt
index fe2e0cc..c93541e 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -8,6 +8,8 @@
   to use namespace prefixes exactly as they're used in the original
   document.
 
+* The string representation of a DOCTYPE always ends in a newline.
+
 = 4.0.0b7 (20110223) =
 
 * Upon decoding to string, any characters that can't be represented in
diff --git a/bs4/__init__.py b/bs4/__init__.py
index bf800ea..9b5c155 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -249,7 +249,7 @@ class BeautifulSoup(Tag):
         self.previous_element = o
         self.currentTag.contents.append(o)
 
-    def _popToTag(self, name, inclusivePop=True):
+    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
         """Pops the tag stack up to and including the most recent
         instance of the given tag. If inclusivePop is false, pops the tag
         stack up to but *not* including the most recent instance of
@@ -262,7 +262,8 @@ class BeautifulSoup(Tag):
         mostRecentTag = None
 
         for i in range(len(self.tagStack) - 1, 0, -1):
-            if name == self.tagStack[i].name:
+            if (name == self.tagStack[i].name
+                and nsprefix == self.tagStack[i].nsprefix == nsprefix):
                 numPops = len(self.tagStack) - i
                 break
         if not inclusivePop:
@@ -299,10 +300,10 @@ class BeautifulSoup(Tag):
         self.pushTag(tag)
         return tag
 
-    def handle_endtag(self, name):
+    def handle_endtag(self, name, nsprefix=None):
         #print "End tag: " + name
         self.endData()
-        self._popToTag(name)
+        self._popToTag(name, nsprefix)
 
     def handle_data(self, data):
         self.currentData.append(data)
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 870d59e..e5e30d4 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -106,7 +106,14 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     def end(self, name):
         self.soup.endData()
         completed_tag = self.soup.tagStack[-1]
-        self.soup.handle_endtag(name)
+        namespace, name = self._getNsTag(name)
+        nsprefix = None
+        if namespace is not None:
+            for inverted_nsmap in reversed(self.nsmaps):
+                if inverted_nsmap is not None and namespace in inverted_nsmap:
+                    nsprefix = inverted_nsmap[namespace]
+                    break
+        self.soup.handle_endtag(name, nsprefix)
         if self.nsmaps != None:
             # This tag, or one of its parents, introduced a namespace
             # mapping, so pop it off the stack.
diff --git a/bs4/element.py b/bs4/element.py
index c2c4e2e..efc6ec7 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -25,7 +25,10 @@ def _alias(attr):
 class NamespacedAttribute(unicode):
 
     def __new__(cls, prefix, name, namespace=None):
-        obj = unicode.__new__(cls, prefix + ":" + name)
+        if name is None:
+            obj = unicode.__new__(cls, prefix)
+        else:
+            obj = unicode.__new__(cls, prefix + ":" + name)
         obj.prefix = prefix
         obj.name = name
         obj.namespace = namespace
@@ -510,7 +513,7 @@ class Doctype(NavigableString):
         return Doctype(value)
 
     PREFIX = u'<!DOCTYPE '
-    SUFFIX = u'>'
+    SUFFIX = u'>\n'
 
 
 class Tag(PageElement):
diff --git a/bs4/testing.py b/bs4/testing.py
index 1945c02..6f9d857 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -371,6 +371,17 @@ class XMLTreeBuilderSmokeTest(object):
             soup.encode("latin1"),
             b'<?xml version="1.0" encoding="latin1">\n<root/>')
 
+    def test_real_xhtml_document(self):
+        """A real XHTML document should come out the same as it went in."""
+        markup = b"""<?xml version="1.0" encoding="utf-8">
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(soup.encode("utf-8"), markup)
+
 
     def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
         self.assertSoupEquals("<p>", "<p/>")
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 8333ad4..33ab0fa 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -240,6 +240,10 @@ class TestUnicodeDammit(unittest.TestCase):
 
 class TestNamedspacedAttribute(SoupTest):
 
+    def test_name_may_be_none(self):
+        a = NamespacedAttribute("xmlns", None)
+        self.assertEqual(a, "xmlns")
+
     def test_attribute_is_equivalent_to_colon_separated_string(self):
         a = NamespacedAttribute("a", "b")
         self.assertEqual("a:b", a)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index ce9a7ec..c75b561 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -18,7 +18,13 @@ from bs4.builder import (
     builder_registry,
     HTMLParserTreeBuilder,
 )
-from bs4.element import CData, NavigableString, SoupStrainer, Tag
+from bs4.element import (
+    CData,
+    Doctype,
+    NavigableString,
+    SoupStrainer,
+    Tag,
+)
 from bs4.testing import (
     SoupTest,
     skipIf,
@@ -1277,3 +1283,12 @@ class TestNavigableStringSubclasses(SoupTest):
         self.assertEqual(str(soup), "<![CDATA[foo]]>")
         self.assertEqual(soup.find(text="foo"), "foo")
         self.assertEqual(soup.contents[0], "foo")
+
+    def test_doctype_ends_in_newline(self):
+        # Unlike other NavigableString subclasses, a DOCTYPE always ends
+        # in a newline.
+        doctype = Doctype("foo")
+        soup = self.soup("")
+        soup.insert(1, doctype)
+        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
+
author	Leonard Richardson <leonard.richardson@canonical.com>	2012-02-23 12:23:12 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2012-02-23 12:23:12 -0500
commit	fcefebe15290b9ff44934efa73fb07c70ebf5171 (patch)
tree	c0b3ae8837a96975e1b88f3e2e9befc07a72e70c
parent	b7749c50a2c96ccf6982cfa1ca02d883e31e0af9 (diff)