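Fixed a bug in the lxml treebuilder that prevented the user from adding attributes to a tag that didn't originally have any: lxml may pass the attributes as an immutable dictproxy, so the builder now copies them into a regular dict. Also renamed the fix_embedded_windows_1252 tests to use UnicodeDammit.detwingle. [bug=1002378] Thanks to Oliver Beattie for the patch.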

author: Leonard Richardson <leonardr@segfault.org> 2012-05-24 08:28:14 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2012-05-24 08:28:14 -0400
commit: 34c036cde4ed75e000be2d29f542a3f9ec215dfa (patch)
tree: f7da41504e50da02587ac4acd0d8a93ae2f50f70 /bs4
parent: c84e08aa77764578ca1be2a322a4a7bed12d6851 (diff)
3 files changed, 12 insertions, 4 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 6491322..c78fdff 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -88,6 +88,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         self.nsmaps = None
 
     def start(self, name, attrs, nsmap={}):
+        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
+        attrs = dict(attrs)
+
         nsprefix = None
         # Invert each namespace map as it comes in.
         if len(nsmap) == 0 and self.nsmaps != None:
diff --git a/bs4/testing.py b/bs4/testing.py
index 40dc976..5a84b0b 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -421,6 +421,11 @@ class HTMLTreeBuilderSmokeTest(object):
         # encoding.
         self.assertEqual('utf8', charset.encode("utf8"))
 
+    def test_tag_with_no_attributes_can_have_attributes_added(self):
+        data = self.soup("<a>text</a>")
+        data.a['foo'] = 'bar'
+        self.assertEqual('<a foo="bar">text</a>', data.a.decode())
+
 class XMLTreeBuilderSmokeTest(object):
 
     def test_docstring_generated(self):
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index ef58521..23a664e 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -286,7 +286,7 @@ class TestUnicodeDammit(unittest.TestCase):
         self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
         self.assertEqual("utf-16le", dammit.original_encoding)
 
-    def test_fix_embedded_windows_1252(self):
+    def test_detwingle(self):
         # Here's a UTF8 document.
         utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
 
@@ -306,11 +306,11 @@ class TestUnicodeDammit(unittest.TestCase):
 
         # But if we run it through fix_embedded_windows_1252, it's fixed:
 
-        fixed = UnicodeDammit.fix_embedded_windows_1252(doc)
+        fixed = UnicodeDammit.detwingle(doc)
         self.assertEqual(
             u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
 
-    def test_fix_embedded_windows_1252_ignores_multibyte_characters(self):
+    def test_detwingle_ignores_multibyte_characters(self):
         # Each of these characters has a UTF-8 representation ending
         # in \x93. \x93 is a smart quote if interpreted as
         # Windows-1252. But our code knows to skip over multibyte
@@ -322,7 +322,7 @@ class TestUnicodeDammit(unittest.TestCase):
             ):
             input = tricky_unicode_char.encode("utf8")
             self.assertTrue(input.endswith(b'\x93'))
-            output = UnicodeDammit.fix_embedded_windows_1252(input)
+            output = UnicodeDammit.detwingle(input)
             self.assertEqual(output, input)
 
 class TestNamedspacedAttribute(SoupTest):
author	Leonard Richardson <leonardr@segfault.org>	2012-05-24 08:28:14 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2012-05-24 08:28:14 -0400
commit	34c036cde4ed75e000be2d29f542a3f9ec215dfa (patch)
tree	f7da41504e50da02587ac4acd0d8a93ae2f50f70 /bs4
parent	c84e08aa77764578ca1be2a322a4a7bed12d6851 (diff)