summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 5
-rw-r--r-- bs4/tests/test_soup.py 3
2 files changed, 7 insertions, 1 deletion
diff --git a/NEWS.txt b/NEWS.txt
index 9e02805..c22f014 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -3,9 +3,12 @@
* As per PEP-8, allow searching by CSS class using the 'class_'
keyword argument. [bug=1037624]
-* Use namespace prefixes for namespaced attribute names, instead of
+* Display namespace prefixes for namespaced attribute names, instead of
the fully-qualified names given by the lxml parser. [bug=1037597]
+* Fixed a crash on encoding when an attribute name contained
+ non-ASCII characters.
+
* When sniffing encodings, if the cchardet library is installed,
Beautiful Soup uses cchardet instead of chardet. cchardet is much
faster. [bug=1020748]
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index a10a89e..c69ce31 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -175,6 +175,9 @@ class TestEncodingConversion(SoupTest):
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
+ def test_attribute_name_containing_unicode_characters(self):
+ markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
+ self.assertEquals(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
"""Standalone tests of Unicode, Dammit."""