diff options
-rw-r--r-- | NEWS.txt | 5 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 3 |
2 files changed, 7 insertions, 1 deletion
@@ -3,9 +3,12 @@
 * As per PEP-8, allow searching by CSS class using the 'class_'
   keyword argument. [bug=1037624]
 
-* Use namespace prefixes for namespaced attribute names, instead of
+* Display namespace prefixes for namespaced attribute names, instead of
   the fully-qualified names given by the lxml parser. [bug=1037597]
 
+* Fixed a crash on encoding when an attribute name contained
+  non-ASCII characters.
+
 * When sniffing encodings, if the cchardet library is installed,
   Beautiful Soup uses cchardet instead of chardet. cchardet is much
   faster. [bug=1020748]
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index a10a89e..c69ce31 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -175,6 +175,9 @@ class TestEncodingConversion(SoupTest):
         soup_from_unicode = self.soup(self.unicode_data)
         self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
 
+    def test_attribute_name_containing_unicode_characters(self):
+        markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
+        self.assertEquals(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
 
 class TestUnicodeDammit(unittest.TestCase):
     """Standalone tests of Unicode, Dammit."""