From e170ff33e67e806cf33e2e51fcefcfa0b9310d96 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 18 Feb 2011 14:49:41 -0500
Subject: Moved in the last of the tests from TODO.

---
 TODO               | 18 ------------------
 tests/test_lxml.py |  4 ++++
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/TODO b/TODO
index 74ce8bd..a799bbb 100644
--- a/TODO
+++ b/TODO
@@ -21,21 +21,3 @@ as-yet-unreleased version of html5lib changes the parser's handling of
 CDATA sections to allow CDATA sections in tags like <svg> and
 <math>. The HTML5TreeBuilder will need to be updated to create CData
 objects instead of Comment objects in this situation.
-
-
-
----
-
-Tag names that contain Unicode characters crash the parser:
- def testUnicodeTagNamesFAILS(self):
-     self.assertSoupEquals("<デダ芻デダtext>2PM")
-
-Here's the implementation of NavigableString.__unicode__:
-
- def __unicode__(self):
-     return unicode(str(self))
-
-It converts the Unicode to a string, and then back to Unicode. I can't
-find any other way of turning an element of a Unicode subclass into a
-normal Unicode object. This is pretty bad and a better technique is
-welcome.
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 85c6a1b..7e15dcf 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -425,6 +425,10 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
         # The declaration is ignored altogether.
         self.assertEquals(soup.encode(), "

a

")
 
+    def test_tag_name_contains_unicode(self):
+        # Unicode characters in tag names are stripped.
+        tag_name = u"Joe"
+        self.assertSoupEquals("Joe")
 
 class TestLXMLBuilderEncodingConversion(SoupTest):
     # Test Beautiful Soup's ability to decode and encode from various
-- 
cgit v1.2.3