diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 14:49:41 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 14:49:41 -0500 |
commit | e170ff33e67e806cf33e2e51fcefcfa0b9310d96 (patch) | |
tree | 447cddabac142fefd583df1acd6268f6abcb8f5c | |
parent | 0c9e690dedf720c7c34cc2433f0ccd03f7eb2a85 (diff) |
Moved in the last of the tests from TODO.
-rw-r--r-- | TODO | 18 | ||||
-rw-r--r-- | tests/test_lxml.py | 4 |
2 files changed, 4 insertions, 18 deletions
```diff
@@ -21,21 +21,3 @@ as-yet-unreleased version of html5lib changes the parser's handling of
 CDATA sections to allow CDATA sections in tags like <svg> and
 <math>. The HTML5TreeBuilder will need to be updated to create CData
 objects instead of Comment objects in this situation.
-
-
-
----
-
-Tag names that contain Unicode characters crash the parser:
-    def testUnicodeTagNamesFAILS(self):
-        self.assertSoupEquals("<デダ芻デダtext>2PM</デダ芻デダtext>")
-
-Here's the implementation of NavigableString.__unicode__:
-
-    def __unicode__(self):
-        return unicode(str(self))
-
-It converts the Unicode to a string, and then back to Unicode. I can't
-find any other way of turning an element of a Unicode subclass into a
-normal Unicode object. This is pretty bad and a better technique is
-welcome.
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 85c6a1b..7e15dcf 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -425,6 +425,10 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
         # The declaration is ignored altogether.
         self.assertEquals(soup.encode(), "<html><body><p>a</p></body></html>")
 
+    def test_tag_name_contains_unicode(self):
+        # Unicode characters in tag names are stripped.
+        tag_name = u"<our\N{SNOWMAN}>Joe</our\N{SNOWMAN}>"
+        self.assertSoupEquals("<our>Joe</our>")
 
 class TestLXMLBuilderEncodingConversion(SoupTest):
     # Test Beautiful Soup's ability to decode and encode from various
```