summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 14:49:41 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 14:49:41 -0500
commit e170ff33e67e806cf33e2e51fcefcfa0b9310d96 (patch)
tree 447cddabac142fefd583df1acd6268f6abcb8f5c
parent 0c9e690dedf720c7c34cc2433f0ccd03f7eb2a85 (diff)
Moved in the last of the tests from TODO.
-rw-r--r-- TODO 18
-rw-r--r-- tests/test_lxml.py 4
2 files changed, 4 insertions, 18 deletions
diff --git a/TODO b/TODO
index 74ce8bd..a799bbb 100644
--- a/TODO
+++ b/TODO
@@ -21,21 +21,3 @@ as-yet-unreleased version of html5lib changes the parser's handling of
CDATA sections to allow CDATA sections in tags like <svg> and
<math>. The HTML5TreeBuilder will need to be updated to create CData
objects instead of Comment objects in this situation.
-
-
-
----
-
-Tag names that contain Unicode characters crash the parser:
- def testUnicodeTagNamesFAILS(self):
- self.assertSoupEquals("<デダ芻デダtext>2PM</デダ芻デダtext>")
-
-Here's the implementation of NavigableString.__unicode__:
-
- def __unicode__(self):
- return unicode(str(self))
-
-It converts the Unicode to a string, and then back to Unicode. I can't
-find any other way of turning an element of a Unicode subclass into a
-normal Unicode object. This is pretty bad and a better technique is
-welcome.
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 85c6a1b..7e15dcf 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -425,6 +425,10 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
# The declaration is ignored altogether.
self.assertEquals(soup.encode(), "<html><body><p>a</p></body></html>")
+ def test_tag_name_contains_unicode(self):
+ # Unicode characters in tag names are stripped.
+ tag_name = u"<our\N{SNOWMAN}>Joe</our\N{SNOWMAN}>"
+ self.assertSoupEquals("<our>Joe</our>")
class TestLXMLBuilderEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various