diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2009-03-13 18:09:23 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2009-03-13 18:09:23 -0400 |
commit | cafaed172c6fd8b0668f1365bbb97331ca4fa1dc (patch) | |
tree | 35effc997e715cbb2c6577232e60c01e634dc77c /TODO |
Initial (manual) import.
Diffstat (limited to 'TODO')
-rw-r--r-- | TODO | 42 |
1 files changed, 42 insertions, 0 deletions
@@ -0,0 +1,42 @@ +Here are some unit tests that fail with HTMLParser. + + def testValidButBogusDeclarationFAILS(self): + self.assertSoupEquals('<! Foo >a', '<!Foo >a') + + def testIncompleteDeclarationAtEndFAILS(self): + self.assertSoupEquals('a<!b') + + def testIncompleteEntityAtEndFAILS(self): + self.assertSoupEquals('&lt;Hello&gt') + + # This is not what the original author had in mind, but it's + # a legitimate interpretation of what they wrote. + self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""", + '<a href="foo</a>, </a><a href="></a>, <a href="bar">baz</a>') + # SGMLParser generates bogus parse events when attribute values + # contain embedded brackets, but at least Beautiful Soup fixes + # it up a little. + self.assertSoupEquals('<a b="<a>">', '<a b="<a>"></a><a>"></a>') + self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah', + """<a href='"http://foo.com/'></a><a> and blah and blah</a>""") + + invalidEntity = "foo&#bar;baz" + soup = BeautifulStoneSoup\ + (invalidEntity, + convertEntities=htmlEnt) + self.assertEquals(str(soup), invalidEntity) + + +Tag names that contain Unicode characters crash the parser: + def testUnicodeTagNamesFAILS(self): + self.assertSoupEquals("<デダ芻デダtext>2PM</デダ芻デダtext>") + +Here's the implementation of NavigableString.__unicode__: + + def __unicode__(self): + return unicode(str(self)) + +It converts the Unicode to a string, and then back to Unicode. I can't +find any other way of turning an element of a Unicode subclass into a +normal Unicode object. This is pretty bad and a better technique is +welcome. |