diff options
Diffstat (limited to 'src/beautifulsoup/TODO')
-rw-r--r-- | src/beautifulsoup/TODO | 42 |
1 files changed, 0 insertions, 42 deletions
diff --git a/src/beautifulsoup/TODO b/src/beautifulsoup/TODO
deleted file mode 100644
index 84fa273..0000000
--- a/src/beautifulsoup/TODO
+++ /dev/null
@@ -1,42 +0,0 @@
-Here are some unit tests that fail with HTMLParser.
-
-    def testValidButBogusDeclarationFAILS(self):
-        self.assertSoupEquals('<! Foo >a', '<!Foo >a')
-
-    def testIncompleteDeclarationAtEndFAILS(self):
-        self.assertSoupEquals('a<!b')
-
-    def testIncompleteEntityAtEndFAILS(self):
-        self.assertSoupEquals('<Hello>')
-
-        # This is not what the original author had in mind, but it's
-        # a legitimate interpretation of what they wrote.
-        self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""",
-                              '<a href="foo</a>, </a><a href="></a>, <a href="bar">baz</a>')
-        # SGMLParser generates bogus parse events when attribute values
-        # contain embedded brackets, but at least Beautiful Soup fixes
-        # it up a little.
-        self.assertSoupEquals('<a b="<a>">', '<a b="<a>"></a><a>"></a>')
-        self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
-                              """<a href='"http://foo.com/'></a><a> and blah and blah</a>""")
-
-        invalidEntity = "foo&#bar;baz"
-        soup = BeautifulStoneSoup\
-               (invalidEntity,
-                convertEntities=htmlEnt)
-        self.assertEquals(str(soup), invalidEntity)
-
-
-Tag names that contain Unicode characters crash the parser:
-    def testUnicodeTagNamesFAILS(self):
-        self.assertSoupEquals("<デダ芻デダtext>2PM</デダ芻デダtext>")
-
-Here's the implementation of NavigableString.__unicode__:
-
-    def __unicode__(self):
-        return unicode(str(self))
-
-It converts the Unicode to a string, and then back to Unicode. I can't
-find any other way of turning an element of a Unicode subclass into a
-normal Unicode object. This is pretty bad and a better technique is
-welcome.
|