diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-05-07 08:19:02 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-05-07 08:19:02 -0400 |
commit | 269157a8f40dfdac082f39befd69f170263d2ce1 (patch) | |
tree | 6de82a5e216da6dc16430a94e2ed411e920d372a /bs4/builder/_htmlparser.py | |
parent | c4ce22b415ab81ba0e3fb4a3fb28f4ce68dccbde (diff) |
Now that lxml's segfault on invalid doctype has been fixed, fix a
corresponding problem on the Beautiful Soup end that was previously
invisible. [bug=984936]
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- | bs4/builder/_htmlparser.py | 3 |
1 file changed, 3 insertions, 0 deletions
```diff
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ede5cec..e34c9fa 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -85,6 +85,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
         self.soup.endData()
         if data.startswith("DOCTYPE "):
             data = data[len("DOCTYPE "):]
+        elif data == 'DOCTYPE':
+            # i.e. "<!DOCTYPE>"
+            data = ''
         self.soup.handle_data(data)
         self.soup.endData(Doctype)
```