summary | refs | log | tree | commit | diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2013-05-07 08:19:02 -0400
committer Leonard Richardson <leonardr@segfault.org> 2013-05-07 08:19:02 -0400
commit 269157a8f40dfdac082f39befd69f170263d2ce1 (patch)
tree 6de82a5e216da6dc16430a94e2ed411e920d372a /bs4/builder/_htmlparser.py
parent c4ce22b415ab81ba0e3fb4a3fb28f4ce68dccbde (diff)
Now that lxml's segfault on invalid doctype has been fixed, fix a
corresponding problem on the Beautiful Soup end that was previously invisible. [bug=984936]
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 3
1 file changed, 3 insertions, 0 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ede5cec..e34c9fa 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -85,6 +85,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
self.soup.endData()
if data.startswith("DOCTYPE "):
data = data[len("DOCTYPE "):]
+ elif data == 'DOCTYPE':
+ # i.e. "<!DOCTYPE>"
+ data = ''
self.soup.handle_data(data)
self.soup.endData(Doctype)