From 68e5565dd1be82b0f3e981abd8b5419f9d8258b8 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Mon, 11 Nov 2019 13:51:41 -0500 Subject: The html.parser tree builder now correctly handles DOCTYPEs that are not uppercase. [bug=1848401] --- bs4/builder/_htmlparser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'bs4/builder/_htmlparser.py') diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index cd50eb0..6a076a1 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -187,9 +187,10 @@ class BeautifulSoupHTMLParser(HTMLParser): def handle_decl(self, data): self.soup.endData() - if data.startswith("DOCTYPE "): - data = data[len("DOCTYPE "):] - elif data == 'DOCTYPE': + doctype_len = len("DOCTYPE ") + if data[:doctype_len].lower() == "doctype ": + data = data[doctype_len:] + elif len(data) == doctype_len-1 and data.lower() == 'doctype': # i.e. "<!DOCTYPE>" data = '' self.soup.handle_data(data) -- cgit v1.2.3