diff options
author | Leonard Richardson <leonardr@segfault.org> | 2019-11-11 13:51:41 -0500 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2019-11-11 13:51:41 -0500 |
commit | 68e5565dd1be82b0f3e981abd8b5419f9d8258b8 (patch) | |
tree | 4ca6790585c1af05f2f269b6b4ab635fe90683e8 /bs4/builder/_htmlparser.py | |
parent | 9b72457805dcf60e283bccd947fe4e88c79607a3 (diff) |
The html.parser tree builder now correctly handles DOCTYPEs that are
not uppercase. [bug=1848401]
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- | bs4/builder/_htmlparser.py | 7 |
1 file changed, 4 insertions, 3 deletions
```diff
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index cd50eb0..6a076a1 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -187,9 +187,10 @@ class BeautifulSoupHTMLParser(HTMLParser):
 
     def handle_decl(self, data):
         self.soup.endData()
-        if data.startswith("DOCTYPE "):
-            data = data[len("DOCTYPE "):]
-        elif data == 'DOCTYPE':
+        doctype_len = len("DOCTYPE ")
+        if data[:doctype_len].lower() == "doctype ":
+            data = data[doctype_len:]
+        elif len(data) == doctype_len-1 and data.lower() == 'doctype':
             # i.e. "<!DOCTYPE>"
             data = ''
         self.soup.handle_data(data)
```