summary | refs | log | tree | commit | diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2019-11-11 13:51:41 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2019-11-11 13:51:41 -0500
commit: 68e5565dd1be82b0f3e981abd8b5419f9d8258b8 (patch)
tree: 4ca6790585c1af05f2f269b6b4ab635fe90683e8 /bs4/builder/_htmlparser.py
parent: 9b72457805dcf60e283bccd947fe4e88c79607a3 (diff)
The html.parser tree builder now correctly handles DOCTYPEs that are
not uppercase. [bug=1848401]
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 7
1 file changed, 4 insertions, 3 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index cd50eb0..6a076a1 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -187,9 +187,10 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_decl(self, data):
self.soup.endData()
- if data.startswith("DOCTYPE "):
- data = data[len("DOCTYPE "):]
- elif data == 'DOCTYPE':
+ doctype_len = len("DOCTYPE ")
+ if data[:doctype_len].lower() == "doctype ":
+ data = data[doctype_len:]
+ elif len(data) == doctype_len-1 and data.lower() == 'doctype':
# i.e. "<!DOCTYPE>"
data = ''
self.soup.handle_data(data)