summary | refs | log | tree | commit | diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c307ff8..3dee51b 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -96,6 +96,14 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_pi(self, data):
self.soup.endData()
+ if data.endswith("?") and data.lower().startswith("xml"):
+ # "An XHTML processing instruction using the trailing '?'
+ # will cause the '?' to be included in data." - HTMLParser
+ # docs.
+ #
+ # Strip the question mark so we don't end up with two
+ # question marks.
+ data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)