diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-04-18 08:45:51 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-04-18 08:45:51 -0400 |
commit | 3d0ae02cc3d0b947ef6102b31f4b354eec9b543a (patch) | |
tree | e4ee65c0f4d9c32c6bf738a3524c66b6a5737d40 /bs4/builder/_htmlparser.py | |
parent | 4a587ff6996a2192944d7fec341180c2a116ea17 (diff) |
Fixed a bug that made the HTMLParser treebuilder generate XML definitions ending with two question marks instead of one. [bug=984258]
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- | bs4/builder/_htmlparser.py | 8 |
1 files changed, 8 insertions, 0 deletions
```diff
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c307ff8..3dee51b 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -96,6 +96,14 @@ class BeautifulSoupHTMLParser(HTMLParser):
 
     def handle_pi(self, data):
         self.soup.endData()
+        if data.endswith("?") and data.lower().startswith("xml"):
+            # "An XHTML processing instruction using the trailing '?'
+            # will cause the '?' to be included in data." - HTMLParser
+            # docs.
+            #
+            # Strip the question mark so we don't end up with two
+            # question marks.
+            data = data[:-1]
         self.soup.handle_data(data)
         self.soup.endData(ProcessingInstruction)
 
```