Fixed a bug that made the HTMLParser treebuilder generate XML declarations ending with two question marks instead of one. [bug=984258]

author: Leonard Richardson <leonard.richardson@canonical.com> 2012-04-18 08:45:51 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-04-18 08:45:51 -0400
commit: 3d0ae02cc3d0b947ef6102b31f4b354eec9b543a (patch)
tree: e4ee65c0f4d9c32c6bf738a3524c66b6a5737d40 /bs4/builder/_htmlparser.py
parent: 4a587ff6996a2192944d7fec341180c2a116ea17 (diff)
1 file changed, 8 insertions, 0 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index c307ff8..3dee51b 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -96,6 +96,14 @@ class BeautifulSoupHTMLParser(HTMLParser):
 
     def handle_pi(self, data):
         self.soup.endData()
+        if data.endswith("?") and data.lower().startswith("xml"):
+            # "An XHTML processing instruction using the trailing '?'
+            # will cause the '?' to be included in data." - HTMLParser
+            # docs.
+            #
+            # Strip the question mark so we don't end up with two
+            # question marks.
+            data = data[:-1]
         self.soup.handle_data(data)
         self.soup.endData(ProcessingInstruction)
author	Leonard Richardson <leonard.richardson@canonical.com>	2012-04-18 08:45:51 -0400
committer	Leonard Richardson <leonard.richardson@canonical.com>	2012-04-18 08:45:51 -0400
commit	3d0ae02cc3d0b947ef6102b31f4b354eec9b543a (patch)
tree	e4ee65c0f4d9c32c6bf738a3524c66b6a5737d40 /bs4/builder/_htmlparser.py
parent	4a587ff6996a2192944d7fec341180c2a116ea17 (diff)