summaryrefslogtreecommitdiff
path: root/bs4/builder/_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder/_lxml.py')
-rw-r--r--bs4/builder/_lxml.py9
1 files changed, 8 insertions, 1 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 9e8f88f..4495bb9 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -1,3 +1,5 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
@@ -12,6 +14,7 @@ from bs4.element import (
Doctype,
NamespacedAttribute,
ProcessingInstruction,
+ XMLProcessingInstruction,
)
from bs4.builder import (
FAST,
@@ -103,6 +106,10 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# iterate over the encodings, and tell lxml to try to parse
# the document as each one in turn.
is_html = not self.is_xml
+ if is_html:
+ self.processing_instruction_class = ProcessingInstruction
+ else:
+ self.processing_instruction_class = XMLProcessingInstruction
try_encodings = [user_specified_encoding, document_declared_encoding]
detector = EncodingDetector(
markup, try_encodings, is_html, exclude_encodings)
@@ -201,7 +208,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def pi(self, target, data):
self.soup.endData()
self.soup.handle_data(target + ' ' + data)
- self.soup.endData(ProcessingInstruction)
+ self.soup.endData(self.processing_instruction_class)
def data(self, content):
self.soup.handle_data(content)