summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2021-02-13 13:54:56 -0500
committer Leonard Richardson <leonardr@segfault.org> 2021-02-13 13:54:56 -0500
commit 185ec704743ffa0dfd95b7a29e2f5d38a25433b5 (patch)
tree 39c3535011cc9a1f1e32827d557eb40fba9fa183
parent 8f763297abc8bb598c3aca25eccaef6db7f7c987 (diff)
Corrected the use of special string container classes in cases when a
single tag may contain strings with different containers, such as the <template> tag, which may contain both TemplateString objects and Comment objects. [bug=1913406]
-rw-r--r-- CHANGELOG 5
-rw-r--r-- bs4/__init__.py 2
-rw-r--r-- bs4/tests/test_tree.py 15
3 files changed, 19 insertions, 3 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 93c59ba..288a276 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,11 @@
* Performance improvement when processing tags that speeds up overall
tree construction by 2%. Patch by Morotti. [bug=1899358]
+* Corrected the use of special string container classes in cases when a
+ single tag may contain strings with different containers; such as
+ the <template> tag, which may contain both TemplateString objects
+ and Comment objects. [bug=1913406]
+
* Added a second way to pass specify encodings to UnicodeDammit and
EncodingDetector, based on the order of precedence defined in the
HTML5 spec, starting at:
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 3714d67..7c6044a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -497,7 +497,7 @@ class BeautifulSoup(Tag):
# On top of that, we may be inside a tag that needs a special
# container class.
- if self.string_container_stack:
+ if self.string_container_stack and container is NavigableString:
container = self.builder.string_containers.get(
self.string_container_stack[-1].name, container
)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 2246346..1bd1577 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1911,14 +1911,25 @@ class TestNavigableStringSubclasses(SoupTest):
soup = self.soup(
"<template>Some text<p>In a tag</p></template>Some text outside"
)
- assert all(isinstance(x, TemplateString) for x in soup.template.strings)
-
+ assert all(
+ isinstance(x, TemplateString)
+ for x in soup.template._all_strings(types=None)
+ )
+
# Once the <template> tag closed, we went back to using
# NavigableString.
outside = soup.template.next_sibling
assert isinstance(outside, NavigableString)
assert not isinstance(outside, TemplateString)
+ # The TemplateString is also unusual because it can contain
+ # NavigableString subclasses of _other_ types, such as
+ # Comment.
+ markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
+ soup = self.soup(markup)
+ self.assertEqual(markup, soup.template.encode("utf8"))
+
+
class TestSoupSelector(TreeTest):
HTML = """