diff options
-rw-r--r-- | CHANGELOG | 5 | ||||
-rw-r--r-- | bs4/__init__.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 15 |
3 files changed, 19 insertions, 3 deletions
@@ -7,6 +7,11 @@ * Performance improvement when processing tags that speeds up overall tree construction by 2%. Patch by Morotti. [bug=1899358] +* Corrected the use of special string container classes in cases when a + single tag may contain strings with different containers, such as + the <template> tag, which may contain both TemplateString objects + and Comment objects. [bug=1913406] + * Added a second way to specify encodings to UnicodeDammit and EncodingDetector, based on the order of precedence defined in the HTML5 spec, starting at: diff --git a/bs4/__init__.py b/bs4/__init__.py index 3714d67..7c6044a 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -497,7 +497,7 @@ class BeautifulSoup(Tag): # On top of that, we may be inside a tag that needs a special # container class. - if self.string_container_stack: + if self.string_container_stack and container is NavigableString: container = self.builder.string_containers.get( self.string_container_stack[-1].name, container ) diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 2246346..1bd1577 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1911,14 +1911,25 @@ class TestNavigableStringSubclasses(SoupTest): soup = self.soup( "<template>Some text<p>In a tag</p></template>Some text outside" ) - assert all(isinstance(x, TemplateString) for x in soup.template.strings) - + assert all( + isinstance(x, TemplateString) + for x in soup.template._all_strings(types=None) + ) + # Once the <template> tag closed, we went back to using # NavigableString. outside = soup.template.next_sibling assert isinstance(outside, NavigableString) assert not isinstance(outside, TemplateString) + # The TemplateString is also unusual because it can contain + # NavigableString subclasses of _other_ types, such as + # Comment. 
+ markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>" + soup = self.soup(markup) + self.assertEqual(markup, soup.template.encode("utf8")) + + class TestSoupSelector(TreeTest): HTML = """ |