summary | refs | log | tree | commit | diff
path: root/bs4/tests/test_tree.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2020-04-05 15:43:58 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2020-04-05 15:43:58 -0400
commit: a6f897b213bb08f0d8d8a1528937541c280abbd6 (patch)
tree: 866d3392a854ea27a172e9b456b2160307e39363 /bs4/tests/test_tree.py
parent: ddadf13ef66122d75eadaf7f10e0937429e6a3a6 (diff)
Embedded CSS and Javascript are now stored in distinct Stylesheet and
Script strings (NavigableString subclasses), which are ignored by methods like get_text(). This feature is not supported by the html5lib treebuilder. [bug=1868861]
Diffstat (limited to 'bs4/tests/test_tree.py')
-rw-r--r-- bs4/tests/test_tree.py 39
1 files changed, 37 insertions, 2 deletions
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 80aaaff..7ecab9e 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -27,8 +27,11 @@ from bs4.element import (
Doctype,
Formatter,
NavigableString,
+ Script,
SoupStrainer,
+ Stylesheet,
Tag,
+ TemplateString,
)
from bs4.testing import (
SoupTest,
@@ -1408,7 +1411,7 @@ class TestElementObjects(SoupTest):
self.assertEqual(soup.a.get_text(","), "a,r, , t ")
self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
- def test_get_text_ignores_comments(self):
+ def test_get_text_ignores_special_string_containers(self):
soup = self.soup("foo<!--IGNORE-->bar")
self.assertEqual(soup.get_text(), "foobar")
@@ -1417,10 +1420,17 @@ class TestElementObjects(SoupTest):
self.assertEqual(
soup.get_text(types=None), "fooIGNOREbar")
- def test_all_strings_ignores_comments(self):
+ soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar")
+ self.assertEqual(soup.get_text(), "foobar")
+
+ def test_all_strings_ignores_special_string_containers(self):
soup = self.soup("foo<!--IGNORE-->bar")
self.assertEqual(['foo', 'bar'], list(soup.strings))
+ soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar")
+ self.assertEqual(['foo', 'bar'], list(soup.strings))
+
+
class TestCDAtaListAttributes(SoupTest):
"""Testing cdata-list attributes like 'class'.
@@ -1874,6 +1884,31 @@ class TestNavigableStringSubclasses(SoupTest):
d = Declaration("foo")
self.assertEqual("<?foo?>", d.output_ready())
+ def test_default_string_containers(self):
+ # In some cases, we use different NavigableString subclasses for
+ # the same text in different tags.
+ soup = self.soup(
+ "<div>text</div><script>text</script><style>text</style>"
+ )
+ self.assertEqual(
+ [NavigableString, Script, Stylesheet],
+ [x.__class__ for x in soup.find_all(text=True)]
+ )
+
+ # The TemplateString is a little unusual because it's generally found
+ # _inside_ children of a <template> element, not a direct child of the
+ # <template> element.
+ soup = self.soup(
+ "<template>Some text<p>In a tag</p></template>Some text outside"
+ )
+ assert all(isinstance(x, TemplateString) for x in soup.template.strings)
+
+ # Once the <template> tag closed, we went back to using
+ # NavigableString.
+ outside = soup.template.next_sibling
+ assert isinstance(outside, NavigableString)
+ assert not isinstance(outside, TemplateString)
+
class TestSoupSelector(TreeTest):
HTML = """