diff options
author | Leonard Richardson <leonardr@segfault.org> | 2020-04-05 15:43:58 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2020-04-05 15:43:58 -0400 |
commit | a6f897b213bb08f0d8d8a1528937541c280abbd6 (patch) | |
tree | 866d3392a854ea27a172e9b456b2160307e39363 /bs4/tests/test_tree.py | |
parent | ddadf13ef66122d75eadaf7f10e0937429e6a3a6 (diff) |
Embedded CSS and JavaScript are now stored in distinct Stylesheet and
Script string objects (NavigableString subclasses), which are ignored by
methods like get_text(). This feature is not supported by the html5lib
treebuilder. [bug=1868861]
Diffstat (limited to 'bs4/tests/test_tree.py')
-rw-r--r-- | bs4/tests/test_tree.py | 39 |
1 file changed, 37 insertions, 2 deletions
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 80aaaff..7ecab9e 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -27,8 +27,11 @@ from bs4.element import ( Doctype, Formatter, NavigableString, + Script, SoupStrainer, + Stylesheet, Tag, + TemplateString, ) from bs4.testing import ( SoupTest, @@ -1408,7 +1411,7 @@ class TestElementObjects(SoupTest): self.assertEqual(soup.a.get_text(","), "a,r, , t ") self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t") - def test_get_text_ignores_comments(self): + def test_get_text_ignores_special_string_containers(self): soup = self.soup("foo<!--IGNORE-->bar") self.assertEqual(soup.get_text(), "foobar") @@ -1417,10 +1420,17 @@ class TestElementObjects(SoupTest): self.assertEqual( soup.get_text(types=None), "fooIGNOREbar") - def test_all_strings_ignores_comments(self): + soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar") + self.assertEqual(soup.get_text(), "foobar") + + def test_all_strings_ignores_special_string_containers(self): soup = self.soup("foo<!--IGNORE-->bar") self.assertEqual(['foo', 'bar'], list(soup.strings)) + soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar") + self.assertEqual(['foo', 'bar'], list(soup.strings)) + + class TestCDAtaListAttributes(SoupTest): """Testing cdata-list attributes like 'class'. @@ -1874,6 +1884,31 @@ class TestNavigableStringSubclasses(SoupTest): d = Declaration("foo") self.assertEqual("<?foo?>", d.output_ready()) + def test_default_string_containers(self): + # In some cases, we use different NavigableString subclasses for + # the same text in different tags. 
+ soup = self.soup( + "<div>text</div><script>text</script><style>text</style>" + ) + self.assertEqual( + [NavigableString, Script, Stylesheet], + [x.__class__ for x in soup.find_all(text=True)] + ) + + # The TemplateString is a little unusual because it's generally found + # _inside_ children of a <template> element, not a direct child of the + # <template> element. + soup = self.soup( + "<template>Some text<p>In a tag</p></template>Some text outside" + ) + assert all(isinstance(x, TemplateString) for x in soup.template.strings) + + # Once the <template> tag closed, we went back to using + # NavigableString. + outside = soup.template.next_sibling + assert isinstance(outside, NavigableString) + assert not isinstance(outside, TemplateString) + class TestSoupSelector(TreeTest): HTML = """ |