From a6f897b213bb08f0d8d8a1528937541c280abbd6 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 5 Apr 2020 15:43:58 -0400 Subject: Embedded CSS and Javascript is now stored in distinct Stylesheet and Script tags, which are ignored by methods like get_text(). This feature is not supported by the html5lib treebuilder. [bug=1868861] --- bs4/tests/test_soup.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'bs4/tests/test_soup.py') diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index dc88662..8d0583c 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -73,6 +73,7 @@ class TestConstructor(SoupTest): self.store_line_numbers = False self.cdata_list_attributes = [] self.preserve_whitespace_tags = [] + self.string_containers = {} def initialize_soup(self, soup): pass def feed(self, markup): @@ -186,7 +187,41 @@ class TestConstructor(SoupTest): isinstance(x, (TagPlus, StringPlus, CommentPlus)) for x in soup.recursiveChildGenerator() ) + + def test_alternate_string_containers(self): + # Test the ability to customize the string containers for + # different types of tags. + class PString(NavigableString): + pass + + class BString(NavigableString): + pass + + soup = self.soup( + "
<div>Hello.<p>Here is <b>some <i>bolded</i></b> <i>text</i>", + string_containers = { + 'b': BString, + 'p': PString, + } + ) + + # The string before the <p> tag is a regular NavigableString. + assert isinstance(soup.div.contents[0], NavigableString) + # The string inside the <p> tag, but not inside the <i> tag, + # is a PString. + assert isinstance(soup.p.contents[0], PString) + + # Every string inside the <b> tag is a BString, even the one that + # was also inside an <i> tag. + for s in soup.b.strings: + assert isinstance(s, BString) + + # Now that parsing was complete, the string_container_stack + # (where this information was kept) has been cleared out. + self.assertEqual([], soup.string_container_stack) + + class TestWarnings(SoupTest): def _no_parser_specified(self, s, is_there=True): -- cgit v1.2.3