diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/element.py | 20 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 15 |
2 files changed, 30 insertions, 5 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 77c8da0..d58da92 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -873,16 +873,24 @@ class Tag(PageElement):
         self.clear()
         self.append(string.__class__(string))
 
-    def _all_strings(self, strip=False):
-        """Yield all child strings, possibly stripping them."""
+    def _all_strings(self, strip=False, types=(NavigableString, CData)):
+        """Yield all strings of certain classes, possibly stripping them.
+
+        By default, yields only NavigableString and CData objects. So
+        no comments, processing instructions, etc.
+        """
         for descendant in self.descendants:
-            if not isinstance(descendant, NavigableString):
+            if (
+                (types is None and not isinstance(descendant, NavigableString))
+                or
+                (types is not None and type(descendant) not in types)):
                 continue
             if strip:
                 descendant = descendant.strip()
                 if len(descendant) == 0:
                     continue
             yield descendant
+
     strings = property(_all_strings)
 
     @property
@@ -890,11 +898,13 @@ class Tag(PageElement):
         for string in self._all_strings(True):
             yield string
 
-    def get_text(self, separator=u"", strip=False):
+    def get_text(self, separator=u"", strip=False,
+                 types=(NavigableString, CData)):
         """
         Get all child strings, concatenated using the given separator.
         """
-        return separator.join([s for s in self._all_strings(strip)])
+        return separator.join([s for s in self._all_strings(
+                    strip, types=types)])
 
     getText = get_text
     text = property(get_text)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index a5e761f..5e4a9dd 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -20,6 +20,7 @@ from bs4.builder import (
 )
 from bs4.element import (
     CData,
+    Comment,
     Doctype,
     NavigableString,
     SoupStrainer,
@@ -1167,6 +1168,20 @@ class TestElementObjects(SoupTest):
         self.assertEqual(soup.a.get_text(","), "a,r, , t ")
         self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
 
+    def test_get_text_ignores_comments(self):
+        soup = self.soup("foo<!--IGNORE-->bar")
+        self.assertEqual(soup.get_text(), "foobar")
+
+        self.assertEqual(
+            soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
+        self.assertEqual(
+            soup.get_text(types=None), "fooIGNOREbar")
+
+    def test_all_strings_ignores_comments(self):
+        soup = self.soup("foo<!--IGNORE-->bar")
+        self.assertEqual(['foo', 'bar'], list(soup.strings))
+
+
 class TestCDAtaListAttributes(SoupTest):
 
     """Testing cdata-list attributes like 'class'.