summary | refs | log | tree | commit | diff
path: root/bs4
diff options
context:
space:
mode:
Diffstat (limited to 'bs4')
-rw-r--r-- bs4/element.py 20
-rw-r--r-- bs4/tests/test_tree.py 15
2 files changed, 30 insertions, 5 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 77c8da0..d58da92 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -873,16 +873,24 @@ class Tag(PageElement):
self.clear()
self.append(string.__class__(string))
- def _all_strings(self, strip=False):
- """Yield all child strings, possibly stripping them."""
+ def _all_strings(self, strip=False, types=(NavigableString, CData)):
+ """Yield all strings of certain classes, possibly stripping them.
+
+ By default, yields only NavigableString and CData objects. So
+ no comments, processing instructions, etc.
+ """
for descendant in self.descendants:
- if not isinstance(descendant, NavigableString):
+ if (
+ (types is None and not isinstance(descendant, NavigableString))
+ or
+ (types is not None and type(descendant) not in types)):
continue
if strip:
descendant = descendant.strip()
if len(descendant) == 0:
continue
yield descendant
+
strings = property(_all_strings)
@property
@@ -890,11 +898,13 @@ class Tag(PageElement):
for string in self._all_strings(True):
yield string
- def get_text(self, separator=u"", strip=False):
+ def get_text(self, separator=u"", strip=False,
+ types=(NavigableString, CData)):
"""
Get all child strings, concatenated using the given separator.
"""
- return separator.join([s for s in self._all_strings(strip)])
+ return separator.join([s for s in self._all_strings(
+ strip, types=types)])
getText = get_text
text = property(get_text)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index a5e761f..5e4a9dd 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -20,6 +20,7 @@ from bs4.builder import (
)
from bs4.element import (
CData,
+ Comment,
Doctype,
NavigableString,
SoupStrainer,
@@ -1167,6 +1168,20 @@ class TestElementObjects(SoupTest):
self.assertEqual(soup.a.get_text(","), "a,r, , t ")
self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
+ def test_get_text_ignores_comments(self):
+ soup = self.soup("foo<!--IGNORE-->bar")
+ self.assertEqual(soup.get_text(), "foobar")
+
+ self.assertEqual(
+ soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
+ self.assertEqual(
+ soup.get_text(types=None), "fooIGNOREbar")
+
+ def test_all_strings_ignores_comments(self):
+ soup = self.soup("foo<!--IGNORE-->bar")
+ self.assertEqual(['foo', 'bar'], list(soup.strings))
+
+
class TestCDAtaListAttributes(SoupTest):
"""Testing cdata-list attributes like 'class'.