summaryrefslogtreecommitdiff
path: root/bs4/element.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/element.py')
-rw-r--r--bs4/element.py43
1 files changed, 41 insertions, 2 deletions
diff --git a/bs4/element.py b/bs4/element.py
index c4b5bc7..73e3867 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -191,7 +191,7 @@ class PageElement(object):
def replace_with(self, replace_with):
if self.parent is None:
raise ValueError(
- "Cannot replace one element with another when the"
+ "Cannot replace one element with another when the "
"element to be replaced is not part of a tree.")
if replace_with is self:
return
@@ -899,6 +899,43 @@ class Tag(PageElement):
for element in self.contents[:]:
element.extract()
+ def smooth(self):
+ """Smooth out this element's children by consolidating consecutive strings.
+
+ This makes pretty-printed output look more natural following a
+ lot of operations that modified the tree.
+ """
+ # Mark the first position of every pair of children that need
+ # to be consolidated. Do this rather than making a copy of
+ # self.contents, since in most cases very few strings will be
+ # affected.
+ marked = []
+ for i, a in enumerate(self.contents):
+ if isinstance(a, Tag):
+ # Recursively smooth children.
+ a.smooth()
+ if i == len(self.contents)-1:
+ # This is the last item in .contents, and it's not a
+ # tag. There's no chance it needs any work.
+ continue
+ b = self.contents[i+1]
+ if (isinstance(a, NavigableString)
+ and isinstance(b, NavigableString)
+ and not isinstance(a, PreformattedString)
+ and not isinstance(b, PreformattedString)
+ ):
+ marked.append(i)
+
+ # Go over the marked positions in reverse order, so that
+ # removing items from .contents won't affect the remaining
+ # positions.
+ for i in reversed(marked):
+ a = self.contents[i]
+ b = self.contents[i+1]
+ b.extract()
+ n = NavigableString(a+b)
+ a.replace_with(n)
+
def index(self, element):
"""
Find the index of a child by identity, not value. Avoids issues with
@@ -1173,7 +1210,9 @@ class Tag(PageElement):
elif isinstance(c, Tag):
s.append(c.decode(indent_level, eventual_encoding,
formatter))
- preserve_whitespace = self.name in self.preserve_whitespace_tags
+ preserve_whitespace = (
+ self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
+ )
if text and indent_level and not preserve_whitespace:
text = text.strip()
if text: