3 files changed, 84 insertions, 3 deletions
diff --git a/bs4/element.py b/bs4/element.py
index c4b5bc7..73e3867 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -191,7 +191,7 @@ class PageElement(object):
     def replace_with(self, replace_with):
         if self.parent is None:
             raise ValueError(
-                "Cannot replace one element with another when the"
+                "Cannot replace one element with another when the "
                 "element to be replaced is not part of a tree.")
         if replace_with is self:
             return
@@ -899,6 +899,43 @@ class Tag(PageElement):
             for element in self.contents[:]:
                 element.extract()
 
+    def smooth(self):
+        """Smooth out this element's children by consolidating consecutive strings.
+
+        This makes pretty-printed output look more natural following a
+        lot of operations that modified the tree.
+        """
+        # Mark the first position of every pair of children that need
+        # to be consolidated.  Do this rather than making a copy of
+        # self.contents, since in most cases very few strings will be
+        # affected.
+        marked = []
+        for i, a in enumerate(self.contents):
+            if isinstance(a, Tag):
+                # Recursively smooth children.
+                a.smooth()
+            if i == len(self.contents)-1:
+                # This is the last item in .contents, so there is no
+                # following sibling it could be merged with.
+                continue
+            b = self.contents[i+1]
+            if (isinstance(a, NavigableString)
+                and isinstance(b, NavigableString)
+                and not isinstance(a, PreformattedString)
+                and not isinstance(b, PreformattedString)
+            ):
+                marked.append(i)
+
+        # Go over the marked positions in reverse order, so that
+        # removing items from .contents won't affect the remaining
+        # positions.
+        for i in reversed(marked):
+            a = self.contents[i]
+            b = self.contents[i+1]
+            b.extract()
+            n = NavigableString(a+b)
+            a.replace_with(n)
+
     def index(self, element):
         """
         Find the index of a child by identity, not value. Avoids issues with
@@ -1173,7 +1210,9 @@ class Tag(PageElement):
             elif isinstance(c, Tag):
                 s.append(c.decode(indent_level, eventual_encoding,
                                   formatter))
-            preserve_whitespace = self.name in self.preserve_whitespace_tags
+            preserve_whitespace = (
+                self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
+            )
             if text and indent_level and not preserve_whitespace:
                 text = text.strip()
             if text:
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 6510f85..e655dcc 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -417,6 +417,48 @@ class TestFindAllByAttribute(TreeTest):
         self.assertEqual([], soup.find_all(id=1, text="bar"))
 
 
+class TestSmooth(TreeTest):
+    """Test Tag.smooth."""
+
+    def test_smooth(self):
+        soup = self.soup("<div>a</div>")
+        div = soup.div
+        div.append("b")
+        div.append("c")
+        div.append(Comment("Comment 1"))
+        div.append(Comment("Comment 2"))
+        div.append("d")
+        builder = self.default_builder()
+        span = Tag(soup, builder, 'span')
+        span.append('1')
+        span.append('2')
+        div.append(span)
+
+        # At this point the tree has a bunch of adjacent
+        # NavigableStrings. This is normal, but it has no meaning in
+        # terms of HTML, so we may want to smooth things out for
+        # output.
+
+        # Since the <span> tag has two children, its .string is None.
+        self.assertEquals(None, div.span.string)
+
+        self.assertEqual(7, len(div.contents))
+        div.smooth()
+        self.assertEqual(5, len(div.contents))
+
+        # The three strings at the beginning of div.contents have been
+        # merged into one string.
+        #
+        self.assertEqual('abc', div.contents[0])
+
+        # The call is recursive -- the <span> tag was also smoothed.
+        self.assertEqual('12', div.span.string)
+
+        # The two comments have _not_ been merged, even though
+        # comments are strings. Merging comments would change the
+        # meaning of the HTML.
+        self.assertEqual('Comment 1', div.contents[1])
+        self.assertEqual('Comment 2', div.contents[2])
 
 
 class TestIndex(TreeTest):
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 0c09964..4bca0ae 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2292,7 +2292,7 @@ Subclassing ``HTMLFormatter`` or ``XMLFormatter`` will give you even
 more control over the output. For example, Beautiful Soup sorts the
 attributes in every tag by default::
 
- attr_soup = BeautifulSoup('<p z="1" m="2" a="3"></p>')
+ attr_soup = BeautifulSoup(b'<p z="1" m="2" a="3"></p>')
  print(attr_soup.p.encode())
  # <p a="3" m="2" z="1"></p>