2 files changed, 94 insertions, 19 deletions
diff --git a/bs4/element.py b/bs4/element.py
index aaa00fb..80ebbef 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1656,38 +1656,89 @@ class Tag(PageElement):
         if indent_level is True:
             indent_level = 0
 
-        string_literal_mode = False
+        # The currently active tag that put us into string literal
+        # mode. Until this element is closed, children will be treated
+        # as string literals and not pretty-printed. String literal
+        # mode is turned on immediately after this tag begins, and
+        # turned off immediately before it's closed. This means there
+        # will be whitespace before and after the tag itself.
+        string_literal_tag = None
+
         for event, element in self._event_stream(iterator):
             if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
                 piece = element._format_tag(
-                    eventual_encoding, formatter, opening=True)
+                    eventual_encoding, formatter, opening=True
+                )
             elif event is Tag.END_ELEMENT_EVENT:
                 piece = element._format_tag(
-                    eventual_encoding, formatter, opening=False)
+                    eventual_encoding, formatter, opening=False
+                )
                 if indent_level is not None:
                     indent_level -= 1
-                string_literal_mode = False
             else:
                 piece = element.output_ready(formatter)
 
-            if isinstance(element, Tag) and not element._should_pretty_print():
-                if event is Tag.START_ELEMENT_EVENT:
-                    # After processing this event we will be in string
-                    # literal mode.
-                    string_literal_mode = True
-                    indent_before = True
-                    indent_after = False
-                else:
-                    # After processing this event we will no longer be
-                    # in string literal mode.
-                    string_literal_mode = False
-                    indent_before = False
-                    indent_after = True
-            elif string_literal_mode:
+            # Now we need to apply the 'prettiness' -- extra
+            # whitespace before and/or after this element. This can
+            # get complicated because certain tags, like <pre> and
+            # <script>, can't be prettified, since adding whitespace
+            # would change the meaning of the content.
+
+            # When we encounter one of those Tags we need to enter
+            # what I'm calling "string literal mode". We will stay
+            # inside string literal mode until that particular Tag is
+            # closed.
+            #
+            # By definition, string literal mode is on when the
+            # string_literal_tag is set to a Tag.
+            #
+            # For each event we process, there are four possibilities:
+            #
+            # 1. We are entering string literal mode (e.g. by
+            #    encountering a <pre> tag). In this case we want
+            #    whitespace before the tag but not after.
+            #
+            # 2. We are exiting string literal mode (by closing the
+            #    tag that originally put us into string literal
+            #    mode). In this case we want whitespace after the tag
+            #    but not before.
+            #
+            # 3. We are in string literal mode and will be staying
+            #    there. We will not be adding whitespace before or
+            #    after this element.
+            #
+            # 4. We are outside string literal mode and will be
+            #    staying there. We will be putting whitespace before
+            #    and after this element.
+
+            # The default behavior is to add whitespace before and
+            # after an element when string literal mode is off, and to
+            # leave things as they are when string literal mode is on.
+            if string_literal_tag:
                 indent_before = indent_after = False
             else:
                 indent_before = indent_after = True
 
+            # The only time the behavior is more complex than that is
+            # when we encounter an opening or closing tag that might
+            # put us into or out of string literal mode.
+            if isinstance(element, Tag) and not element._should_pretty_print():
+                if event is Tag.END_ELEMENT_EVENT and element is string_literal_tag:
+                    # We are about to exit string literal mode.  Add
+                    # whitespace after this tag but not before.
+                    indent_before = False
+                    indent_after = True
+                    string_literal_tag = None
+                elif event is Tag.START_ELEMENT_EVENT:
+                    if not string_literal_tag:
+                        # We are about to enter string literal mode.
+                        # Add whitespace before this tag but not after.
+                        indent_before = True
+                        indent_after = False
+                        string_literal_tag = element
+
+            # Now we know whether to add whitespace before and/or
+            # after this element.
             if indent_level is not None:
                 if (indent_before or indent_after):
                     if isinstance(element, NavigableString):
@@ -1763,7 +1814,7 @@ class Tag(PageElement):
            (a newline) after the string.
         """
         space_before = ''
-        if indent_before:
+        if indent_before and indent_level:
             space_before = (formatter.indent * indent_level)
 
         space_after = ''
diff --git a/bs4/tests/test_pageelement.py b/bs4/tests/test_pageelement.py
index f8eb9bb..a0476e4 100644
--- a/bs4/tests/test_pageelement.py
+++ b/bs4/tests/test_pageelement.py
@@ -158,6 +158,30 @@ class TestFormatters(SoupTest):
         # inside is left alone.
         assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
 
+    def test_prettify_handles_nested_string_literal_tags(self):
+        # Most of this markup is inside a <pre> tag, so prettify()
+        # only does three things to it:
+        # 1. Add a newline and a space between the <div> and the <pre>
+        # 2. Add a newline after the </pre>
+        # 3. Add a newline at the end.
+        #
+        # The contents of the <pre> tag, trailing space after "for you"
+        # included, are left completely alone. In particular, we don't
+        # resume adding whitespace at the first </pre> tag, because
+        # it's not the one that put us into string literal mode.
+        markup = """<div><pre><code>some
+<script><pre>code</pre></script> for you 
+</code></pre></div>"""
+
+        expect = """<div>
+ <pre><code>some
+<script><pre>code</pre></script> for you 
+</code></pre>
+</div>
+"""
+        soup = self.soup(markup)
+        assert expect == soup.div.prettify()
+
     def test_prettify_accepts_formatter_function(self):
         soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
         pretty = soup.prettify(formatter = lambda x: x.upper())