2 files changed, 67 insertions, 0 deletions
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
index eea14f0..9d0fa3a 100644
--- a/beautifulsoup/testing.py
+++ b/beautifulsoup/testing.py
@@ -119,6 +119,26 @@ class BuilderSmokeTest(SoupTest):
         self.assertSoupEquals('<this is="really messed up & stuff"></this>',
                               '<this is="really messed up &amp; stuff"></this>')
 
+    def test_literal_in_textarea(self):
+        # The HTML spec gives <textarea> a text-only content model: what
+        # is inside is the field's value, not child markup.
+        #
+        # But both lxml and html5lib do their best to parse the contents
+        # of a <textarea> as HTML, and this test pins down that behavior.
+        text = '<textarea>Junk like <b> tags and <&<&amp;</textarea>'
+        soup = BeautifulSoup(text)
+        self.assertEqual(len(soup.textarea.contents), 2)
+        self.assertEqual(soup.textarea.contents[0], u"Junk like ")
+        self.assertEqual(soup.textarea.contents[1].name, 'b')
+        self.assertEqual(soup.textarea.b.string, u" tags and ")
+
+    def test_literal_in_script(self):
+        # The contents of a <script> tag are treated as a literal string,
+        # even if that string contains HTML.
+        javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
+        soup = BeautifulSoup('<script>%s</script>' % javascript)
+        self.assertEqual(soup.script.string, javascript)
+
 
 class BuilderInvalidMarkupSmokeTest(SoupTest):
     """Tests of invalid markup.
diff --git a/tests/test_tree.py b/tests/test_tree.py
index a3c4b3b..eac4e72 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -9,6 +9,8 @@ same markup, but all Beautiful Soup trees can be traversed with the
 methods tested here.
 """
 
+import copy
+import cPickle as pickle
 import re
 from beautifulsoup import BeautifulSoup
 from beautifulsoup.element import SoupStrainer, Tag
@@ -768,3 +770,48 @@ class TestElementObjects(SoupTest):
 
         soup = self.soup("<b></b>")
         self.assertFalse(soup.b.string)
+
+
+class TestPersistence(SoupTest):
+    """Round-trip a parsed tree through pickle and copy.deepcopy."""
+
+    def setUp(self):
+        super(TestPersistence, self).setUp()
+        self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+"http://www.w3.org/TR/REC-html40/transitional.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
+<link rev="made" href="mailto:leonardr@segfault.org">
+<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
+<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
+<meta name="author" content="Leonard Richardson">
+</head>
+<body>
+<a href="foo">foo</a>
+<a href="foo"><b>bar</b></a>
+</body>
+</html>"""
+        self.tree = self.soup(self.page)
+
+    def test_pickle_and_unpickle_identity(self):
+        # A pickle round trip (protocol 2) must hand back a
+        # BeautifulSoup object whose decoded markup matches that of
+        # the tree it was made from.
+        restored = pickle.loads(pickle.dumps(self.tree, 2))
+        self.assertEqual(restored.__class__, BeautifulSoup)
+        self.assertEqual(restored.decode(), self.tree.decode())
+
+    def test_deepcopy_identity(self):
+        # copy.deepcopy() must produce a tree that decodes identically.
+        duplicate = copy.deepcopy(self.tree)
+        self.assertEqual(duplicate.decode(), self.tree.decode())
+
+    def test_unicode_pickle(self):
+        # Non-ASCII text must survive a pickle round trip at the
+        # highest available protocol.
+        snowman = self.soup(u"<b>\N{SNOWMAN}</b>")
+        pickled = pickle.dumps(snowman, pickle.HIGHEST_PROTOCOL)
+        restored = pickle.loads(pickled)
+        self.assertEqual(restored.decode(), snowman.decode())