summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-01-29 00:23:43 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-01-29 00:23:43 -0500
commit 04e658f7036a4889a5a0f307c9860adf049f8eca (patch)
tree 31a3c1782a1ca855fd9294333871e61a904f72a0
parent 7a6f07b1650eaba61d4c669feb5565cf443b2531 (diff)
parent ac851828df94b49769f7bed38cca6182a60540f5 (diff)
Ported even more tests.
-rw-r--r-- beautifulsoup/testing.py 20
-rw-r--r-- tests/test_tree.py 47
2 files changed, 67 insertions, 0 deletions
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
index eea14f0..9d0fa3a 100644
--- a/beautifulsoup/testing.py
+++ b/beautifulsoup/testing.py
@@ -119,6 +119,26 @@ class BuilderSmokeTest(SoupTest):
self.assertSoupEquals('<this is="really messed up & stuff"></this>',
'<this is="really messed up &amp; stuff"></this>')
+ def test_literal_in_textarea(self):
+ # Anything inside a <textarea> is supposed to be treated as
+ # the literal value of the field (XXX citation needed).
+ #
+ # However, both lxml and html5lib do their best to parse the
+ # contents of a <textarea> as HTML.
+ text = '<textarea>Junk like <b> tags and <&<&amp;</textarea>'
+ soup = BeautifulSoup(text)
+ self.assertEquals(len(soup.textarea.contents), 2)
+ self.assertEquals(soup.textarea.contents[0], u"Junk like ")
+ self.assertEquals(soup.textarea.contents[1].name, 'b')
+ self.assertEquals(soup.textarea.b.string, u" tags and ")
+
+ def test_literal_in_script(self):
+ # The contents of a <script> tag are treated as a literal string,
+ # even if that string contains HTML.
+ javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
+ soup = BeautifulSoup('<script>%s</script>' % javascript)
+ self.assertEquals(soup.script.string, javascript)
+
class BuilderInvalidMarkupSmokeTest(SoupTest):
"""Tests of invalid markup.
diff --git a/tests/test_tree.py b/tests/test_tree.py
index a3c4b3b..eac4e72 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -9,6 +9,8 @@ same markup, but all Beautiful Soup trees can be traversed with the
methods tested here.
"""
+import copy
+import cPickle as pickle
import re
from beautifulsoup import BeautifulSoup
from beautifulsoup.element import SoupStrainer, Tag
@@ -768,3 +770,48 @@ class TestElementObjects(SoupTest):
soup = self.soup("<b></b>")
self.assertFalse(soup.b.string)
+
+
+class TestPersistence(SoupTest):
+ "Testing features like pickle and deepcopy."
+
+ def setUp(self):
+ super(TestPersistence, self).setUp()
+ self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+"http://www.w3.org/TR/REC-html40/transitional.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
+<link rev="made" href="mailto:leonardr@segfault.org">
+<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
+<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
+<meta name="author" content="Leonard Richardson">
+</head>
+<body>
+<a href="foo">foo</a>
+<a href="foo"><b>bar</b></a>
+</body>
+</html>"""
+ self.tree = self.soup(self.page)
+
+ def test_pickle_and_unpickle_identity(self):
+ # Pickling a tree, then unpickling it, yields a tree identical
+ # to the original.
+ dumped = pickle.dumps(self.tree, 2)
+ loaded = pickle.loads(dumped)
+ self.assertEqual(loaded.__class__, BeautifulSoup)
+ self.assertEqual(loaded.decode(), self.tree.decode())
+
+ def test_deepcopy_identity(self):
+ # Making a deepcopy of a tree yields an identical tree.
+ copied = copy.deepcopy(self.tree)
+ self.assertEqual(copied.decode(), self.tree.decode())
+
+ def test_unicode_pickle(self):
+ # A tree containing Unicode characters can be pickled.
+ html = u"<b>\N{SNOWMAN}</b>"
+ soup = self.soup(html)
+ dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
+ loaded = pickle.loads(dumped)
+ self.assertEqual(loaded.decode(), soup.decode())