summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bs4/diagnose.py8
-rw-r--r--bs4/testing.py2
-rw-r--r--bs4/tests/test_tree.py18
3 files changed, 25 insertions, 3 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 5192a3f..daaf523 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -24,13 +24,17 @@ def diagnose(data):
for parser in basic_parsers:
print "Trying to parse your data with %s" % parser
+ success = False
try:
soup = BeautifulSoup(data, parser)
- print "Here's what %s did with the document:" % parser
- print soup.prettify()
+ success = True
except Exception, e:
print "%s could not parse the document." % parser
traceback.print_exc()
+ if success:
+ print "Here's what %s did with the document:" % parser
+ print soup.prettify()
+
print "-" * 80
def lxml_trace(data, html=True):
diff --git a/bs4/testing.py b/bs4/testing.py
index 1a92af4..c9307d3 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -163,7 +163,7 @@ class HTMLTreeBuilderSmokeTest(object):
foo = soup.find(text="foo")
self.assertEqual(comment, foo.next_element)
baz = soup.find(text="baz")
- self.assertEquals(comment, baz.previous_element)
+ self.assertEqual(comment, baz.previous_element)
def test_preserved_whitespace_in_pre_and_textarea(self):
"""Whitespace must be preserved in <pre> and <textarea> tags."""
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 5f3395b..3e75fae 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1310,6 +1310,24 @@ class TestSubstitutions(SoupTest):
expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
+ def test_formatter_skips_script_tag_for_html_documents(self):
+ doc = """
+ <script type="text/javascript">
+ console.log("< < hey > > ");
+ </script>
+"""
+ encoded = BeautifulSoup(doc).encode()
+ self.assertTrue("< < hey > >" in encoded)
+
+ def test_formatter_processes_script_tag_for_xml_documents(self):
+ doc = """
+ <script type="text/javascript">
+ console.log("< < hey > > ");
+ </script>
+"""
+ encoded = BeautifulSoup(doc).encode()
+ self.assertTrue("&lt; &lt; hey &gt; &gt;" in encoded)
+
def test_prettify_accepts_formatter(self):
soup = BeautifulSoup("<html><body>foo</body></html>")
pretty = soup.prettify(formatter = lambda x: x.upper())