diff options
-rw-r--r-- | bs4/diagnose.py | 8 | ||||
-rw-r--r-- | bs4/testing.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 18 |
3 files changed, 25 insertions, 3 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py index 5192a3f..daaf523 100644 --- a/bs4/diagnose.py +++ b/bs4/diagnose.py @@ -24,13 +24,17 @@ def diagnose(data): for parser in basic_parsers: print "Trying to parse your data with %s" % parser + success = False try: soup = BeautifulSoup(data, parser) - print "Here's what %s did with the document:" % parser - print soup.prettify() + success = True except Exception, e: print "%s could not parse the document." % parser traceback.print_exc() + if success: + print "Here's what %s did with the document:" % parser + print soup.prettify() + print "-" * 80 def lxml_trace(data, html=True): diff --git a/bs4/testing.py b/bs4/testing.py index 1a92af4..c9307d3 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -163,7 +163,7 @@ class HTMLTreeBuilderSmokeTest(object): foo = soup.find(text="foo") self.assertEqual(comment, foo.next_element) baz = soup.find(text="baz") - self.assertEquals(comment, baz.previous_element) + self.assertEqual(comment, baz.previous_element) def test_preserved_whitespace_in_pre_and_textarea(self): """Whitespace must be preserved in <pre> and <textarea> tags.""" diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 5f3395b..3e75fae 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1310,6 +1310,24 @@ class TestSubstitutions(SoupTest): expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>' self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) + def test_formatter_skips_script_tag_for_html_documents(self): + doc = """ + <script type="text/javascript"> + console.log("< < hey > > "); + </script> +""" + encoded = BeautifulSoup(doc).encode() + self.assertTrue("< < hey > >" in encoded) + + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + <script type="text/javascript"> + console.log("< < hey > > "); + </script> +""" + encoded = BeautifulSoup(doc).encode() + self.assertTrue("< < hey > >" in encoded) + def test_prettify_accepts_formatter(self): soup = BeautifulSoup("<html><body>foo</body></html>") pretty = soup.prettify(formatter = lambda x: x.upper()) |