summary refs log tree commit diff
path: root/bs4/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests')
-rw-r--r-- bs4/tests/test_htmlparser.py 15
-rw-r--r-- bs4/tests/test_tree.py 36
2 files changed, 47 insertions, 4 deletions
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index d5cf025..0381c7d 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -5,6 +5,7 @@ from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
+from bs4.builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
@@ -32,3 +33,17 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
def test_redundant_empty_element_closing_tags(self):
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
self.assertSoupEquals('</br></br></br>', "")
+
+ def test_empty_element(self):
+ # This verifies that any buffered data present when the parser
+ # finishes working is handled.
+ self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
+
+
+class TestHTMLParserSubclass(SoupTest):
+ def test_error(self):
+ """Verify that our HTMLParser subclass implements error() in a way
+ that doesn't cause a crash.
+ """
+ parser = BeautifulSoupHTMLParser()
+ parser.error("don't crash")
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index c0e7c40..e8903e3 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -605,7 +605,7 @@ class SiblingTest(TreeTest):
</html>'''
# All that whitespace looks good but makes the tests more
# difficult. Get rid of it.
- markup = re.compile("\n\s*").sub("", markup)
+ markup = re.compile(r"\n\s*").sub("", markup)
self.tree = self.soup(markup)
@@ -821,6 +821,26 @@ class TestTreeModification(SoupTest):
soup = self.soup(text)
self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
+ def test_insert_beautifulsoup_object_inserts_children(self):
+ """Inserting one BeautifulSoup object into another actually inserts all
+ of its children -- you'll never combine BeautifulSoup objects.
+ """
+ soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
+
+ text = "<p>p2</p><p>p3</p>"
+ to_insert = self.soup(text)
+ soup.insert(1, to_insert)
+
+ for i in soup.descendants:
+ assert not isinstance(i, BeautifulSoup)
+
+ p1, p2, p3, p4 = list(soup.children)
+ self.assertEquals("And now, a word:", p1.string)
+ self.assertEquals("p2", p2.string)
+ self.assertEquals("p3", p3.string)
+ self.assertEquals("And we're back.", p4.string)
+
+
def test_replace_with_maintains_next_element_throughout(self):
soup = self.soup('<p><a>one</a><b>three</b></p>')
a = soup.a
@@ -1419,13 +1439,21 @@ class TestSubstitutions(SoupTest):
u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
def test_formatter_html(self):
- markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+ markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter="html")
self.assertEqual(
decoded,
- self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+ self.document_for("<br/><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+ def test_formatter_html5(self):
+ markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+ soup = self.soup(markup)
+ decoded = soup.decode(formatter="html5")
+ self.assertEqual(
+ decoded,
+ self.document_for("<br><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+
def test_formatter_minimal(self):
markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
@@ -1498,7 +1526,7 @@ class TestSubstitutions(SoupTest):
u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
soup.div.prettify())
- def test_prettify_accepts_formatter(self):
+ def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
pretty = soup.prettify(formatter = lambda x: x.upper())
self.assertTrue("FOO" in pretty)