From fa925f92375ef42c16cdd6580d29a1a61106dd36 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 30 Dec 2010 20:54:16 -0500 Subject: Ported a lot of tests of tree modification. --- src/beautifulsoup/__init__.py | 14 ++- src/beautifulsoup/tests/test_tree.py | 189 ++++++++++++++++++++++++++++++++++- 2 files changed, 194 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/beautifulsoup/__init__.py b/src/beautifulsoup/__init__.py index e4a8ca4..79bb657 100644 --- a/src/beautifulsoup/__init__.py +++ b/src/beautifulsoup/__init__.py @@ -129,7 +129,8 @@ class BeautifulStoneSoup(Tag): # alone. STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } - def _defaultBuilder(self): + @classmethod + def default_builder(self): from lxml import etree from builder.lxml_builder import LXMLTreeBuilder return LXMLTreeBuilder(parser_class=etree.XMLParser) @@ -141,7 +142,7 @@ class BeautifulStoneSoup(Tag): is fed into the underlying parser.""" if builder is None: - builder = self._defaultBuilder() + builder = self.default_builder() self.builder = builder self.builder.soup = self @@ -343,7 +344,9 @@ class BeautifulStoneSoup(Tag): class BeautifulSoup(BeautifulStoneSoup): """A convenience class for parsing HTML without creating a builder.""" - def _defaultBuilder(self): + + @classmethod + def default_builder(self): try: from builder.html5_builder import HTML5TreeBuilder return HTML5TreeBuilder() @@ -356,11 +359,6 @@ class StopParsing(Exception): pass -class ICantBelieveItsBeautifulSoup(BeautifulStoneSoup): - def _defaultBuilder(self): - return ICantBelieveItsValidHTMLBuilder() - - #By default, act as an HTML pretty-printer. if __name__ == '__main__': import sys diff --git a/src/beautifulsoup/tests/test_tree.py b/src/beautifulsoup/tests/test_tree.py index 31d5dc2..42430d3 100644 --- a/src/beautifulsoup/tests/test_tree.py +++ b/src/beautifulsoup/tests/test_tree.py @@ -10,7 +10,8 @@ methods tested here. """ import re -from beautifulsoup.element import SoupStrainer +from beautifulsoup import BeautifulSoup +from beautifulsoup.element import SoupStrainer, Tag from helpers import SoupTest class TreeTest(SoupTest): @@ -508,6 +509,192 @@ class TestPreviousSibling(SiblingTest): self.assertEquals(start.findPreviousSibling(text="nonesuch"), None) +class TestTreeModification(SoupTest): + + def test_attribute_modification(self): + soup = self.soup('') + soup.a['id'] = 2 + self.assertEqual(soup.decode(), self.document_for('')) + del(soup.a['id']) + self.assertEqual(soup.decode(), self.document_for('')) + soup.a['id2'] = 'foo' + self.assertEqual(soup.decode(), self.document_for('')) + + def test_new_tag_creation(self): + builder = BeautifulSoup.default_builder() + soup = BeautifulSoup("", builder=builder) + a = Tag(soup, builder, 'a') + ol = Tag(soup, builder, 'ol') + a['href'] = 'http://foo.com/' + soup.insert(0, a) + soup.insert(1, ol) + self.assertEqual( + soup.decode(), '
    ') + + def test_append_to_contents_moves_tag(self): + doc = """

    Don't leave me here.

    +

    Don\'t leave!

    """ + soup = self.soup(doc) + second_para = soup.find(id='2') + bold = soup.b + + # Move the tag to the end of the second paragraph. + soup.find(id='2').append(soup.b) + + # The tag is now a child of the second paragraph. + self.assertEqual(bold.parent, second_para) + + self.assertEqual( + soup.decode(), self.document_for( + '

    Don\'t leave me .

    \n' + '

    Don\'t leave!here

    ')) + + def test_replace_tag_with_itself(self): + text = "Foo" + soup = BeautifulSoup(text) + c = soup.c + soup.c.replaceWith(c) + self.assertEquals(soup.decode(), self.document_for(text)) + + def test_replace_final_node(self): + soup = self.soup("Argh!") + soup.find(text="Argh!").replaceWith("Hooray!") + new_text = soup.find(text="Hooray!") + b = soup.b + self.assertEqual(new_text.previous, b) + self.assertEqual(new_text.parent, b) + self.assertEqual(new_text.previous.next, new_text) + self.assertEqual(new_text.next, None) + + def test_consecutive_text_nodes(self): + # A builder should never create two consecutive text nodes, + # but if you insert one next to another, Beautiful Soup will + # handle it correctly. + soup = self.soup("Argh!") + soup.b.insert(1, "Hooray!") + + self.assertEqual( + soup.decode(), self.document_for( + "Argh!Hooray!")) + + new_text = soup.find(text="Hooray!") + self.assertEqual(new_text.previous, "Argh!") + self.assertEqual(new_text.previous.next, new_text) + + self.assertEqual(new_text.previousSibling, "Argh!") + self.assertEqual(new_text.previousSibling.nextSibling, new_text) + + self.assertEqual(new_text.nextSibling, None) + self.assertEqual(new_text.next, soup.c) + + + def test_insert_tag(self): + builder = self.default_builder + soup = BeautifulSoup( + "Findlady!", builder=builder) + magic_tag = Tag(soup, builder, 'magictag') + magic_tag.insert(0, "the") + soup.a.insert(1, magic_tag) + + self.assertEqual( + soup.decode(), self.document_for( + "Findthelady!")) + + # Make sure all the relationships are hooked up correctly. + b_tag = soup.b + self.assertEqual(b_tag.nextSibling, magic_tag) + self.assertEqual(magic_tag.previousSibling, b_tag) + + find = b_tag.find(text="Find") + self.assertEqual(find.next, magic_tag) + self.assertEqual(magic_tag.previous, find) + + c_tag = soup.c + self.assertEqual(magic_tag.nextSibling, c_tag) + self.assertEqual(c_tag.previousSibling, magic_tag) + + the = magic_tag.find(text="the") + self.assertEqual(the.parent, magic_tag) + self.assertEqual(the.next, c_tag) + self.assertEqual(c_tag.previous, the) + + def test_replace_with(self): + soup = self.soup( + "

    There's no business like show business

    ") + no, show = soup.findAll('b') + show.replaceWith(no) + self.assertEquals( + soup.decode(), + self.document_for( + "

    There's business like no business

    ")) + + self.assertEquals(show.parent, None) + self.assertEquals(no.parent, soup.p) + self.assertEquals(no.next, "no") + self.assertEquals(no.nextSibling, " business") + + def test_nested_tag_replace_with(self): + soup = BeautifulSoup( + """Wereservetherighttorefuseservice""") + + # Replace the entire tag and its contents ("reserve the + # right") with the tag ("refuse"). + remove_tag = soup.b + move_tag = soup.f + remove_tag.replaceWith(move_tag) + + self.assertEqual( + soup.decode(), self.document_for( + "Werefusetoservice")) + + # The tag is now an orphan. + self.assertEqual(remove_tag.parent, None) + self.assertEqual(remove_tag.find(text="right").next, None) + self.assertEqual(remove_tag.previous, None) + self.assertEqual(remove_tag.nextSibling, None) + self.assertEqual(remove_tag.previousSibling, None) + + # The tag is now connected to the tag. + self.assertEqual(move_tag.parent, soup.a) + self.assertEqual(move_tag.previous, "We") + self.assertEqual(move_tag.next.next, soup.e) + self.assertEqual(move_tag.nextSibling, None) + + # The gap where the tag used to be has been mended, and + # the word "to" is now connected to the tag. + to_text = soup.find(text="to") + g_tag = soup.g + self.assertEqual(to_text.next, g_tag) + self.assertEqual(to_text.nextSibling, g_tag) + self.assertEqual(g_tag.previous, to_text) + self.assertEqual(g_tag.previousSibling, to_text) + + def test_extract(self): + soup = self.soup( + 'Some content. More content.') + + self.assertEqual(len(soup.body.contents), 3) + extracted = soup.find(id="nav").extract() + + self.assertEqual( + soup.decode(), "Some content. More content.") + self.assertEqual(extracted.decode(), '') + + # The extracted tag is now an orphan. + self.assertEqual(len(soup.body.contents), 2) + self.assertEqual(extracted.parent, None) + self.assertEqual(extracted.previous, None) + self.assertEqual(extracted.next.next, None) + + # The gap where the extracted tag used to be has been mended. + content_1 = soup.find(text="Some content. ") + content_2 = soup.find(text=" More content.") + self.assertEquals(content_1.next, content_2) + self.assertEquals(content_1.nextSibling, content_2) + self.assertEquals(content_2.previous, content_1) + self.assertEquals(content_2.previousSibling, content_1) + + class TestElementObjects(SoupTest): """Test various features of element objects.""" -- cgit v1.2.3