') def testNextSibling(self): soup = self.SIBLING_TEST tag = 'blockquote' b = soup.find(tag, {'id' : 2}) self.assertEquals(b.findNext(tag)['id'], '2.1') self.assertEquals(b.findNextSibling(tag)['id'], '3') self.assertEquals(b.findNextSibling(tag)['id'], '3') self.assertEquals(len(b.findNextSiblings(tag)), 2) self.assertEquals(len(b.findNextSiblings(tag, {'id' : 4})), 1) def testPreviousSibling(self): soup = self.SIBLING_TEST tag = 'blockquote' b = soup.find(tag, {'id' : 3}) self.assertEquals(b.findPrevious(tag)['id'], '2.1') self.assertEquals(b.findPreviousSibling(tag)['id'], '2') self.assertEquals(b.findPreviousSibling(tag)['id'], '2') self.assertEquals(len(b.findPreviousSiblings(tag)), 2) self.assertEquals(len(b.findPreviousSiblings(tag, id=1)), 1) def testTextNavigation(self): soup = BeautifulSoup('FooBarBaz<&<&") class OperatorOverload(SoupTest): "Our operators do it all! Call now!" def testTagNameAsFind(self): "Tests that referencing a tag name as a member delegates to find()." soup = BeautifulSoup('foobarRed herring') self.assertEqual(soup.b.i, soup.find('b').find('i')) self.assertEqual(soup.b.i.string, 'bar') self.assertEqual(soup.b['id'], '1') self.assertEqual(soup.b.contents[0], 'foo') self.assert_(not soup.a) #Test the .fooTag variant of .foo. self.assertEqual(soup.bTag.iTag.string, 'bar') self.assertEqual(soup.b.iTag.string, 'bar') self.assertEqual(soup.find('b').find('i'), soup.bTag.iTag) class NestableEgg(SoupTest): """Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!""" def testParaInsideBlockquote(self): soup = BeautifulSoup('
Blee
Blargh') baz = soup.find(text='Baz') self.assertEquals(baz.findParent("i")['id'], '1') self.assertEquals(baz.findNext(text='Blee'), 'Blee') self.assertEquals(baz.findNextSibling(text='Blee'), 'Blee') self.assertEquals(baz.findNextSibling(text='Blargh'), None) self.assertEquals(baz.findNextSibling('hr')['id'], '1') class SiblingRivalry(SoupTest): "Tests the nextSibling and previousSibling navigation." def testSiblings(self): soup = BeautifulSoup("") secondLI = soup.find('li').nextSibling self.assert_(secondLI.name == 'li' and secondLI.string == '2') self.assertEquals(soup.find(text='1').nextSibling.name, 'p') self.assertEquals(soup.find('p').nextSibling, 'B') self.assertEquals(soup.find('p').nextSibling.previousSibling.nextSibling, 'B') class TagsAreObjectsToo(SoupTest): "Tests the various built-in functions of Tag objects." def testLen(self): soup = BeautifulSoup("
- 1
A
B- 2
- 3
123 ") self.assertEquals(len(soup.top), 3) class StringEmUp(SoupTest): "Tests the use of 'string' as an alias for a tag's only content." def testString(self): s = BeautifulSoup("foo") self.assertEquals(s.b.string, 'foo') def testLackOfString(self): s = BeautifulSoup("feo") self.assert_(not s.b.string) class ThatsMyLimit(SoupTest): "Tests the limit argument." def testBasicLimits(self): s = BeautifulSoup('
') self.assertEquals(len(s.findAll('br')), 4) self.assertEquals(len(s.findAll('br', limit=2)), 2) self.assertEquals(len(s('br', limit=2)), 2) class OnlyTheLonely(SoupTest): "Tests the parseOnly argument to the constructor." def setUp(self): x = [] for i in range(1,6): x.append('' % i) for j in range(100,103): x.append('Content %s.%s' % (i,j, i,j)) x.append('') self.x = ''.join(x) def testOnly(self): strainer = SoupStrainer("b") soup = BeautifulSoup(self.x, parseOnlyThese=strainer) self.assertEquals(len(soup), 15) strainer = SoupStrainer(id=re.compile("100.*")) soup = BeautifulSoup(self.x, parseOnlyThese=strainer) self.assertEquals(len(soup), 5) strainer = SoupStrainer(text=re.compile("10[01].*")) soup = BeautifulSoup(self.x, parseOnlyThese=strainer) self.assertEquals(len(soup), 10) strainer = SoupStrainer(text=lambda(x):x[8]=='3') soup = BeautifulSoup(self.x, parseOnlyThese=strainer) self.assertEquals(len(soup), 3) class PickleMeThis(SoupTest): "Testing features like pickle and deepcopy." def setUp(self): self.page = """Beautiful Soup: We called him Tortoise because he taught us. foo bar """ self.soup = BeautifulSoup(self.page) def testPickle(self): import pickle dumped = pickle.dumps(self.soup, 2) loaded = pickle.loads(dumped) self.assertEqual(loaded.__class__, BeautifulSoup) self.assertEqual(loaded.decode(), self.soup.decode()) def testDeepcopy(self): from copy import deepcopy deepcopy(BeautifulSoup("")) copied = deepcopy(self.soup) self.assertEqual(copied.decode(), self.soup.decode()) def testUnicodePickle(self): import cPickle as pickle html = "" + chr(0xc3) + "" soup = BeautifulSoup(html) dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) loaded = pickle.loads(dumped) self.assertEqual(loaded.decode(), soup.decode()) class WriteOnlyCode(SoupTest): "Testing the modification of the tree." def testModifyAttributes(self): soup = BeautifulSoup('') soup.a['id'] = 2 self.assertEqual(soup.decode(), '') del(soup.a['id']) self.assertEqual(soup.decode(), '') soup.a['id2'] = 'foo' self.assertEqual(soup.decode(), '') def testNewTagCreation(self): "Makes sure tags don't step on each others' toes." soup = BeautifulSoup() builder = HTMLParserTreeBuilder() a = Tag(soup, builder, 'a') ol = Tag(soup, builder, 'ol') a['href'] = 'http://foo.com/' self.assertRaises(KeyError, lambda : ol['href']) def testTagReplacement(self): # Make sure you can replace an element with itself. text = "Foo " soup = BeautifulSoup(text) c = soup.c soup.c.replaceWith(c) self.assertEquals(soup.decode(), text) # A very simple case soup = BeautifulSoup("Argh!") soup.find(text="Argh!").replaceWith("Hooray!") newText = soup.find(text="Hooray!") b = soup.b self.assertEqual(newText.previous, b) self.assertEqual(newText.parent, b) self.assertEqual(newText.previous.next, newText) self.assertEqual(newText.next, None) # A more complex case soup = BeautifulSoup("Argh! ") soup.b.insert(1, "Hooray!") newText = soup.find(text="Hooray!") self.assertEqual(newText.previous, "Argh!") self.assertEqual(newText.previous.next, newText) self.assertEqual(newText.previousSibling, "Argh!") self.assertEqual(newText.previousSibling.nextSibling, newText) self.assertEqual(newText.nextSibling, None) self.assertEqual(newText.next, soup.c) text = "There's no business like show business" soup = BeautifulSoup(text) no, show = soup.findAll('b') show.replaceWith(no) self.assertEquals(soup.decode(), "There's business like no business") # Even more complex soup = BeautifulSoup("Find lady! ") builder = HTMLParserTreeBuilder() tag = Tag(soup, builder, 'magictag') tag.insert(0, "the") soup.a.insert(1, tag) b = soup.b c = soup.c theText = tag.find(text=True) findText = b.find(text="Find") self.assertEqual(findText.next, tag) self.assertEqual(tag.previous, findText) self.assertEqual(b.nextSibling, tag) self.assertEqual(tag.previousSibling, b) self.assertEqual(tag.nextSibling, c) self.assertEqual(c.previousSibling, tag) self.assertEqual(theText.next, c) self.assertEqual(c.previous, theText) # Aand... incredibly complex. soup = BeautifulSoup("""Wereserve the right to """) f = soup.f a = soup.a c = soup.c e = soup.e weText = a.find(text="We") soup.b.replaceWith(soup.f) self.assertEqual(soup.decode(), "Werefuse service refuse to ") self.assertEqual(f.previous, weText) self.assertEqual(weText.next, f) self.assertEqual(f.previousSibling, weText) self.assertEqual(f.nextSibling, None) self.assertEqual(weText.nextSibling, f) def testAppend(self): doc = "service Don't leave me here.
Don't leave me.
" soup = BeautifulSoup(doc) second_para = soup('p')[1] bold = soup.find('b') soup('p')[1].append(soup.find('b')) self.assertEqual(bold.parent, second_para) self.assertEqual(soup.decode(), "Don't leave me .
" "Don't leave me.here
") def testTagExtraction(self): # A very simple case text = ' Real content here.' soup = BeautifulSoup(text) extracted = soup.find("div", id="nav").extract() self.assertEqual(soup.decode(), "Real content here.") self.assertEqual(extracted.decode(), ' ') # A simple case, a more complex test. text = "12iiiAB " soup = BeautifulStoneSoup(text) doc = soup.doc numbers, roman, letters = soup("a") self.assertEqual(roman.parent, doc) oldPrevious = roman.previous endOfThisTag = roman.nextSibling.previous self.assertEqual(oldPrevious, "2") self.assertEqual(roman.next, "i") self.assertEqual(endOfThisTag, "ii") self.assertEqual(roman.previousSibling, numbers) self.assertEqual(roman.nextSibling, letters) roman.extract() self.assertEqual(roman.parent, None) self.assertEqual(roman.previous, None) self.assertEqual(roman.next, "i") self.assertEqual(letters.previous, '2') self.assertEqual(roman.previousSibling, None) self.assertEqual(roman.nextSibling, None) self.assertEqual(endOfThisTag.next, None) self.assertEqual(roman.b.contents[0].next, None) self.assertEqual(numbers.nextSibling, letters) self.assertEqual(letters.previousSibling, numbers) self.assertEqual(len(doc.contents), 2) self.assertEqual(doc.contents[0], numbers) self.assertEqual(doc.contents[1], letters) # A more complex case. text = "12Hollywood, baby! 3" soup = BeautifulStoneSoup(text) one = soup.find(text="1") three = soup.find(text="3") toExtract = soup.b soup.b.extract() self.assertEqual(one.next, three) self.assertEqual(three.previous, one) self.assertEqual(one.parent.nextSibling, three) self.assertEqual(three.previousSibling, soup.a) class TheManWithoutAttributes(SoupTest): "Test attribute access" def testHasKey(self): text = "" self.assertTrue(BeautifulSoup(text).foo.has_key('attr')) class QuoteMeOnThat(SoupTest): "Test quoting" def testQuotedAttributeValues(self): self.assertSoupEquals(" ", ' ') text = """ a """ soup = BeautifulSoup(text) self.assertEquals(soup.decode(), text) soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' newText = """a """ self.assertSoupEquals(soup.decode(), newText) self.assertSoupEquals('', ' ') class YoureSoLiteral(SoupTest): "Test literal mode." def testLiteralMode(self): text = "Foo" soup = BeautifulSoup(text) self.assertEqual(soup.script.contents[0], "if (i This is an example of an HTML tag
Foo
Bar') self.assertEqual(soup.blockquote.p.b.string, 'Foo') self.assertEqual(soup.blockquote.b.string, 'Foo') self.assertEqual(soup.find('p', recursive=False).string, 'Bar') def testNestedTables(self): text = """
Here's another table:
|
foo | " self.assertSoupEquals(text, text) def testCData(self): xml = "