# -*- coding: utf-8 -*-
"""Unit tests for Beautiful Soup.

These tests make sure that Beautiful Soup works as it should. If you
find a bug in Beautiful Soup, the best way to express it is as a test
case like this that fails.
"""

import re
import unittest
from beautifulsoup import *
from beautifulsoup.element import CData, Comment, Declaration, SoupStrainer, Tag
from beautifulsoup.builder import ICantBelieveItsValidHTMLTreeBuilder
from beautifulsoup.dammit import UnicodeDammit


def additional_tests():
    """Return the tests that unittest loads from this module by name."""
    return unittest.TestLoader().loadTestsFromName(__name__)


class SoupTest(unittest.TestCase):
    """Base test case providing a parse-and-compare assertion helper."""

    # Tree builder used whenever a test does not pass its own.
    default_builder = HTMLParserXMLTreeBuilder()

    def assertSoupEquals(self, toParse, rep=None, builder=None,
                         encoding=None):
        """Parse the given text and make sure its string rep is the other
        given text.

        toParse  -- the markup handed to BeautifulSoup.
        rep      -- the expected representation; defaults to toParse
                    itself, i.e. the markup should round-trip unchanged.
        builder  -- tree builder to parse with; defaults to
                    self.default_builder.
        encoding -- when None the soup's decode() result is compared,
                    otherwise the result of encode(encoding).

        Fails the test (via assertEqual) when the two differ.
        """
        if rep is None:
            rep = toParse
        # Honour an explicitly supplied builder; previously the argument
        # was accepted but silently ignored in favour of the default.
        if builder is None:
            builder = self.default_builder
        obj = BeautifulSoup(toParse, builder=builder)
        if encoding is None:
            rep2 = obj.decode()
        else:
            rep2 = obj.encode(encoding)
        self.assertEqual(rep2, rep)


class FollowThatTag(SoupTest):
    "Tests the various ways of fetching tags from a soup."
def setUp(self): ml = """ 1 2 3 4 4""" self.soup = BeautifulStoneSoup(ml) def testFindAllByName(self): matching = self.soup('a') self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') self.assertEqual(matching, self.soup.findAll('a')) self.assertEqual(matching, self.soup.findAll(SoupStrainer('a'))) def testFindAllByAttribute(self): matching = self.soup.findAll(id='x') self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') self.assertEqual(matching[1].name, 'b') matching2 = self.soup.findAll(attrs={'id' : 'x'}) self.assertEqual(matching, matching2) strainer = SoupStrainer(attrs={'id' : 'x'}) self.assertEqual(matching, self.soup.findAll(strainer)) self.assertEqual(len(self.soup.findAll(id=None)), 1) self.assertEqual(len(self.soup.findAll(width=100)), 1) self.assertEqual(len(self.soup.findAll(junk=None)), 5) self.assertEqual(len(self.soup.findAll(junk=[1, None])), 5) self.assertEqual(len(self.soup.findAll(junk=re.compile('.*'))), 0) self.assertEqual(len(self.soup.findAll(junk=True)), 0) self.assertEqual(len(self.soup.findAll(junk=True)), 0) self.assertEqual(len(self.soup.findAll(href=True)), 1) def testFindallByClass(self): soup = BeautifulSoup('FooBar') self.assertEqual(soup.find('a', '1').string, "Bar") def testFindAllByList(self): matching = self.soup(['a', 'ac']) self.assertEqual(len(matching), 3) def testFindAllByHash(self): matching = self.soup({'a' : True, 'b' : True}) self.assertEqual(len(matching), 4) def testFindAllText(self): soup = BeautifulSoup("\xbb") self.assertEqual(soup.findAll(text=re.compile('.*')), [u'\xbb']) def testFindAllByRE(self): import re r = re.compile('a.*') self.assertEqual(len(self.soup(r)), 3) def testFindAllByMethod(self): def matchTagWhereIDMatchesName(tag): return tag.name == tag.get('id') matching = self.soup.findAll(matchTagWhereIDMatchesName) self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') def testParents(self): soup = BeautifulSoup('