diff options
Diffstat (limited to 'bs4/tests')
-rw-r--r-- | bs4/tests/test_html5lib.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_htmlparser.py | 4 | ||||
-rw-r--r-- | bs4/tests/test_lxml.py | 4 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 29 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 2 |
5 files changed, 33 insertions, 8 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py index 3a04787..371463a 100644 --- a/bs4/tests/test_html5lib.py +++ b/bs4/tests/test_html5lib.py @@ -22,7 +22,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): @property def default_builder(self): - return HTML5TreeBuilder() + return HTML5TreeBuilder def test_soupstrainer(self): # The html5lib tree builder does not support SoupStrainers. diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py index 0381c7d..790489a 100644 --- a/bs4/tests/test_htmlparser.py +++ b/bs4/tests/test_htmlparser.py @@ -9,9 +9,7 @@ from bs4.builder._htmlparser import BeautifulSoupHTMLParser class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - @property - def default_builder(self): - return HTMLParserTreeBuilder() + default_builder = HTMLParserTreeBuilder def test_namespaced_system_doctype(self): # html.parser can't handle namespaced doctypes, so skip this one. diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py index 1a4f27c..3b7858f 100644 --- a/bs4/tests/test_lxml.py +++ b/bs4/tests/test_lxml.py @@ -36,7 +36,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): @property def default_builder(self): - return LXMLTreeBuilder() + return LXMLTreeBuilder def test_out_of_range_entity(self): self.assertSoupEquals( @@ -79,7 +79,7 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): @property def default_builder(self): - return LXMLTreeBuilderForXML() + return LXMLTreeBuilderForXML def test_namespace_indexing(self): # We should not track un-prefixed namespaces as we can only hold one diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index 1c6b7a6..213255d 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -24,6 +24,7 @@ from bs4.dammit import ( EncodingDetector, ) from bs4.testing import ( + default_builder, SoupTest, skipIf, ) @@ -89,7 +90,33 @@ class TestConstructor(SoupTest): self.assertEqual(builder, soup.builder) self.assertEqual(kwargs, builder.called_with) - + def test_cdata_list_attributes(self): + # Most attribute values are represented as scalars, but the + # HTML standard says that some attributes, like 'class' have + # space-separated lists as values. + markup = '<a id=" an id " class=" a class "></a>' + soup = self.soup(markup) + + # Note that the spaces are stripped for 'class' but not for 'id'. + a = soup.a + self.assertEqual(" an id ", a['id']) + self.assertEqual(["a", "class"], a['class']) + + # TreeBuilder takes an argument called 'cdata_list_attributes' which lets + # you customize or disable this. As always, you can customize the TreeBuilder + # by passing in a keyword argument to the BeautifulSoup constructor. + soup = self.soup(markup, builder=default_builder, cdata_list_attributes=None) + self.assertEqual(" a class ", soup.a['class']) + + # Here are two ways of saying that `id` is a CDATA list + # attribute and 'class' is not. + for switcheroo in ({'*': 'id'}, {'a': 'id'}): + soup = self.soup(markup, builder=None, cdata_list_attributes=switcheroo) + a = soup.a + self.assertEqual(["an", "id"], a['id']) + self.assertEqual(" a class ", a['class']) + + class TestWarnings(SoupTest): def _no_parser_specified(self, s, is_there=True): diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 6d79454..a14928e 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -896,7 +896,7 @@ class TestTreeModification(SoupTest): self.assertEqual(soup.a.contents[0].next_element, "bar") def test_insert_tag(self): - builder = self.default_builder + builder = self.default_builder() soup = self.soup( "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder) magic_tag = Tag(soup, builder, 'magictag') |