summaryrefslogtreecommitdiff
path: root/tests/test_soup.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2023-01-25 14:36:30 -0500
committerLeonard Richardson <leonardr@segfault.org>2023-01-25 14:36:30 -0500
commite4f967730af7d5ea59f6a05d03c779df16a24bd3 (patch)
tree02af82ae27b992e4e863db21fd0e21ebe8584819 /tests/test_soup.py
parentd84821852a32d1314a8fcb5fa4180e3beecea39c (diff)
Removed very old tests that were imported as part of the bzr import but not removed.
Diffstat (limited to 'tests/test_soup.py')
-rw-r--r--tests/test_soup.py130
1 files changed, 0 insertions, 130 deletions
diff --git a/tests/test_soup.py b/tests/test_soup.py
deleted file mode 100644
index 87d6f3b..0000000
--- a/tests/test_soup.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Tests of Beautiful Soup as a whole."""
-
-import unittest
-from bs4.element import SoupStrainer
-from bs4.dammit import EntitySubstitution, UnicodeDammit
-from bs4.testing import SoupTest
-
-
-class TestSelectiveParsing(SoupTest):
-
- def test_parse_with_soupstrainer(self):
- markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
- strainer = SoupStrainer("b")
- soup = self.soup(markup, parse_only=strainer)
- self.assertEquals(soup.encode(), "<b>Yes</b><b>Yes <c>Yes</c></b>")
-
-
-class TestEntitySubstitution(unittest.TestCase):
- """Standalone tests of the EntitySubstitution class."""
- def setUp(self):
- self.sub = EntitySubstitution
-
- def test_simple_html_substitution(self):
- # Unicode characters corresponding to named HTML entites
- # are substituted, and no others.
- s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
- self.assertEquals(self.sub.substitute_html(s),
- u"foo&forall;\N{SNOWMAN}&otilde;bar")
-
- def test_smart_quote_substitution(self):
- # MS smart quotes are a common source of frustration, so we
- # give them a special test.
- quotes = "\x91\x92foo\x93\x94"
- dammit = UnicodeDammit(quotes)
- self.assertEquals(self.sub.substitute_html(dammit.markup),
- "&lsquo;&rsquo;foo&ldquo;&rdquo;")
-
- def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
- s = 'Welcome to "my bar"'
- self.assertEquals(self.sub.substitute_xml(s, False), s)
-
- def test_xml_attribute_quoting_normally_uses_double_quotes(self):
- self.assertEquals(self.sub.substitute_xml("Welcome", True),
- '"Welcome"')
- self.assertEquals(self.sub.substitute_xml("Bob's Bar", True),
- '"Bob\'s Bar"')
-
- def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
- s = 'Welcome to "my bar"'
- self.assertEquals(self.sub.substitute_xml(s, True),
- "'Welcome to \"my bar\"'")
-
- def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
- s = 'Welcome to "Bob\'s Bar"'
- self.assertEquals(
- self.sub.substitute_xml(s, True),
- '"Welcome to &quot;Bob\'s Bar&quot;"')
-
- def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
- quoted = 'Welcome to "Bob\'s Bar"'
- self.assertEquals(self.sub.substitute_xml(quoted), quoted)
-
- def test_xml_quoting_handles_angle_brackets(self):
- self.assertEquals(
- self.sub.substitute_xml("foo<bar>"),
- "foo&lt;bar&gt;")
-
- def test_xml_quoting_handles_ampersands(self):
- self.assertEquals(self.sub.substitute_xml("AT&T"), "AT&amp;T")
-
- def test_xml_quoting_ignores_ampersands_when_they_are_part_of_an_entity(self):
- self.assertEquals(
- self.sub.substitute_xml("&Aacute;T&T"),
- "&Aacute;T&amp;T")
-
- def test_quotes_not_html_substituted(self):
- """There's no need to do this except inside attribute values."""
- text = 'Bob\'s "bar"'
- self.assertEquals(self.sub.substitute_html(text), text)
-
-class TestUnicodeDammit(unittest.TestCase):
- """Standalone tests of Unicode, Dammit."""
-
- def test_smart_quotes_to_unicode(self):
- markup = "<foo>\x91\x92\x93\x94</foo>"
- dammit = UnicodeDammit(markup)
- self.assertEquals(
- dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
-
- def test_smart_quotes_to_xml_entities(self):
- markup = "<foo>\x91\x92\x93\x94</foo>"
- dammit = UnicodeDammit(markup, smart_quotes_to="xml")
- self.assertEquals(
- dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
-
- def test_smart_quotes_to_html_entities(self):
- markup = "<foo>\x91\x92\x93\x94</foo>"
- dammit = UnicodeDammit(markup, smart_quotes_to="html")
- self.assertEquals(
- dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
-
- def test_detect_utf8(self):
- utf8 = "\xc3\xa9"
- dammit = UnicodeDammit(utf8)
- self.assertEquals(dammit.unicode_markup, u'\xe9')
- self.assertEquals(dammit.original_encoding, 'utf-8')
-
- def test_convert_hebrew(self):
- hebrew = "\xed\xe5\xec\xf9"
- dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
- self.assertEquals(dammit.original_encoding, 'iso-8859-8')
- self.assertEquals(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
-
- def test_dont_see_smart_quotes_where_there_are_none(self):
- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
- dammit = UnicodeDammit(utf_8)
- self.assertEquals(dammit.original_encoding, 'utf-8')
- self.assertEquals(dammit.unicode_markup.encode("utf-8"), utf_8)
-
- def test_ignore_inappropriate_codecs(self):
- utf8_data = u"Räksmörgås".encode("utf-8")
- dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
- self.assertEquals(dammit.original_encoding, 'utf-8')
-
- def test_ignore_invalid_codecs(self):
- utf8_data = u"Räksmörgås".encode("utf-8")
- for bad_encoding in ['.utf8', '...', 'utF---16.!']:
- dammit = UnicodeDammit(utf8_data, [bad_encoding])
- self.assertEquals(dammit.original_encoding, 'utf-8')