1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
from bs4.testing import SoupTest
from bs4.element import (
CData,
Comment,
Declaration,
Doctype,
NavigableString,
Script,
Stylesheet,
TemplateString,
)
class TestNavigableString(SoupTest):
def test_text_acquisition_methods(self):
# These methods are intended for use against Tag, but they
# work on NavigableString as well,
eq_ = self.assertEquals
s = NavigableString("fee ")
cdata = CData("fie ")
comment = Comment("foe ")
eq_("fee ", s.get_text())
eq_("fee", s.get_text(strip=True))
eq_(["fee "], list(s.strings))
eq_(["fee"], list(s.stripped_strings))
eq_(["fee "], list(s._all_strings()))
eq_("fie ", cdata.get_text())
eq_("fie", cdata.get_text(strip=True))
eq_(["fie "], list(cdata.strings))
eq_(["fie"], list(cdata.stripped_strings))
eq_(["fie "], list(cdata._all_strings()))
# Since a Comment isn't normally considered 'text',
# these methods generally do nothing.
eq_("", comment.get_text())
eq_([], list(comment.strings))
eq_([], list(comment.stripped_strings))
eq_([], list(comment._all_strings()))
# Unless you specifically say that comments are okay.
eq_("foe", comment.get_text(strip=True, types=Comment))
eq_("foe ", comment.get_text(types=(Comment, NavigableString)))
class TestNavigableStringSubclasses(SoupTest):
def test_cdata(self):
# None of the current builders turn CDATA sections into CData
# objects, but you can create them manually.
soup = self.soup("")
cdata = CData("foo")
soup.insert(1, cdata)
self.assertEqual(str(soup), "<![CDATA[foo]]>")
self.assertEqual(soup.find(text="foo"), "foo")
self.assertEqual(soup.contents[0], "foo")
def test_cdata_is_never_formatted(self):
"""Text inside a CData object is passed into the formatter.
But the return value is ignored.
"""
self.count = 0
def increment(*args):
self.count += 1
return "BITTER FAILURE"
soup = self.soup("")
cdata = CData("<><><>")
soup.insert(1, cdata)
self.assertEqual(
b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
self.assertEqual(1, self.count)
def test_doctype_ends_in_newline(self):
# Unlike other NavigableString subclasses, a DOCTYPE always ends
# in a newline.
doctype = Doctype("foo")
soup = self.soup("")
soup.insert(1, doctype)
self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
def test_declaration(self):
d = Declaration("foo")
self.assertEqual("<?foo?>", d.output_ready())
def test_default_string_containers(self):
# In some cases, we use different NavigableString subclasses for
# the same text in different tags.
soup = self.soup(
"<div>text</div><script>text</script><style>text</style>"
)
self.assertEqual(
[NavigableString, Script, Stylesheet],
[x.__class__ for x in soup.find_all(text=True)]
)
# The TemplateString is a little unusual because it's generally found
# _inside_ children of a <template> element, not a direct child of the
# <template> element.
soup = self.soup(
"<template>Some text<p>In a tag</p></template>Some text outside"
)
assert all(
isinstance(x, TemplateString)
for x in soup.template._all_strings(types=None)
)
# Once the <template> tag closed, we went back to using
# NavigableString.
outside = soup.template.next_sibling
assert isinstance(outside, NavigableString)
assert not isinstance(outside, TemplateString)
# The TemplateString is also unusual because it can contain
# NavigableString subclasses of _other_ types, such as
# Comment.
markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
soup = self.soup(markup)
self.assertEqual(markup, soup.template.encode("utf8"))
|