1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
from test_lxml import (
TestLXMLBuilder,
TestLXMLBuilderInvalidMarkup,
)
class TestHTML5Builder(TestLXMLBuilder):
    """Smoke tests for well-formed markup, run with `HTML5TreeBuilder`.

    See `BuilderSmokeTest`.
    """

    @property
    def default_builder(self):
        """Return a new `HTML5TreeBuilder` instance for the tests to use."""
        return HTML5TreeBuilder()

    def test_bare_string(self):
        """A bare string is expected to come back as the same string."""
        # The expected output passed here is the input itself: no
        # wrapper tag is expected around the bare string.
        bare = "A bare string"
        expected = "A bare string"
        self.assertSoupEquals(bare, expected)

    def test_correctly_nested_tables(self):
        """Nested tables are checked against output with <tbody> added."""
        nested = (
            '<table id="1">'
            '<tr>'
            "<td>Here's another table:"
            '<table id="2">'
            '<tr><td>foo</td></tr>'
            '</table></td>'
        )
        # The expected output has a <tbody> in each table and the
        # unclosed outer <tr> and <table> closed.
        expected = (
            '<table id="1"><tbody><tr><td>Here\'s another table:'
            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
            '</td></tr></tbody></table>'
        )
        self.assertSoupEquals(nested, expected)

        # A table that already spells out <thead>, <tbody> and <tfoot>
        # is checked without a separate expected value.
        sectioned = (
            "<table><thead><tr><td>Foo</td></tr></thead>"
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>"
        )
        self.assertSoupEquals(sectioned)

    def test_collapsed_whitespace(self):
        """Whitespace is preserved even in tags that don't require it."""
        for markup in ("<p> </p>", "<b> </b>"):
            self.assertSoupEquals(markup)

    def test_cdata(self):
        """Parse a <div> holding a CDATA section and print the result."""
        # NOTE(review): nothing is asserted here; the parsed document is
        # only printed. Parenthesising the operand gives the same output
        # under the Python 2 print statement.
        parsed = self.soup("<div><![CDATA[foo]]></div>")
        print(parsed)
class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
    """Smoke tests for invalid markup, run with `HTML5TreeBuilder`.

    See `BuilderInvalidMarkupSmokeTest`.
    """

    @property
    def default_builder(self):
        """Return a new `HTML5TreeBuilder` instance for the tests to use."""
        return HTML5TreeBuilder()

    def test_unclosed_block_level_elements(self):
        """An unclosed <b> is closed and reopened across block tags."""
        # In the expected output the <b> is closed before its enclosing
        # <p> and <blockquote> close, and a new <b> wraps the text of
        # the following <p>.
        broken = '<blockquote><p><b>Foo</blockquote><p>Bar'
        repaired = (
            '<blockquote><p><b>Foo</b></p></blockquote><p><b>Bar</b></p>')
        self.assertSoupEquals(broken, repaired)

    def test_table_containing_bare_markup(self):
        """A <div> placed directly inside a <table> ends up before it."""
        # Markup should be in table cells, not directly in the table;
        # the expected output has the <div> ahead of an empty table.
        broken = "<table><div>Foo</div></table>"
        repaired = "<div>Foo</div><table></table>"
        self.assertSoupEquals(broken, repaired)

    def test_incorrectly_nested_tables(self):
        """A <table> opened directly in a <tr> becomes a sibling table."""
        broken = '<table><tr><table><tr id="nested">'
        repaired = (
            '<table><tbody><tr></tr></tbody></table>'
            '<table><tbody><tr id="nested"></tr></tbody></table>'
        )
        self.assertSoupEquals(broken, repaired)

    def test_doctype_in_body(self):
        """A doctype inside a <p> is absent from the expected output."""
        with_doctype = "<p>one<!DOCTYPE foobar>two</p>"
        without_doctype = "<p>onetwo</p>"
        self.assertSoupEquals(with_doctype, without_doctype)

    def test_foo(self):
        """Parse an ISO-Latin-1 document and build a UTF-8 variant of it."""
        latin1_doc = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
        parsed = self.soup(latin1_doc)
        # Swap the declared charset, then replace the Latin-1 byte for
        # e-acute with its two-byte UTF-8 form.
        relabelled = latin1_doc.replace(
            "ISO-Latin-1".encode(), "utf-8".encode())
        utf8_doc = relabelled.replace("\xe9", "\xc3\xa9")
        # NOTE(review): nothing is asserted; `parsed` and `utf8_doc` are
        # never compared. Presumably an unfinished encoding test --
        # confirm the intended check before relying on it.
|