blob: 544e896c5f94812d9c2bbf75c19a24b5cfeaa068 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
from beautifulsoup.element import Entities
# Names exported by ``from <this module> import *``.
__all__ = ['TreeBuilder', 'HTMLTreeBuilder']
class TreeBuilder(Entities):
    """Turn a document into a Beautiful Soup object tree.

    This base class only holds shared settings; `feed` is left for
    subclasses to implement.
    """

    assume_html = False
    smart_quotes_to = Entities.XML_ENTITIES

    # Class-level defaults. A subclass may override these to declare the
    # tags it treats specially; __init__ copies them onto each instance.
    self_closing_tags = set()
    preserve_whitespace_tags = set()

    def __init__(self):
        """Initialise per-instance state.

        `soup` starts out as None. Each instance receives its own copy of
        the tag sets declared on its class, so declarations made by a
        subclass are kept (instead of being replaced by empty sets) and
        mutating one builder's sets does not touch the class-level sets
        or any other builder.
        """
        self.soup = None
        # The right-hand lookups resolve to the class attributes because
        # the instance attributes do not exist yet.
        self.self_closing_tags = set(self.self_closing_tags)
        self.preserve_whitespace_tags = set(self.preserve_whitespace_tags)

    def isSelfClosingTag(self, name):
        """Return True if `name` is in this builder's self-closing tag set."""
        return name in self.self_closing_tags

    def reset(self):
        """Do nothing in the base class; subclasses may override."""
        pass

    def feed(self, markup):
        """Accept `markup`; must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
class HTMLTreeBuilder(TreeBuilder):
    """A TreeBuilder that carries HTML-specific facts.

    For example, it declares which tags are self-closing.
    """

    assume_html = True
    smart_quotes_to = Entities.HTML_ENTITIES

    # Tags declared as whitespace-preserving.
    preserve_whitespace_tags = set((
        'pre',
        'textarea',
    ))

    # Tags declared as self-closing.
    self_closing_tags = set((
        'br',
        'hr',
        'input',
        'img',
        'meta',
        'spacer',
        'link',
        'frame',
        'base',
    ))
|