summaryrefslogtreecommitdiff
path: root/src/beautifulsoup/builder/__init__.py
blob: 544e896c5f94812d9c2bbf75c19a24b5cfeaa068 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from beautifulsoup.element import Entities

# Names exported by a star-import of this module: the generic builder base
# class and its HTML-aware subclass, both defined below.
__all__ = ['TreeBuilder',
           'HTMLTreeBuilder',
           ]

class TreeBuilder(Entities):
    """Turn a document into a Beautiful Soup object tree.

    This is the base class for builders. It provides the shared settings
    and the `reset`/`feed` interface; `feed` must be overridden by a
    concrete builder.

    Subclasses may declare `self_closing_tags` and
    `preserve_whitespace_tags` as class attributes. Every instance gets
    its own copy of those sets, so mutating one builder's sets does not
    affect other builders or the class itself.
    """

    # NOTE(review): the consumers of these two settings are not visible in
    # this module; presumably they select HTML-vs-XML behaviour and the
    # entity table used when converting smart quotes -- confirm at callers.
    assume_html = False
    smart_quotes_to = Entities.XML_ENTITIES

    # Class-level defaults: a generic builder knows about no special tags.
    # Subclasses override these with their own sets of tag names.
    self_closing_tags = set()
    preserve_whitespace_tags = set()

    def __init__(self):
        """Create a builder that is not yet attached to a soup object."""
        self.soup = None
        # Copy the sets declared on the concrete class instead of resetting
        # them to empty sets. Assigning fresh empty sets here would shadow
        # the values a subclass declares at class level and make
        # isSelfClosingTag() return False for every tag.
        self.self_closing_tags = set(self.self_closing_tags)
        self.preserve_whitespace_tags = set(self.preserve_whitespace_tags)

    def isSelfClosingTag(self, name):
        """Return True if `name` is one of this builder's self-closing tags."""
        return name in self.self_closing_tags

    def reset(self):
        """Reset the builder's state; the base implementation does nothing."""
        pass

    def feed(self, markup):
        """Process `markup`; must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()


class HTMLTreeBuilder(TreeBuilder):
    """A TreeBuilder preloaded with HTML-specific knowledge.

    For example, it lists the tags that HTML treats as self-closing.
    """

    # HTML values for the two settings that TreeBuilder declares.
    smart_quotes_to = Entities.HTML_ENTITIES
    assume_html = True

    # Tag names that HTML treats as self-closing.
    self_closing_tags = set((
        'br', 'hr', 'input', 'img', 'meta',
        'spacer', 'link', 'frame', 'base',
    ))

    # NOTE(review): presumably tags whose inner whitespace must be kept
    # verbatim; the code that reads this set is not visible in this module.
    preserve_whitespace_tags = set(('pre', 'textarea'))