diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2009-04-08 18:11:12 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2009-04-08 18:11:12 -0400 |
commit | ba2790a953d0370ca103f9f9e502a3ce11a80eab (patch) | |
tree | ec081c6de3448de94929fbf6e18edc5df6404698 | |
parent | d1abb37f36b7594d284504095d83d0b11a8ee95b (diff) |
Moved isHTML to a builder property, assume_html.
-rw-r--r-- | BeautifulSoup.py | 10 |
1 file changed, 4 insertions, 6 deletions
```diff
diff --git a/BeautifulSoup.py b/BeautifulSoup.py
index 2615edc..3a25207 100644
--- a/BeautifulSoup.py
+++ b/BeautifulSoup.py
@@ -1010,6 +1010,7 @@ class TreeBuilder(Entities):
     PRESERVE_WHITESPACE_TAGS = set()
     QUOTE_TAGS = set()
     self_closing_tags = set()
+    assume_html = False
 
     def isSelfClosingTag(self, name):
         return name in self.self_closing_tags
@@ -1218,6 +1219,7 @@ class XMLParserBuilder(HTMLParser, TreeBuilder):
 
 class HTMLParserBuilder(XMLParserBuilder):
 
+    assume_html = True
     PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
     QUOTE_TAGS = set(['script', 'textarea'])
     self_closing_tags = set(['br' , 'hr', 'input', 'img', 'meta',
@@ -1308,7 +1310,7 @@ class BeautifulStoneSoup(Tag):
         return XMLParserBuilder()
 
     def __init__(self, markup="", builder=None, parseOnlyThese=None,
-                 fromEncoding=None, isHTML=False):
+                 fromEncoding=None):
         """The Soup object is initialized as the 'root tag', and the
         provided markup (which can be a string or a file-like object)
         is fed into the underlying parser."""
@@ -1327,7 +1329,7 @@ class BeautifulStoneSoup(Tag):
             markup = markup.read()
         self.markup = markup
         try:
-            self._feed(isHTML=isHTML)
+            self._feed(isHTML=self.builder.assume_html)
         except StopParsing:
             pass
         self.markup = None # The markup can now be GCed.
@@ -1581,10 +1583,6 @@ class BeautifulSoup(BeautifulStoneSoup):
     def _defaultBuilder(self):
         return HTMLParserBuilder()
 
-    def __init__(self, *args, **kwargs):
-        kwargs['isHTML'] = True
-        BeautifulStoneSoup.__init__(self, *args, **kwargs)
-
     # Used to detect the charset in a META tag; see start_meta
     CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
 
```