diff options
Diffstat (limited to 'BeautifulSoup.py')
-rw-r--r-- | BeautifulSoup.py | 10 |
1 files changed, 4 insertions, 6 deletions
diff --git a/BeautifulSoup.py b/BeautifulSoup.py
index 2615edc..3a25207 100644
--- a/BeautifulSoup.py
+++ b/BeautifulSoup.py
@@ -1010,6 +1010,7 @@ class TreeBuilder(Entities):
     PRESERVE_WHITESPACE_TAGS = set()
     QUOTE_TAGS = set()
     self_closing_tags = set()
+    assume_html = False
 
     def isSelfClosingTag(self, name):
         return name in self.self_closing_tags
@@ -1218,6 +1219,7 @@ class XMLParserBuilder(HTMLParser, TreeBuilder):
 
 
 class HTMLParserBuilder(XMLParserBuilder):
+    assume_html = True
     PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
     QUOTE_TAGS = set(['script', 'textarea'])
     self_closing_tags = set(['br' , 'hr', 'input', 'img', 'meta',
@@ -1308,7 +1310,7 @@ class BeautifulStoneSoup(Tag):
         return XMLParserBuilder()
 
     def __init__(self, markup="", builder=None, parseOnlyThese=None,
-                 fromEncoding=None, isHTML=False):
+                 fromEncoding=None):
         """The Soup object is initialized as the 'root tag', and the
         provided markup (which can be a string or a file-like object)
         is fed into the underlying parser."""
@@ -1327,7 +1329,7 @@ class BeautifulStoneSoup(Tag):
             markup = markup.read()
         self.markup = markup
         try:
-            self._feed(isHTML=isHTML)
+            self._feed(isHTML=self.builder.assume_html)
         except StopParsing:
             pass
         self.markup = None # The markup can now be GCed.
@@ -1581,10 +1583,6 @@ class BeautifulSoup(BeautifulStoneSoup):
     def _defaultBuilder(self):
         return HTMLParserBuilder()
 
-    def __init__(self, *args, **kwargs):
-        kwargs['isHTML'] = True
-        BeautifulStoneSoup.__init__(self, *args, **kwargs)
-
     # Used to detect the charset in a META tag; see start_meta
     CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
 