From e7435b9f7c2779f7930aa2a8e5f25c6efc7f4b21 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 8 Apr 2009 18:13:16 -0400 Subject: Moved docstring. --- BeautifulSoup.py | 91 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) (limited to 'BeautifulSoup.py') diff --git a/BeautifulSoup.py b/BeautifulSoup.py index 3a25207..1427ad2 100644 --- a/BeautifulSoup.py +++ b/BeautifulSoup.py @@ -1218,6 +1218,51 @@ class XMLParserBuilder(HTMLParser, TreeBuilder): class HTMLParserBuilder(XMLParserBuilder): + """This builder knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

<p> tag should implicitly close the previous <p> tag. + +

<p>Para1<p>Para2 + should be transformed into: +

<p>Para1</p><p>Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a

<blockquote> tag should _not_ implicitly close the previous + <blockquote> tag. + + Alice said: <blockquote>Bob said: <blockquote>Blah + should NOT be transformed into: + Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a <tr> tag should + implicitly close the previous <tr> tag within the same <table>, + but not close a <tr> tag in another table. + +
<table><tr>Blah<tr>Blah + should be transformed into: + <table><tr>Blah</tr><tr>Blah + but, + <tr>Blah<table><tr>Blah + should NOT be transformed into + <tr>Blah</tr><table><tr>Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try subclassing this tree builder or using another parser's tree + builder.""" assume_html = True PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) @@ -1534,52 +1579,6 @@ class BeautifulStoneSoup(Tag): class BeautifulSoup(BeautifulStoneSoup): - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a

<p> tag should implicitly close the previous <p> tag. - -

<p>Para1<p>Para2 - should be transformed into: -

<p>Para1</p><p>Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a

<blockquote> tag should _not_ implicitly close the previous - <blockquote> tag. - - Alice said: <blockquote>Bob said: <blockquote>Blah - should NOT be transformed into: - Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a
<tr> tag should - implicitly close the previous <tr> tag within the same <table>, - but not close a <tr> tag in another table. - -
<table><tr>Blah<tr>Blah - should be transformed into: - <table><tr>Blah</tr><tr>Blah - but, - <tr>Blah<table><tr>Blah - should NOT be transformed into - <tr>Blah</tr><table><tr>Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup, MinimalSoup, or - BeautifulStoneSoup before writing your own subclass.""" - def _defaultBuilder(self): return HTMLParserBuilder() -- cgit v1.2.3