Removed rarely used parsers--if we need them again, we can make tree builders.

author: Leonard Richardson <leonard.richardson@canonical.com> 2009-04-08 18:01:26 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2009-04-08 18:01:26 -0400
commit: 6de21249eab260ecfa17499dc593204db5bd1f43 (patch)
tree: d7fef1e714dad702a309d55c8fe98e8f07cb586c /BeautifulSoup.py
parent: 5e932f046847c1417b7ff7dab580b56fd214db88 (diff)
1 files changed, 0 insertions, 99 deletions
diff --git a/BeautifulSoup.py b/BeautifulSoup.py
index 918077e..2f8c3ef 100644
--- a/BeautifulSoup.py
+++ b/BeautifulSoup.py
@@ -1638,105 +1638,6 @@ class BeautifulSoup(BeautifulStoneSoup):
 class StopParsing(Exception):
     pass
 
-class ICantBelieveItsBeautifulSoup(BeautifulSoup):
-
-    """The BeautifulSoup class is oriented towards skipping over
-    common HTML errors like unclosed tags. However, sometimes it makes
-    errors of its own. For instance, consider this fragment:
-
-     <b>Foo<b>Bar</b></b>
-
-    This is perfectly valid (if bizarre) HTML. However, the
-    BeautifulSoup class will implicitly close the first b tag when it
-    encounters the second 'b'. It will think the author wrote
-    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
-    there's no real-world reason to bold something that's already
-    bold. When it encounters '</b></b>' it will close two more 'b'
-    tags, for a grand total of three tags closed instead of two. This
-    can throw off the rest of your document structure. The same is
-    true of a number of other tags, listed below.
-
-    It's much more common for someone to forget to close a 'b' tag
-    than to actually use nested 'b' tags, and the BeautifulSoup class
-    handles the common case. This class handles the not-co-common
-    case: where you can't believe someone wrote what they did, but
-    it's valid HTML and BeautifulSoup screwed up by assuming it
-    wouldn't be."""
-
-    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
-     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
-      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
-      'big']
-
-    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
-
-    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
-                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
-                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
-
-class MinimalSoup(BeautifulSoup):
-    """The MinimalSoup class is for parsing HTML that contains
-    pathologically bad markup. It makes no assumptions about tag
-    nesting, but it does know which tags are self-closing, that
-    <script> tags contain Javascript and should not be parsed, that
-    META tags may contain encoding information, and so on.
-
-    This also makes it better for subclassing than BeautifulStoneSoup
-    or BeautifulSoup."""
-
-    RESET_NESTING_TAGS = buildTagMap('noscript')
-    NESTABLE_TAGS = {}
-
-class BeautifulSOAP(BeautifulStoneSoup):
-    """This class will push a tag with only a single string child into
-    the tag's parent as an attribute. The attribute's name is the tag
-    name, and the value is the string child. An example should give
-    the flavor of the change:
-
-    <foo><bar>baz</bar></foo>
-     =>
-    <foo bar="baz"><bar>baz</bar></foo>
-
-    You can then access fooTag['bar'] instead of fooTag.barTag.string.
-
-    This is, of course, useful for scraping structures that tend to
-    use subelements instead of attributes, such as SOAP messages. Note
-    that it modifies its input, so don't print the modified version
-    out.
-
-    I'm not sure how many people really want to use this class; let me
-    know if you do. Mainly I like the name."""
-
-    def popTag(self):
-        if len(self.tagStack) > 1:
-            tag = self.tagStack[-1]
-            parent = self.tagStack[-2]
-            parent._getAttrMap()
-            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
-                isinstance(tag.contents[0], NavigableString) and
-                not parent.attrMap.has_key(tag.name)):
-                parent[tag.name] = tag.contents[0]
-        BeautifulStoneSoup.popTag(self)
-
-#Enterprise class names! It has come to our attention that some people
-#think the names of the Beautiful Soup parser classes are too silly
-#and "unprofessional" for use in enterprise screen-scraping. We feel
-#your pain! For such-minded folk, the Beautiful Soup Consortium And
-#All-Night Kosher Bakery recommends renaming this file to
-#"RobustParser.py" (or, in cases of extreme enterprisiness,
-#"RobustParserBeanInterface.class") and using the following
-#enterprise-friendly class aliases:
-class RobustXMLParser(BeautifulStoneSoup):
-    pass
-class RobustHTMLParser(BeautifulSoup):
-    pass
-class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
-    pass
-class RobustInsanelyWackAssHTMLParser(MinimalSoup):
-    pass
-class SimplifyingSOAPParser(BeautifulSOAP):
-    pass
-
 ######################################################
 #
 # Bonus library: Unicode, Dammit
author	Leonard Richardson <leonard.richardson@canonical.com>	2009-04-08 18:01:26 -0400
committer	Leonard Richardson <leonard.richardson@canonical.com>	2009-04-08 18:01:26 -0400
commit	6de21249eab260ecfa17499dc593204db5bd1f43 (patch)
tree	d7fef1e714dad702a309d55c8fe98e8f07cb586c /BeautifulSoup.py
parent	5e932f046847c1417b7ff7dab580b56fd214db88 (diff)