diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 19:30:01 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 19:30:01 -0500 |
commit | c3090d7e7337f88853fc5371c6d8011eb638c37f (patch) | |
tree | cfdc90333c894ade4e2dad99e16329253be5fea5 /beautifulsoup | |
parent | 39a2b266b634aa2eca4329a6719e090087113f46 (diff) |
Renamed constructor arguments to comply with PEP 8.
Diffstat (limited to 'beautifulsoup')
-rw-r--r-- | beautifulsoup/__init__.py | 56 (18 insertions, 38 deletions)
-rw-r--r-- | beautifulsoup/builder/__init__.py | 2 (1 insertion, 1 deletion)
2 files changed, 19 insertions, 39 deletions
```diff
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index 968be08..b8598e2 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -3,34 +3,14 @@ Elixir and Tonic
 "The Screen-Scraper's Friend"
 http://www.crummy.com/software/BeautifulSoup/
 
-Beautiful Soup parses a (possibly invalid) XML or HTML document into a
-tree representation. It provides methods and Pythonic idioms that make
-it easy to navigate, search, and modify the tree.
+Beautiful Soup uses a plug-in parser to parse a (possibly invalid) XML
+or HTML document into a tree representation. The parser does the work
+of building a parse tree, and Beautiful Soup provides provides methods
+and Pythonic idioms that make it easy to navigate, search, and modify
+the parse tree.
 
-A well-formed XML/HTML document yields a well-formed data
-structure. An ill-formed XML/HTML document yields a correspondingly
-ill-formed data structure. If your document is only locally
-well-formed, you can use this library to find and process the
-well-formed part of it.
-
-Beautiful Soup works with Python 2.2 and up. It has no external
-dependencies, but you'll have more success at converting data to UTF-8
-if you also install these three packages:
-
-* chardet, for auto-detecting character encodings
-  http://chardet.feedparser.org/
-* cjkcodecs and iconv_codec, which add more encodings to the ones supported
-  by stock Python.
-  http://cjkpython.i18n.org/
-
-Beautiful Soup defines classes for two main parsing strategies:
-
- * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
-   language that kind of looks like XML.
-
- * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
-   or invalid. This class has web browser-like heuristics for
-   obtaining a sensible parse tree in the face of common HTML errors.
+Beautiful Soup works with Python 2.5 and up. To get it to work, you
+must install either lxml or html5lib.
 
 For more than you ever wanted to know about Beautiful Soup, see the
 documentation:
@@ -38,7 +18,7 @@ http://www.crummy.com/software/BeautifulSoup/documentation.html
 
 Here, have some legalese:
 
-Copyright (c) 2004-2009, Leonard Richardson
+Copyright (c) 2004-2011, Leonard Richardson
 
 All rights reserved.
 
@@ -127,8 +107,8 @@ class BeautifulSoup(Tag):
         from builder import LXMLTreeBuilder
         return LXMLTreeBuilder()
 
-    def __init__(self, markup="", builder=None, parseOnlyThese=None,
-                 fromEncoding=None):
+    def __init__(self, markup="", builder=None, parse_only=None,
+                 from_encoding=None):
         """The Soup object is initialized as the 'root tag', and the
         provided markup (which can be a string or a file-like object)
         is fed into the underlying parser."""
@@ -138,14 +118,14 @@ class BeautifulSoup(Tag):
         self.builder = builder
         self.builder.soup = self
 
-        self.parseOnlyThese = parseOnlyThese
+        self.parse_only = parse_only
 
         self.reset()
 
         if hasattr(markup, 'read'): # It's a file-type object.
             markup = markup.read()
         self.markup, self.original_encoding, self.declared_html_encoding = (
-            self.builder.prepare_markup(markup, fromEncoding))
+            self.builder.prepare_markup(markup, from_encoding))
 
         try:
             self._feed()
@@ -201,9 +181,9 @@ class BeautifulSoup(Tag):
                 else:
                     currentData = ' '
             self.currentData = []
-            if self.parseOnlyThese and len(self.tagStack) <= 1 and \
-                   (not self.parseOnlyThese.text or \
-                    not self.parseOnlyThese.search(currentData)):
+            if self.parse_only and len(self.tagStack) <= 1 and \
+                   (not self.parse_only.text or \
+                    not self.parse_only.search(currentData)):
                 return
             o = containerClass(currentData)
             self.object_was_parsed(o)
@@ -251,9 +231,9 @@ class BeautifulSoup(Tag):
         #print "Start tag %s: %s" % (name, attrs)
         self.endData()
 
-        if (self.parseOnlyThese and len(self.tagStack) <= 1
-            and (self.parseOnlyThese.text
-                 or not self.parseOnlyThese.searchTag(name, attrs))):
+        if (self.parse_only and len(self.tagStack) <= 1
+            and (self.parse_only.text
+                 or not self.parse_only.searchTag(name, attrs))):
             return None
 
         tag = Tag(self, self.builder, name, attrs, self.currentTag,
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index 522960a..854cc56 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -208,7 +208,7 @@ class HTMLTreeBuilder(TreeBuilder):
                 match = self.CHARSET_RE.search(content)
                 if match:
                     if (self.soup.declared_html_encoding is not None or
-                        self.soup.original_encoding == self.soup.fromEncoding):
+                        self.soup.original_encoding == self.soup.from_encoding):
                         # An HTML encoding was sniffed while converting
                         # the document to Unicode, or an HTML encoding was
                         # sniffed during a previous pass through the
```