diff options
| author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-01 11:21:08 -0500 |
|---|---|---|
| committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-01 11:21:08 -0500 |
| commit | 342fbb95061e21cfda550f41b4faef7e3d569077 (patch) | |
| tree | 2070c3d4e8e61baa890b7e81d0ab187d268c6646 | |
| parent | 50f872c78f8b894996f81176d5e03f4a6bbe8efd (diff) | |
Committed minor changes made while writing docs.
| -rw-r--r-- | README.txt | 8 |
| -rw-r--r-- | TODO | 6 |
| -rw-r--r-- | bs4/element.py | 4 |
3 files changed, 14 insertions, 4 deletions
```diff
--- a/README.txt
+++ b/README.txt
@@ -42,13 +42,17 @@ code and plug in any HTML or XML parser you want.
 
 Beautiful Soup 4.0 comes with glue code for four parsers:
 
- * Python's standard HTMLParser
+ * Python's standard HTMLParser (html.parser in Python 3)
  * lxml's HTML and XML parsers
  * html5lib's HTML parser
 
 HTMLParser is the default, but I recommend you install one of the
 other parsers, or you'll have problems handling real-world markup.
 
+For complete documentation, see the Sphinx documentation in
+docs/source. What follows is a summary of the changes from Beautiful
+Soup 3.
+
 == The module name has changed ==
 
 Previously you imported the BeautifulSoup class from a module also
@@ -122,7 +126,7 @@ names, and turned into properties:
  * nextSiblingGenerator() -> next_siblings
  * previousGenerator() -> previous_elements
  * previousSiblingGenerator() -> previous_siblings
- * recursiveChildGenerator() -> recursive_children
+ * recursiveChildGenerator() -> descendants
  * parentGenerator() -> parents
 
 So instead of this:
--- a/TODO
+++ b/TODO
@@ -1,3 +1,9 @@
+soup.new_tar("<br>") should create an empty-element tag if the soup
+was created with an HTML-aware builder, but not otherwise. This
+requires keeping around information about the builder.
+
+Is whitespace being processed correctly?
+
 if len(tag) > 3 and tag.endswith('Tag'): -> endswith('_tag')
 markup_attr_map can be optimized since it's always a map now.
 Can we get rid of isList?
diff --git a/bs4/element.py b/bs4/element.py
index 5db5b36..0ba2bdc 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -271,11 +271,11 @@ class PageElement(object):
         if isinstance(name, SoupStrainer):
             strainer = name
         elif text is None and not limit and not attrs and not kwargs:
-            # findAll*(True)
+            # Optimization to find all tags.
             if name is True or name is None:
                 return [element for element in generator
                         if isinstance(element, Tag)]
-            # findAll*('tag-name')
+            # Optimization to find all tags with a given name.
             elif isinstance(name, basestring):
                 return [element for element in generator
                         if isinstance(element, Tag) and element.name == name]
```
