From 342fbb95061e21cfda550f41b4faef7e3d569077 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 1 Feb 2012 11:21:08 -0500 Subject: Committed minor changes made while writing docs. --- README.txt | 8 ++++++-- TODO | 6 ++++++ bs4/element.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/README.txt b/README.txt index fb87497..ac343e4 100644 --- a/README.txt +++ b/README.txt @@ -42,13 +42,17 @@ code and plug in any HTML or XML parser you want. Beautiful Soup 4.0 comes with glue code for four parsers: - * Python's standard HTMLParser + * Python's standard HTMLParser (html.parser in Python 3) * lxml's HTML and XML parsers * html5lib's HTML parser HTMLParser is the default, but I recommend you install one of the other parsers, or you'll have problems handling real-world markup. +For complete documentation, see the Sphinx documentation in +docs/source. What follows is a summary of the changes from Beautiful +Soup 3. + == The module name has changed == Previously you imported the BeautifulSoup class from a module also @@ -122,7 +126,7 @@ names, and turned into properties: * nextSiblingGenerator() -> next_siblings * previousGenerator() -> previous_elements * previousSiblingGenerator() -> previous_siblings - * recursiveChildGenerator() -> recursive_children + * recursiveChildGenerator() -> descendants * parentGenerator() -> parents So instead of this: diff --git a/TODO b/TODO index 060dc13..b40fb18 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,9 @@ +soup.new_tar("<br>") should create an empty-element tag if the soup +was created with an HTML-aware builder, but not otherwise. This +requires keeping around information about the builder. + +Is whitespace being processed correctly? + if len(tag) > 3 and tag.endswith('Tag'): -> endswith('_tag') markup_attr_map can be optimized since it's always a map now. Can we get rid of isList? diff --git a/bs4/element.py b/bs4/element.py index 5db5b36..0ba2bdc 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -271,11 +271,11 @@ class PageElement(object): if isinstance(name, SoupStrainer): strainer = name elif text is None and not limit and not attrs and not kwargs: - # findAll*(True) + # Optimization to find all tags. if name is True or name is None: return [element for element in generator if isinstance(element, Tag)] - # findAll*('tag-name') + # Optimization to find all tags with a given name. elif isinstance(name, basestring): return [element for element in generator if isinstance(element, Tag) and element.name == name] -- cgit v1.2.3