diff options
-rw-r--r-- | README.txt | 8 | ||||
-rw-r--r-- | TODO | 6 | ||||
-rw-r--r-- | bs4/element.py | 4 |
3 files changed, 14 insertions, 4 deletions
@@ -42,13 +42,17 @@ code and plug in any HTML or XML parser you want. Beautiful Soup 4.0 comes with glue code for four parsers: - * Python's standard HTMLParser + * Python's standard HTMLParser (html.parser in Python 3) * lxml's HTML and XML parsers * html5lib's HTML parser HTMLParser is the default, but I recommend you install one of the other parsers, or you'll have problems handling real-world markup. +For complete documentation, see the Sphinx documentation in +docs/source. What follows is a summary of the changes from Beautiful +Soup 3. + == The module name has changed == Previously you imported the BeautifulSoup class from a module also @@ -122,7 +126,7 @@ names, and turned into properties: * nextSiblingGenerator() -> next_siblings * previousGenerator() -> previous_elements * previousSiblingGenerator() -> previous_siblings - * recursiveChildGenerator() -> recursive_children + * recursiveChildGenerator() -> descendants * parentGenerator() -> parents So instead of this: @@ -1,3 +1,9 @@ +soup.new_tag("<br>") should create an empty-element tag if the soup +was created with an HTML-aware builder, but not otherwise. This +requires keeping around information about the builder. + +Is whitespace being processed correctly? + if len(tag) > 3 and tag.endswith('Tag'): -> endswith('_tag') markup_attr_map can be optimized since it's always a map now. Can we get rid of isList? diff --git a/bs4/element.py b/bs4/element.py index 5db5b36..0ba2bdc 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -271,11 +271,11 @@ class PageElement(object): if isinstance(name, SoupStrainer): strainer = name elif text is None and not limit and not attrs and not kwargs: - # findAll*(True) + # Optimization to find all tags. if name is True or name is None: return [element for element in generator if isinstance(element, Tag)] - # findAll*('tag-name') + # Optimization to find all tags with a given name. 
elif isinstance(name, basestring): return [element for element in generator if isinstance(element, Tag) and element.name == name] |