summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS.txt4
-rw-r--r--bs4/element.py11
-rw-r--r--bs4/tests/test_tree.py2
-rw-r--r--doc/source/conf.py256
-rw-r--r--doc/source/index.rst30
5 files changed, 289 insertions, 14 deletions
diff --git a/NEWS.txt b/NEWS.txt
index c65058c..862513a 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,4 +1,4 @@
-= 4.2.0 (Unreleased) =
+= 4.2.0 (20130514) =
* The Tag.select() method now supports a much wider variety of CSS
selectors.
@@ -60,7 +60,7 @@
* Stop a crash when unwisely messing with a tag that's been
decomposed. [bug=1097699]
-* Now that lxml's segfault on invalid doctype has been fixed, fix a
+* Now that lxml's segfault on invalid doctype has been fixed, fixed a
corresponding problem on the Beautiful Soup end that was previously
invisible. [bug=984936]
diff --git a/bs4/element.py b/bs4/element.py
index 1f121f4..f6864f2 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1392,10 +1392,13 @@ class Tag(PageElement):
def recursiveChildGenerator(self):
return self.descendants
- # This was kind of misleading because has_key() (attributes) was
- # different from __in__ (contents). has_key() is gone in Python 3,
- # anyway.
- has_key = has_attr
+ def has_key(self, key):
+ """This was kind of misleading because has_key() (attributes)
+ was different from __in__ (contents). has_key() is gone in
+ Python 3, anyway."""
+ warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % (
+ key))
+ return self.has_attr(key)
# Next, a couple classes to represent queries and their results.
class SoupStrainer(object):
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index b07de8c..f60485b 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1618,7 +1618,7 @@ class TestSoupSelector(TreeTest):
for el in els:
self.assertEqual(el.name, 'p')
self.assertEqual(els[1]['class'], ['onep'])
- self.assertFalse(els[0].has_key('class'))
+ self.assertFalse(els[0].has_attr('class'))
def test_a_bunch_of_emptys(self):
for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
diff --git a/doc/source/conf.py b/doc/source/conf.py
new file mode 100644
index 0000000..102c3cf
--- /dev/null
+++ b/doc/source/conf.py
@@ -0,0 +1,256 @@
+# -*- coding: utf-8 -*-
+#
+# Beautiful Soup documentation build configuration file, created by
+# sphinx-quickstart on Thu Jan 26 11:22:55 2012.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = []
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Beautiful Soup'
+copyright = u'2012, Leonard Richardson'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '4'
+# The full version, including alpha/beta/rc tags.
+release = '4.2.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'BeautifulSoupdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('index', 'BeautifulSoup.tex', u'Beautiful Soup Documentation',
+ u'Leonard Richardson', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('index', 'beautifulsoup', u'Beautiful Soup Documentation',
+ [u'Leonard Richardson'], 1)
+]
+
+
+# -- Options for Epub output ---------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = u'Beautiful Soup'
+epub_author = u'Leonard Richardson'
+epub_publisher = u'Leonard Richardson'
+epub_copyright = u'2012, Leonard Richardson'
+
+# The language of the text. It defaults to the language option
+# or en if the language is not set.
+#epub_language = ''
+
+# The scheme of the identifier. Typical schemes are ISBN or URL.
+#epub_scheme = ''
+
+# The unique identifier of the text. This can be an ISBN number
+# or the project homepage.
+#epub_identifier = ''
+
+# A unique identification for the text.
+#epub_uid = ''
+
+# HTML files that should be inserted before the pages created by sphinx.
+# The format is a list of tuples containing the path and title.
+#epub_pre_files = []
+
+# HTML files that should be inserted after the pages created by sphinx.
+# The format is a list of tuples containing the path and title.
+#epub_post_files = []
+
+# A list of files that should not be packed into the epub file.
+#epub_exclude_files = []
+
+# The depth of the table of contents in toc.ncx.
+#epub_tocdepth = 3
+
+# Allow duplicate toc entries.
+#epub_tocdup = True
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 8e0204b..b1cbd21 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -1095,7 +1095,7 @@ Here's a function that returns ``True`` if a tag defines the "class"
attribute but doesn't define the "id" attribute::
def has_class_but_no_id(tag):
- return tag.has_key('class') and not tag.has_key('id')
+ return tag.has_attr('class') and not tag.has_attr('id')
Pass this function into ``find_all()`` and you'll pick up all the <p>
tags::
@@ -2746,16 +2746,11 @@ Other parser problems
preserve mixed-case or uppercase tags and attributes, you'll need to
:ref:`parse the document as XML. <parsing-xml>`
+.. _misc:
Miscellaneous
-------------
-* ``KeyError: [attr]`` - Caused by accessing ``tag['attr']`` when the
- tag in question doesn't define the ``attr`` attribute. The most
- common errors are ``KeyError: 'href'`` and ``KeyError:
- 'class'``. Use ``tag.get('attr')`` if you're not sure ``attr`` is
- defined, just as you would with a Python dictionary.
-
* ``UnicodeEncodeError: 'charmap' codec can't encode character
u'\xfoo' in position bar`` (or just about any other
``UnicodeEncodeError``) - This is not a problem with Beautiful Soup.
@@ -2768,6 +2763,27 @@ Miscellaneous
solution is to explicitly encode the Unicode string into UTF-8 with
``u.encode("utf8")``.
+* ``KeyError: [attr]`` - Caused by accessing ``tag['attr']`` when the
+ tag in question doesn't define the ``attr`` attribute. The most
+ common errors are ``KeyError: 'href'`` and ``KeyError:
+ 'class'``. Use ``tag.get('attr')`` if you're not sure ``attr`` is
+ defined, just as you would with a Python dictionary.
+
+* ``AttributeError: 'ResultSet' object has no attribute 'foo'`` - This
+ usually happens because you expected ``find_all()`` to return a
+ single tag or string. But ``find_all()`` returns a *list* of tags
+ and strings--a ``ResultSet`` object. You need to iterate over the
+ list and look at the ``.foo`` of each one. Or, if you really only
+ want one result, you need to use ``find()`` instead of
+ ``find_all()``.
+
+* ``AttributeError: 'NoneType' object has no attribute 'foo'`` - This
+ usually happens because you called ``find()`` and then tried to
+ access the ``.foo`` attribute of the result. But in your case,
+ ``find()`` didn't find anything, so it returned ``None``, instead of
+ returning a tag or a string. You need to figure out why your
+ ``find()`` call isn't returning anything.
+
Improving Performance
---------------------