summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.txt7
-rw-r--r--TODO41
-rw-r--r--bs4/element.py8
-rw-r--r--bs4/tests/test_tree.py17
4 files changed, 55 insertions, 18 deletions
diff --git a/README.txt b/README.txt
index ac343e4..e7670af 100644
--- a/README.txt
+++ b/README.txt
@@ -141,6 +141,13 @@ You can write this:
(But the old code will still work.)
+Some of the generators used to yield None after they were done, and
+then stop. That was a bug. Now, the generators just stop.
+
+There are two new generators, .strings and .stripped_strings. .strings
+yields NavigableString objects, and .stripped_strings yields Python
+strings that have had whitespace stripped.
+
== tag.string is recursive ==
tag.string now operates recursively. If tag A contains a single tag B
diff --git a/TODO b/TODO
index b40fb18..19dbd30 100644
--- a/TODO
+++ b/TODO
@@ -1,26 +1,43 @@
-soup.new_tar("<br>") should create an empty-element tag if the soup
+Bugs
+----
+
+* I think whitespace may not be processed correctly.
+
+* Characters like & < > should always be converted to HTML entities on
+ output, even if substitute_html_entities is False.
+
+Big features
+------------
+
+* Add namespace support.
+
+* soup.new_tag("<br>") should create an empty-element tag if the soup
was created with an HTML-aware builder, but not otherwise. This
requires keeping around information about the builder.
-Is whitespace being processed correctly?
+Optimizations
+-------------
-if len(tag) > 3 and tag.endswith('Tag'): -> endswith('_tag')
markup_attr_map can be optimized since it's always a map now.
-Can we get rid of isList?
-Split self.assertRaises(ValueError, tree.index, 1) into a separate test
-Bare ampersands should be converted to HTML entities upon output.
+BS3 features not yet ported
+---------------------------
+
+* In BS3, "soup.aTag" is the same as 'soup.find("a")'. This lets you
+locate a tag called (let's say) "find" with attribute
+access. "soup.find" won't do what you want, but "soup.findTag" will.
-Add namespace support.
+This still works in BS4 but it's deprecated. I could make
+"soup.find_tag" work the same way as "soup.find('find')", but I don't
+think it's worth it.
-XML handling:
+CDATA
+-----
The elementtree XMLParser has a strip_cdata argument that, when set to
False, should allow Beautiful Soup to preserve CDATA sections instead
-of treating them as text. (This argument is also present for
-HTMLParser, but does nothing.)
-
-Later:
+of treating them as text. Except it doesn't. (This argument is also
+present for HTMLParser, and also does nothing there.)
Currently, html5lib converts CDATA sections into comments. An
as-yet-unreleased version of html5lib changes the parser's handling of
diff --git a/bs4/element.py b/bs4/element.py
index 75d5d9d..08a0181 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,6 +1,7 @@
import collections
import re
import sys
+import warnings
from bs4.dammit import EntitySubstitution
DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -618,7 +619,12 @@ class Tag(PageElement):
def __getattr__(self, tag):
#print "Getattr %s.%s" % (self.__class__, tag)
if len(tag) > 3 and tag.endswith('Tag'):
- return self.find(tag[:-3])
+ # BS3: soup.aTag -> soup.find("a")
+ tag_name = tag[:-3]
+ warnings.warn(
+ '.%sTag is deprecated, use .find("%s") instead.' % (
+ tag_name, tag_name))
+ return self.find(tag_name)
# We special case contents to avoid recursion.
elif not tag.startswith("__") and not tag=="contents":
return self.find(tag)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 60b9b91..c991a85 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -2,7 +2,7 @@
"""Tests for Beautiful Soup's tree traversal methods.
The tree traversal methods are the main advantage of using Beautiful
-Soup over other parsers.
+Soup over just using a parser.
Different parsers will build different Beautiful Soup trees given the
same markup, but all Beautiful Soup trees can be traversed with the
@@ -12,6 +12,7 @@ methods tested here.
import copy
import pickle
import re
+import warnings
from bs4 import BeautifulSoup
from bs4.builder import builder_registry
from bs4.element import CData, SoupStrainer, Tag
@@ -778,14 +779,20 @@ class TestElementObjects(SoupTest):
self.assertEqual(len(soup.top.contents), 3)
def test_member_access_invokes_find(self):
- """Accessing a Python member .foo or .fooTag invokes find('foo')"""
+ """Accessing a Python member .foo invokes find('foo')"""
soup = self.soup('<b><i></i></b>')
self.assertEqual(soup.b, soup.find('b'))
- self.assertEqual(soup.bTag, soup.find('b'))
self.assertEqual(soup.b.i, soup.find('b').find('i'))
- self.assertEqual(soup.bTag.iTag, soup.find('b').find('i'))
self.assertEqual(soup.a, None)
- self.assertEqual(soup.aTag, None)
+
+ def test_deprecated_member_access(self):
+ soup = self.soup('<b><i></i></b>')
+ with warnings.catch_warnings(record=True) as w:
+ tag = soup.bTag
+ self.assertEqual(soup.b, tag)
+ self.assertEqual(
+ '.bTag is deprecated, use .find("b") instead.',
+ str(w[0].message))
def test_has_attr(self):
"""has_attr() checks for the presence of an attribute.