diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-23 11:56:40 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-23 11:56:40 -0500 |
commit | b7749c50a2c96ccf6982cfa1ca02d883e31e0af9 (patch) | |
tree | 404063e3bb580627a6cabad1b80774981d4f5232 | |
parent | 2f72913160bedb509a8042693328d139e7c6b945 (diff) |
Bumped version number.
-rw-r--r-- | NEWS.txt | 10 | ||||
-rw-r--r-- | bs4/__init__.py | 2 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 6 | ||||
-rw-r--r-- | bs4/element.py | 32 | ||||
-rw-r--r-- | bs4/testing.py | 26 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 21 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 4 | ||||
-rw-r--r-- | setup.py | 2 |
8 files changed, 59 insertions, 44 deletions
@@ -1,3 +1,13 @@ += 4.0.0b8 () = + +* All tree builders now preserve namespace information in the + documents they parse. + + However, there is no special support for namespace-oriented + searching or tree manipulation. When you search the tree, you need + to use namespace prefixes exactly as they're used in the original + document. + = 4.0.0b7 (20110223) = * Upon decoding to string, any characters that can't be represented in diff --git a/bs4/__init__.py b/bs4/__init__.py index 2dd0521..bf800ea 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.0.0b7" +__version__ = "4.0.0b8" __copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" __license__ = "MIT" diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 7ce69aa..26b1773 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -10,11 +10,7 @@ from bs4.builder import ( ) from bs4.element import NamespacedAttribute import html5lib -from html5lib.constants import ( - DataLossWarning, - namespaces, - ) -import warnings +from html5lib.constants import namespaces from bs4.element import ( Comment, Doctype, diff --git a/bs4/element.py b/bs4/element.py index 7e5810a..c2c4e2e 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -22,29 +22,14 @@ def _alias(attr): return alias -class NamespacedAttribute(object): +class NamespacedAttribute(unicode): - def __init__(self, namespace_abbreviation, name, namespace=None): - self.namespace_abbreviation = namespace_abbreviation - self.name = name - self.namespace = namespace - - def __eq__(self, other): - if isinstance(other, NamespacedAttribute): - return ( - self.namespace_abbreviation == other.namespace_abbreviation - and self.name == other.name - and self.namespace == other.namespace) - elif isinstance(other, basestring): - return str(self) == other - else: - return super(NamespacedAttribute, self).__eq__(other) - - def __str__(self): - name = self.name - if self.namespace_abbreviation: - name = self.namespace_abbreviation + ":" + name - return name + def __new__(cls, prefix, name, namespace=None): + obj = unicode.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj class PageElement(object): @@ -686,6 +671,9 @@ class Tag(PageElement): def has_attr(self, key): return key in self.attrs + def __hash__(self): + return str(self).__hash__() + def __getitem__(self, key): """tag[key] returns the value of the 'key' attribute for the tag, and throws an exception if it's not there.""" diff --git a/bs4/testing.py b/bs4/testing.py index b2ca180..1945c02 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -360,16 +360,28 @@ class HTMLTreeBuilderSmokeTest(object): class XMLTreeBuilderSmokeTest(object): + def test_docstring_generated(self): + soup = self.soup("<root/>") + self.assertEqual( + soup.encode(), b'<?xml version="1.0" encoding="utf-8">\n<root/>') + + def test_docstring_includes_correct_encoding(self): + soup = self.soup("<root/>") + self.assertEqual( + soup.encode("latin1"), + b'<?xml version="1.0" encoding="latin1">\n<root/>') + + + def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + self.assertSoupEquals("<p>", "<p/>") + self.assertSoupEquals("<p>foo</p>") + def test_namespaces_are_preserved(self): - markup = '<root xmlns:a="http://www.example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>' + markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>' soup = self.soup(markup) root = soup.root - import pdb; pdb.set_trace() - self.assertEquals("http://www.example.com/", root['xmlns:a']) - self.assertEquals("http://www.example.net/", root['xmlns:b']) - - - pass + self.assertEquals("http://example.com/", root['xmlns:a']) + self.assertEquals("http://example.net/", root['xmlns:b']) diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index e9aaa78..8333ad4 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -19,7 +19,7 @@ class TestDeprecatedConstructorArguments(SoupTest): msg = str(w[0].message) self.assertTrue("parseOnlyThese" in msg) self.assertTrue("parse_only" in msg) - self.assertEquals(b"<b></b>", soup.encode()) + self.assertEqual(b"<b></b>", soup.encode()) def test_fromEncoding_renamed_to_from_encoding(self): with warnings.catch_warnings(record=True) as w: @@ -28,7 +28,7 @@ class TestDeprecatedConstructorArguments(SoupTest): msg = str(w[0].message) self.assertTrue("fromEncoding" in msg) self.assertTrue("from_encoding" in msg) - self.assertEquals("utf8", soup.original_encoding) + self.assertEqual("utf8", soup.original_encoding) def test_unrecognized_keyword_argument(self): self.assertRaises( @@ -209,7 +209,7 @@ class TestUnicodeDammit(unittest.TestCase): b"<html><meta charset=euc-jp /></html>", b"<html><meta charset=euc-jp/></html>"): dammit = UnicodeDammit(data, is_html=True) - self.assertEquals( + self.assertEqual( "euc-jp", dammit.original_encoding) def test_last_ditch_entity_replacement(self): @@ -244,9 +244,18 @@ class TestNamedspacedAttribute(SoupTest): a = NamespacedAttribute("a", "b") self.assertEqual("a:b", a) - def test_attributes_are_equivalent_if_all_members_identical(self): + def test_attributes_are_equivalent_if_prefix_and_name_identical(self): a = NamespacedAttribute("a", "b", "c") b = NamespacedAttribute("a", "b", "c") self.assertEqual(a, b) - b.namespace = "d" - self.assertNotEqual(a, b) + + # The actual namespace is not considered. + c = NamespacedAttribute("a", "b", None) + self.assertEqual(a, c) + + # But name and prefix are important. + d = NamespacedAttribute("a", "z", "c") + self.assertNotEqual(a, d) + + e = NamespacedAttribute("z", "b", "c") + self.assertNotEqual(a, e) diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 6aa02cb..ce9a7ec 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -97,8 +97,8 @@ class TestFindAllBasicNamespaces(TreeTest): def test_find_by_namespaced_name(self): soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">') - self.assertEquals("4", soup.find("mathml:msqrt").string) - self.assertEquals("a", soup.find(attrs= { "svg:fill" : "red" }).name) + self.assertEqual("4", soup.find("mathml:msqrt").string) + self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name) class TestFindAllByName(TreeTest): @@ -7,7 +7,7 @@ except ImportError: from distutils.command.build_py import build_py setup(name="beautifulsoup4", - version = "4.0.0b7", + version = "4.0.0b8", author="Leonard Richardson", author_email='leonardr@segfault.org', url="http://www.crummy.com/software/BeautifulSoup/bs4/", |