summary refs log tree commit diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2012-02-23 11:56:40 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2012-02-23 11:56:40 -0500
commit b7749c50a2c96ccf6982cfa1ca02d883e31e0af9 (patch)
tree 404063e3bb580627a6cabad1b80774981d4f5232
parent 2f72913160bedb509a8042693328d139e7c6b945 (diff)
Bumped version number.
-rw-r--r-- NEWS.txt 10
-rw-r--r-- bs4/__init__.py 2
-rw-r--r-- bs4/builder/_html5lib.py 6
-rw-r--r-- bs4/element.py 32
-rw-r--r-- bs4/testing.py 26
-rw-r--r-- bs4/tests/test_soup.py 21
-rw-r--r-- bs4/tests/test_tree.py 4
-rw-r--r-- setup.py 2
8 files changed, 59 insertions, 44 deletions
diff --git a/NEWS.txt b/NEWS.txt
index cd5d305..fe2e0cc 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,3 +1,13 @@
+= 4.0.0b8 () =
+
+* All tree builders now preserve namespace information in the
+ documents they parse.
+
+ However, there is no special support for namespace-oriented
+ searching or tree manipulation. When you search the tree, you need
+ to use namespace prefixes exactly as they're used in the original
+ document.
+
= 4.0.0b7 (20110223) =
* Upon decoding to string, any characters that can't be represented in
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 2dd0521..bf800ea 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.0.0b7"
+__version__ = "4.0.0b8"
__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
__license__ = "MIT"
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 7ce69aa..26b1773 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -10,11 +10,7 @@ from bs4.builder import (
)
from bs4.element import NamespacedAttribute
import html5lib
-from html5lib.constants import (
- DataLossWarning,
- namespaces,
- )
-import warnings
+from html5lib.constants import namespaces
from bs4.element import (
Comment,
Doctype,
diff --git a/bs4/element.py b/bs4/element.py
index 7e5810a..c2c4e2e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -22,29 +22,14 @@ def _alias(attr):
return alias
-class NamespacedAttribute(object):
+class NamespacedAttribute(unicode):
- def __init__(self, namespace_abbreviation, name, namespace=None):
- self.namespace_abbreviation = namespace_abbreviation
- self.name = name
- self.namespace = namespace
-
- def __eq__(self, other):
- if isinstance(other, NamespacedAttribute):
- return (
- self.namespace_abbreviation == other.namespace_abbreviation
- and self.name == other.name
- and self.namespace == other.namespace)
- elif isinstance(other, basestring):
- return str(self) == other
- else:
- return super(NamespacedAttribute, self).__eq__(other)
-
- def __str__(self):
- name = self.name
- if self.namespace_abbreviation:
- name = self.namespace_abbreviation + ":" + name
- return name
+ def __new__(cls, prefix, name, namespace=None):
+ obj = unicode.__new__(cls, prefix + ":" + name)
+ obj.prefix = prefix
+ obj.name = name
+ obj.namespace = namespace
+ return obj
class PageElement(object):
@@ -686,6 +671,9 @@ class Tag(PageElement):
def has_attr(self, key):
return key in self.attrs
+ def __hash__(self):
+ return str(self).__hash__()
+
def __getitem__(self, key):
"""tag[key] returns the value of the 'key' attribute for the tag,
and throws an exception if it's not there."""
diff --git a/bs4/testing.py b/bs4/testing.py
index b2ca180..1945c02 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -360,16 +360,28 @@ class HTMLTreeBuilderSmokeTest(object):
class XMLTreeBuilderSmokeTest(object):
+ def test_docstring_generated(self):
+ soup = self.soup("<root/>")
+ self.assertEqual(
+ soup.encode(), b'<?xml version="1.0" encoding="utf-8">\n<root/>')
+
+ def test_docstring_includes_correct_encoding(self):
+ soup = self.soup("<root/>")
+ self.assertEqual(
+ soup.encode("latin1"),
+ b'<?xml version="1.0" encoding="latin1">\n<root/>')
+
+
+ def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
+ self.assertSoupEquals("<p>", "<p/>")
+ self.assertSoupEquals("<p>foo</p>")
+
def test_namespaces_are_preserved(self):
- markup = '<root xmlns:a="http://www.example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
+ markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
soup = self.soup(markup)
root = soup.root
- import pdb; pdb.set_trace()
- self.assertEquals("http://www.example.com/", root['xmlns:a'])
- self.assertEquals("http://www.example.net/", root['xmlns:b'])
-
-
- pass
+ self.assertEquals("http://example.com/", root['xmlns:a'])
+ self.assertEquals("http://example.net/", root['xmlns:b'])
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index e9aaa78..8333ad4 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -19,7 +19,7 @@ class TestDeprecatedConstructorArguments(SoupTest):
msg = str(w[0].message)
self.assertTrue("parseOnlyThese" in msg)
self.assertTrue("parse_only" in msg)
- self.assertEquals(b"<b></b>", soup.encode())
+ self.assertEqual(b"<b></b>", soup.encode())
def test_fromEncoding_renamed_to_from_encoding(self):
with warnings.catch_warnings(record=True) as w:
@@ -28,7 +28,7 @@ class TestDeprecatedConstructorArguments(SoupTest):
msg = str(w[0].message)
self.assertTrue("fromEncoding" in msg)
self.assertTrue("from_encoding" in msg)
- self.assertEquals("utf8", soup.original_encoding)
+ self.assertEqual("utf8", soup.original_encoding)
def test_unrecognized_keyword_argument(self):
self.assertRaises(
@@ -209,7 +209,7 @@ class TestUnicodeDammit(unittest.TestCase):
b"<html><meta charset=euc-jp /></html>",
b"<html><meta charset=euc-jp/></html>"):
dammit = UnicodeDammit(data, is_html=True)
- self.assertEquals(
+ self.assertEqual(
"euc-jp", dammit.original_encoding)
def test_last_ditch_entity_replacement(self):
@@ -244,9 +244,18 @@ class TestNamedspacedAttribute(SoupTest):
a = NamespacedAttribute("a", "b")
self.assertEqual("a:b", a)
- def test_attributes_are_equivalent_if_all_members_identical(self):
+ def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
a = NamespacedAttribute("a", "b", "c")
b = NamespacedAttribute("a", "b", "c")
self.assertEqual(a, b)
- b.namespace = "d"
- self.assertNotEqual(a, b)
+
+ # The actual namespace is not considered.
+ c = NamespacedAttribute("a", "b", None)
+ self.assertEqual(a, c)
+
+ # But name and prefix are important.
+ d = NamespacedAttribute("a", "z", "c")
+ self.assertNotEqual(a, d)
+
+ e = NamespacedAttribute("z", "b", "c")
+ self.assertNotEqual(a, e)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 6aa02cb..ce9a7ec 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -97,8 +97,8 @@ class TestFindAllBasicNamespaces(TreeTest):
def test_find_by_namespaced_name(self):
soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')
- self.assertEquals("4", soup.find("mathml:msqrt").string)
- self.assertEquals("a", soup.find(attrs= { "svg:fill" : "red" }).name)
+ self.assertEqual("4", soup.find("mathml:msqrt").string)
+ self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name)
class TestFindAllByName(TreeTest):
diff --git a/setup.py b/setup.py
index 0d5b7d7..878d06c 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ except ImportError:
from distutils.command.build_py import build_py
setup(name="beautifulsoup4",
- version = "4.0.0b7",
+ version = "4.0.0b8",
author="Leonard Richardson",
author_email='leonardr@segfault.org',
url="http://www.crummy.com/software/BeautifulSoup/bs4/",