diff options
author | Leonard Richardson <leonardr@segfault.org> | 2019-10-05 16:24:19 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2019-10-05 16:24:19 -0400 |
commit | 24fca77a9e156ec77f1bec115199c9be176edcc2 (patch) | |
tree | d3927bd7cb95a0e4f0939c78e9ec1628577a02d9 | |
parent | ab0626db2a60f4f22b97ece310d92038b3da5cc1 (diff) |
Avoid a crash when unpickling certain parse trees generated using html5lib on Python 3. [bug=1843545]
-rw-r--r-- | CHANGELOG | 13 | ||||
-rw-r--r-- | bs4/element.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 5 |
3 files changed, 13 insertions, 7 deletions
@@ -6,6 +6,11 @@
   and position within a line (Tag.sourcepos). Based on code by Chris
   Mayo. [bug=1742921]
 
+* When instantiating a BeautifulSoup object, it's now possible to
+  provide replacement classes to be instantiated for every tag ('tag_class')
+  or string ('string_class') encountered during parsing, rather than
+  using the default Tag and NavigableString objects.
+
 * Fixed the definition of the default XML namespace when using lxml 4.4.
   Patch by Isaac Muse. [bug=1840141]
 
@@ -15,17 +20,15 @@
 * Copying a Tag preserves information that was originally obtained from
   the TreeBuilder used to build the original Tag. [bug=1838903]
 
-* When instantiating a BeautifulSoup object, it's now possible to
-  provide replacement classes to be instantiated for every tag ('tag_class')
-  or string ('string_class') encountered during parsing, rather than
-  using the default Tag and NavigableString objects.
-
 * Raise an explanatory exception when the underlying parser
   completely rejects the incoming markup. [bug=1838877]
 
 * Avoid a crash when trying to detect the declared encoding of a
   Unicode document. [bug=1838877]
 
+* Avoid a crash when unpickling certain parse trees generated
+  using html5lib on Python 3. [bug=1843545]
+
 = 4.8.0 (20190720, "One Small Soup")
 
 This release focuses on making it easier to customize Beautiful Soup's
diff --git a/bs4/element.py b/bs4/element.py
index d6ed020..2001ad5 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -45,7 +45,7 @@ def _alias(attr):
 
 class NamespacedAttribute(unicode):
 
-    def __new__(cls, prefix, name, namespace=None):
+    def __new__(cls, prefix, name=None, namespace=None):
         if not name:
             # This is the default namespace. Its name "has no value"
             # per https://www.w3.org/TR/xml-names/#defaulting
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 3603e81..dc88662 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -638,10 +638,13 @@ class TestUnicodeDammit(unittest.TestCase):
 
 class TestNamedspacedAttribute(SoupTest):
 
-    def test_name_may_be_none(self):
+    def test_name_may_be_none_or_missing(self):
         a = NamespacedAttribute("xmlns", None)
         self.assertEqual(a, "xmlns")
 
+        a = NamespacedAttribute("xmlns")
+        self.assertEqual(a, "xmlns")
+
     def test_attribute_is_equivalent_to_colon_separated_string(self):
         a = NamespacedAttribute("a", "b")
         self.assertEqual("a:b", a)