summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org>, 2019-10-05 16:24:19 -0400
committer: Leonard Richardson <leonardr@segfault.org>, 2019-10-05 16:24:19 -0400
commit: 24fca77a9e156ec77f1bec115199c9be176edcc2 (patch)
tree: d3927bd7cb95a0e4f0939c78e9ec1628577a02d9
parent: ab0626db2a60f4f22b97ece310d92038b3da5cc1 (diff)
Avoid a crash when unpickling certain parse trees generated using html5lib on Python 3. [bug=1843545]
-rw-r--r-- CHANGELOG 13
-rw-r--r-- bs4/element.py 2
-rw-r--r-- bs4/tests/test_soup.py 5
3 files changed, 13 insertions, 7 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 3551449..1b6a95d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,11 @@
and position within a line (Tag.sourcepos). Based on code by Chris
Mayo. [bug=1742921]
+* When instantiating a BeautifulSoup object, it's now possible to
+ provide replacement classes to be instantiated for every tag ('tag_class')
+ or string ('string_class') encountered during parsing, rather than
+ using the default Tag and NavigableString objects.
+
* Fixed the definition of the default XML namespace when using
lxml 4.4. Patch by Isaac Muse. [bug=1840141]
@@ -15,17 +20,15 @@
* Copying a Tag preserves information that was originally obtained from
the TreeBuilder used to build the original Tag. [bug=1838903]
-* When instantiating a BeautifulSoup object, it's now possible to
- provide replacement classes to be instantiated for every tag ('tag_class')
- or string ('string_class') encountered during parsing, rather than
- using the default Tag and NavigableString objects.
-
* Raise an explanatory exception when the underlying parser
completely rejects the incoming markup. [bug=1838877]
* Avoid a crash when trying to detect the declared encoding of a
Unicode document. [bug=1838877]
+* Avoid a crash when unpickling certain parse trees generated
+ using html5lib on Python 3. [bug=1843545]
+
= 4.8.0 (20190720, "One Small Soup")
This release focuses on making it easier to customize Beautiful Soup's
diff --git a/bs4/element.py b/bs4/element.py
index d6ed020..2001ad5 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -45,7 +45,7 @@ def _alias(attr):
class NamespacedAttribute(unicode):
- def __new__(cls, prefix, name, namespace=None):
+ def __new__(cls, prefix, name=None, namespace=None):
if not name:
# This is the default namespace. Its name "has no value"
# per https://www.w3.org/TR/xml-names/#defaulting
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 3603e81..dc88662 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -638,10 +638,13 @@ class TestUnicodeDammit(unittest.TestCase):
class TestNamedspacedAttribute(SoupTest):
- def test_name_may_be_none(self):
+ def test_name_may_be_none_or_missing(self):
a = NamespacedAttribute("xmlns", None)
self.assertEqual(a, "xmlns")
+ a = NamespacedAttribute("xmlns")
+ self.assertEqual(a, "xmlns")
+
def test_attribute_is_equivalent_to_colon_separated_string(self):
a = NamespacedAttribute("a", "b")
self.assertEqual("a:b", a)