diff options
author | Leonard Richardson <leonardr@segfault.org> | 2021-12-17 12:54:14 -0500 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2021-12-17 12:54:14 -0500 |
commit | 854fd52ad616d8e9c0860bba2eb4ddd93eb2dc79 (patch) | |
tree | 419c15e1c024ea09fc71efbc6d56014ae099fe52 | |
parent | ad52722cc6b55ce414d395e9a0860cee57c0ab2d (diff) |
Fix a crash when pickling a BeautifulSoup object that has no
tree builder. [bug=1934003]
-rw-r--r-- | CHANGELOG | 3 | ||||
-rw-r--r-- | bs4/__init__.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_lxml.py | 12 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 19 |
4 files changed, 35 insertions, 1 deletion
```diff
diff --git a/CHANGELOG b/CHANGELOG
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -33,6 +33,9 @@ Python 2 was revision 605.
   version. "text" still works, but will give a DeprecationWarning.
   [bug=1947038]
 
+* Fix a crash when pickling a BeautifulSoup object that has no
+  tree builder. [bug=1934003]
+
 = 4.10.0 (20210907)
 
 * This is the first release of Beautiful Soup to only support Python
diff --git a/bs4/__init__.py b/bs4/__init__.py
index ddf1a86..75c801c 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -396,7 +396,7 @@ class BeautifulSoup(Tag):
     def __getstate__(self):
         # Frequently a tree builder can't be pickled.
         d = dict(self.__dict__)
-        if 'builder' in d and not self.builder.picklable:
+        if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
             d['builder'] = None
         return d
 
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 41319d1..396ca0e 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -1,5 +1,6 @@
 """Tests to ensure that the lxml tree builder generates good trees."""
 
+import pickle
 import re
 import warnings
 
@@ -185,3 +186,14 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
         assert soup.find('prefix:tag2').name == 'tag2'
         assert soup.find('prefix:tag3').name == 'tag3'
         assert soup.subtag.find('prefix:tag3').name == 'tag3'
+
+    def test_pickle_removes_builder(self):
+        # The lxml TreeBuilder is not picklable, so it won't be
+        # preserved in a pickle/unpickle operation.
+
+        soup = self.soup("<a>some markup</a>")
+        assert isinstance(soup.builder, self.default_builder)
+        pickled = pickle.dumps(soup)
+        unpickled = pickle.loads(pickled)
+        assert "some markup" == unpickled.a.string
+        assert unpickled.builder is None
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 67845cc..2f53a30 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -4,6 +4,7 @@
 from pdb import set_trace
 import logging
 import os
+import pickle
 import pytest
 import sys
 import tempfile
@@ -384,6 +385,24 @@ class TestNewString(SoupTest):
         assert isinstance(s, Comment)
 
 
+class TestPickle(SoupTest):
+    # Test our ability to pickle the BeautifulSoup object itself.
+
+    def test_normal_pickle(self):
+        soup = self.soup("<a>some markup</a>")
+        pickled = pickle.dumps(soup)
+        unpickled = pickle.loads(pickled)
+        assert "some markup" == unpickled.a.string
+
+    def test_pickle_with_no_builder(self):
+        # We had a bug that prevented pickling from working if
+        # the builder wasn't set.
+        soup = self.soup("some markup")
+        soup.builder = None
+        pickled = pickle.dumps(soup)
+        unpickled = pickle.loads(pickled)
+        assert "some markup" == unpickled.string
+
 class TestEncodingConversion(SoupTest):
     # Test Beautiful Soup's ability to decode and encode from various
     # encodings.
```