summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  CHANGELOG               3
-rw-r--r--  bs4/__init__.py         2
-rw-r--r--  bs4/tests/test_lxml.py  12
-rw-r--r--  bs4/tests/test_soup.py  19
4 files changed, 35 insertions, 1 deletion
diff --git a/CHANGELOG b/CHANGELOG
index af99990..3c807d5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -33,6 +33,9 @@ Python 2 was revision 605.
version. "text" still works, but will give a DeprecationWarning.
[bug=1947038]
+* Fix a crash when pickling a BeautifulSoup object that has no
+ tree builder. [bug=1934003]
+
= 4.10.0 (20210907)
* This is the first release of Beautiful Soup to only support Python
diff --git a/bs4/__init__.py b/bs4/__init__.py
index ddf1a86..75c801c 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -396,7 +396,7 @@ class BeautifulSoup(Tag):
def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
- if 'builder' in d and not self.builder.picklable:
+ if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
d['builder'] = None
return d
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 41319d1..396ca0e 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -1,5 +1,6 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
+import pickle
import re
import warnings
@@ -185,3 +186,14 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
assert soup.find('prefix:tag2').name == 'tag2'
assert soup.find('prefix:tag3').name == 'tag3'
assert soup.subtag.find('prefix:tag3').name == 'tag3'
+
+ def test_pickle_removes_builder(self):
+ # The lxml TreeBuilder is not picklable, so it won't be
+ # preserved in a pickle/unpickle operation.
+
+ soup = self.soup("<a>some markup</a>")
+ assert isinstance(soup.builder, self.default_builder)
+ pickled = pickle.dumps(soup)
+ unpickled = pickle.loads(pickled)
+ assert "some markup" == unpickled.a.string
+ assert unpickled.builder is None
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 67845cc..2f53a30 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -4,6 +4,7 @@
from pdb import set_trace
import logging
import os
+import pickle
import pytest
import sys
import tempfile
@@ -384,6 +385,24 @@ class TestNewString(SoupTest):
assert isinstance(s, Comment)
+class TestPickle(SoupTest):
+ # Test our ability to pickle the BeautifulSoup object itself.
+
+ def test_normal_pickle(self):
+ soup = self.soup("<a>some markup</a>")
+ pickled = pickle.dumps(soup)
+ unpickled = pickle.loads(pickled)
+ assert "some markup" == unpickled.a.string
+
+ def test_pickle_with_no_builder(self):
+ # We had a bug that prevented pickling from working if
+ # the builder wasn't set.
+ soup = self.soup("some markup")
+ soup.builder = None
+ pickled = pickle.dumps(soup)
+ unpickled = pickle.loads(pickled)
+ assert "some markup" == unpickled.string
+
class TestEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various
# encodings.