summaryrefslogtreecommitdiff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r--bs4/__init__.py27
1 files changed, 26 insertions, 1 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 9c7e67d..18d380b 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -38,6 +38,7 @@ from .builder import (
builder_registry,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
+ HTMLParserTreeBuilder
)
from .dammit import UnicodeDammit
from .element import (
@@ -366,8 +367,32 @@ class BeautifulSoup(Tag):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
- d['builder'] = None
+ d['builder'] = type(self.builder)
+ # Store the contents as a Unicode string.
+ d['contents'] = []
+ d['markup'] = self.decode()
+
+ # If _most_recent_element is present, it's a Tag object left
+ # over from initial parse. It might not be picklable and we
+ # don't need it.
+ if '_most_recent_element' in d:
+ del d['_most_recent_element']
return d
+
+ def __setstate__(self, state):
+ # If necessary, restore the TreeBuilder by looking it up.
+ self.__dict__ = state
+ if isinstance(self.builder, type):
+ self.builder = self.builder()
+ elif not self.builder:
+ # We don't know which builder was used to build this
+ # parse tree, so use a default we know is always available.
+ self.builder = HTMLParserTreeBuilder()
+ self.builder.soup = self
+ self.reset()
+ self._feed()
+ return state
+
@classmethod
def _decode_markup(cls, markup):