summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2015-06-28 15:58:48 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2015-06-28 15:58:48 -0400
commit: 007aa56a1922eea9f364bf3b73e72077046e2c69 (patch)
tree: 709684c6357fc69013c5d39b026b1d303da9a40b /bs4/__init__.py
parent: 9428b9d6ed0d279a72414a986290821ca4f0caaf (diff)
It's now possible to pickle a BeautifulSoup object no matter which tree builder was used to create it. However, the only tree builder that survives the pickling process is the HTMLParserTreeBuilder ('html.parser'). If you unpickle a BeautifulSoup object created with some other tree builder, soup.builder will be None. [bug=1231545]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 13
1 files changed, 10 insertions, 3 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index cb74bd3..b861d87 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -79,9 +79,6 @@ class BeautifulSoup(Tag):
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
- def __copy__(self):
- return type(self)(self.encode(), builder=self.builder)
-
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
**kwargs):
@@ -225,6 +222,16 @@ class BeautifulSoup(Tag):
self.markup = None
self.builder.soup = None
+ def __copy__(self):
+ return type(self)(self.encode(), builder=self.builder)
+
+ def __getstate__(self):
+ # Frequently a tree builder can't be pickled.
+ d = dict(self.__dict__)
+ if 'builder' in d and not self.builder.picklable:
+ del d['builder']
+ return d
+
def _feed(self):
# Convert the document to Unicode.
self.builder.reset()