diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-06-28 15:58:48 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-06-28 15:58:48 -0400 |
commit | 007aa56a1922eea9f364bf3b73e72077046e2c69 (patch) | |
tree | 709684c6357fc69013c5d39b026b1d303da9a40b /bs4/__init__.py | |
parent | 9428b9d6ed0d279a72414a986290821ca4f0caaf (diff) |
It's now possible to pickle a BeautifulSoup object no matter which
tree builder was used to create it. However, the only tree builder
that survives the pickling process is the HTMLParserTreeBuilder
('html.parser'). If you unpickle a BeautifulSoup object created with
some other tree builder, soup.builder will be None. [bug=1231545]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- | bs4/__init__.py | 13 |
1 file changed, 10 insertions, 3 deletions
```diff
diff --git a/bs4/__init__.py b/bs4/__init__.py
index cb74bd3..b861d87 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -79,9 +79,6 @@ class BeautifulSoup(Tag):
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
 
-    def __copy__(self):
-        return type(self)(self.encode(), builder=self.builder)
-
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  **kwargs):
@@ -225,6 +222,16 @@ class BeautifulSoup(Tag):
         self.markup = None
         self.builder.soup = None
 
+    def __copy__(self):
+        return type(self)(self.encode(), builder=self.builder)
+
+    def __getstate__(self):
+        # Frequently a tree builder can't be pickled.
+        d = dict(self.__dict__)
+        if 'builder' in d and not self.builder.picklable:
+            del d['builder']
+        return d
+
     def _feed(self):
         # Convert the document to Unicode.
         self.builder.reset()
```