summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2016-07-18 21:56:10 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2016-07-18 21:56:10 -0400
commit: 964a25828644e87daa80c2b85ceaed397bca637e (patch)
tree: a76fcde33a01441f580393c9399a69795163cebb /bs4/__init__.py
parent: 1126b39cf68d7b75b7f12a185ab9f6983526fa3a (diff)
Corrected an encoding error that happened when a BeautifulSoup
object was copied. [bug=1554439]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 11
1 files changed, 10 insertions, 1 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index bc611c9..308428a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -236,7 +236,16 @@ class BeautifulSoup(Tag):
self.builder.soup = None
def __copy__(self):
- return type(self)(self.encode(), builder=self.builder)
+ copy = type(self)(
+ self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
+ )
+
+ # Although we encoded the tree to UTF-8, that may not have
+ # been the encoding of the original markup. Set the copy's
+ # .original_encoding to reflect the original object's
+ # .original_encoding.
+ copy.original_encoding = self.original_encoding
+ return copy
def __getstate__(self):
# Frequently a tree builder can't be pickled.