6 files changed, 110 insertions, 12 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 6eeebd2..7bc920e 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -15,9 +15,10 @@ Especially important changes:
   argument described in the documentation. `text` may eventually
   change its meaning, but not for a very long time. [bug=1366856]
 
-* Changed the way soup objects work under copy.copy() and
-  copy.deepcopy(). Copying a NavigableString will give you a new
-  NavigableString that is not connected to the parse tree.
+* Changed the way soup objects work under copy.copy(). Copying a
+  NavigableString or a Tag will give you a new object of the same type
+  that's equal to the old one but not connected to the parse tree.
+  Patch by Martijn Peters. [bug=1307490]
 
 * Started using a standard MIT license. [bug=1294662]
 
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 68e7512..cb74bd3 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -79,6 +79,9 @@ class BeautifulSoup(Tag):
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
 
+    def __copy__(self):
+        return type(self)(self.encode(), builder=self.builder)
+
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  **kwargs):
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 4eaaaec..ab5793c 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -244,9 +244,9 @@ class Element(html5lib.treebuilders._base.Node):
 
     def reparentChildren(self, new_parent):
         """Move all of this tag's children into another tag."""
-        print "MOVE", self.element.contents
-        print "FROM", self.element
-        print "TO", new_parent.element
+        # print "MOVE", self.element.contents
+        # print "FROM", self.element
+        # print "TO", new_parent.element
         element = self.element
         new_parent_element = new_parent.element
         # Determine what this tag's next_element will be once all the children
@@ -297,9 +297,9 @@ class Element(html5lib.treebuilders._base.Node):
         element.contents = []
         element.next_element = final_next_element
 
-        print "DONE WITH MOVE"
-        print "FROM", self.element
-        print "TO", new_parent_element
+        # print "DONE WITH MOVE"
+        # print "FROM", self.element
+        # print "TO", new_parent_element
 
     def cloneNode(self):
         tag = self.soup.new_tag(self.element.name, self.namespace)
diff --git a/bs4/element.py b/bs4/element.py
index 0486da2..c70ad5a 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -815,6 +815,18 @@ class Tag(PageElement):
 
     parserClass = _alias("parser_class")  # BS3
 
+    def __copy__(self):
+        """A copy of a Tag is a new Tag, unconnected to the parse tree.
+        Its contents are a copy of the old Tag's contents.
+        """
+        clone = type(self)(None, self.builder, self.name, self.namespace,
+                           self.nsprefix, self.attrs)
+        for attr in ('can_be_empty_element', 'hidden'):
+            setattr(clone, attr, getattr(self, attr))
+        for child in self.contents:
+            clone.append(child.__copy__())
+        return clone
+
     @property
     def is_empty_element(self):
         """Is this tag an empty-element tag? (aka a self-closing tag)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 22d4b4f..2371591 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1330,6 +1330,32 @@ class TestPersistence(SoupTest):
         self.assertEqual(s1, s2)
         self.assertTrue(isinstance(s2, Comment))
 
+    def test_copy_entire_soup(self):
+        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        soup = self.soup(html)
+        soup_copy = copy.copy(soup)
+        self.assertEqual(soup, soup_copy)
+
+    def test_copy_tag_copies_contents(self):
+        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        soup = self.soup(html)
+        div = soup.div
+        div_copy = copy.copy(div)
+
+        # The two tags look the same, and evaluate to equal.
+        self.assertEqual(unicode(div), unicode(div_copy))
+        self.assertEqual(div, div_copy)
+
+        # But they're not the same object.
+        self.assertFalse(div is div_copy)
+
+        # And they don't have the same relation to the parse tree. The
+        # copy is not associated with a parse tree at all.
+        self.assertEqual(None, div_copy.parent)
+        self.assertEqual(None, div_copy.previous_element)
+        self.assertEqual(None, div_copy.find(string='Bar').next_element)
+        self.assertNotEqual(None, div.find(string='Bar').next_element)
+
 class TestSubstitutions(SoupTest):
 
     def test_default_formatter_is_minimal(self):
diff --git a/doc/source/index.rst b/doc/source/index.rst
index f6d3e38..81659ed 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -1787,7 +1787,6 @@ attributes, and delete attributes::
  tag
  # <blockquote>Extremely bold</blockquote>
 
-
 Modifying ``.string``
 ---------------------
 
@@ -2419,8 +2418,9 @@ as ``exclude_encodings``::
  soup.original_encoding
  'WINDOWS-1255'
 
-(This isn't 100% correct, but Windows-1255 is a compatible superset of
-ISO-8859-8, so it's close enough.)
+Windows-1255 isn't 100% correct, but that encoding is a compatible
+superset of ISO-8859-8, so it's close enough. (``exclude_encodings``
+is a new feature in Beautiful Soup 4.4.0.)
 
 In rare cases (usually when a UTF-8 document contains text written in
 a completely different encoding), the only way to get Unicode may be
@@ -2609,6 +2609,62 @@ document is Windows-1252, and the document will come out looking like
 
 ``UnicodeDammit.detwingle()`` is new in Beautiful Soup 4.1.0.
 
+
+Comparing objects for equality
+==============================
+
+Beautiful Soup says that two ``NavigableString`` or ``Tag`` objects
+are equal when they represent the same HTML or XML markup. In this
+example, the two <b> tags are treated as equal, even though they live
+in different parts of the object tree, because they both look like
+"<b>pizza</b>"::
+
+ markup = "<p>I want <b>pizza</b> and more <b>pizza</b>!</p>"
+ soup = BeautifulSoup(markup, 'html.parser')
+ first_b, second_b = soup.find_all('b')
+ print first_b == second_b
+ # True
+
+ print first_b.previous_element == second_b.previous_element
+ # False
+
+If you want to see whether two variables refer to exactly the same
+object, use ``is``::
+
+ print first_b is second_b
+ # False
+
+Copying Beautiful Soup objects
+==============================
+
+You can use ``copy.copy()`` to create a copy of any ``Tag`` or
+``NavigableString``::
+
+ import copy
+ p_copy = copy.copy(soup.p)
+ print p_copy
+ # <p>I want <b>pizza</b> and more <b>pizza</b>!</p>
+
+The copy is considered equal to the original, since it represents the
+same markup as the original, but it's not the same object::
+
+ print soup.p == p_copy
+ # True
+
+ print soup.p is p_copy
+ # False
+
+The only real difference is that the copy is completely detached from
+the original Beautiful Soup object tree, just as if ``extract()`` had
+been called on it::
+
+ print p_copy.parent
+ # None
+
+This is because two different ``Tag`` objects can't occupy the same
+space at the same time.
+
+
 Parsing only part of a document
 ===============================