summary | refs | log | tree | commit | diff
path: root/bs4
diff options
context:
space:
mode:
Diffstat (limited to 'bs4')
-rw-r--r-- bs4/__init__.py 3
-rw-r--r-- bs4/builder/_html5lib.py 12
-rw-r--r-- bs4/element.py 12
-rw-r--r-- bs4/tests/test_tree.py 26
4 files changed, 47 insertions, 6 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 68e7512..cb74bd3 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -79,6 +79,9 @@ class BeautifulSoup(Tag):
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
+ def __copy__(self):
+ return type(self)(self.encode(), builder=self.builder)
+
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
**kwargs):
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 4eaaaec..ab5793c 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -244,9 +244,9 @@ class Element(html5lib.treebuilders._base.Node):
def reparentChildren(self, new_parent):
"""Move all of this tag's children into another tag."""
- print "MOVE", self.element.contents
- print "FROM", self.element
- print "TO", new_parent.element
+ # print "MOVE", self.element.contents
+ # print "FROM", self.element
+ # print "TO", new_parent.element
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
@@ -297,9 +297,9 @@ class Element(html5lib.treebuilders._base.Node):
element.contents = []
element.next_element = final_next_element
- print "DONE WITH MOVE"
- print "FROM", self.element
- print "TO", new_parent_element
+ # print "DONE WITH MOVE"
+ # print "FROM", self.element
+ # print "TO", new_parent_element
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
diff --git a/bs4/element.py b/bs4/element.py
index 0486da2..c70ad5a 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -815,6 +815,18 @@ class Tag(PageElement):
parserClass = _alias("parser_class") # BS3
+ def __copy__(self):
+ """A copy of a Tag is a new Tag, unconnected to the parse tree.
+ Its contents are a copy of the old Tag's contents.
+ """
+ clone = type(self)(None, self.builder, self.name, self.namespace,
+ self.nsprefix, self.attrs)
+ for attr in ('can_be_empty_element', 'hidden'):
+ setattr(clone, attr, getattr(self, attr))
+ for child in self.contents:
+ clone.append(child.__copy__())
+ return clone
+
@property
def is_empty_element(self):
"""Is this tag an empty-element tag? (aka a self-closing tag)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 22d4b4f..2371591 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1330,6 +1330,32 @@ class TestPersistence(SoupTest):
self.assertEqual(s1, s2)
self.assertTrue(isinstance(s2, Comment))
+ def test_copy_entire_soup(self):
+ html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+ soup = self.soup(html)
+ soup_copy = copy.copy(soup)
+ self.assertEqual(soup, soup_copy)
+
+ def test_copy_tag_copies_contents(self):
+ html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+ soup = self.soup(html)
+ div = soup.div
+ div_copy = copy.copy(div)
+
+ # The two tags look the same, and evaluate to equal.
+ self.assertEqual(unicode(div), unicode(div_copy))
+ self.assertEqual(div, div_copy)
+
+ # But they're not the same object.
+ self.assertFalse(div is div_copy)
+
+ # And they don't have the same relation to the parse tree. The
+ # copy is not associated with a parse tree at all.
+ self.assertEqual(None, div_copy.parent)
+ self.assertEqual(None, div_copy.previous_element)
+ self.assertEqual(None, div_copy.find(string='Bar').next_element)
+ self.assertNotEqual(None, div.find(string='Bar').next_element)
+
class TestSubstitutions(SoupTest):
def test_default_formatter_is_minimal(self):