diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/__init__.py | 3 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 12 | ||||
-rw-r--r-- | bs4/element.py | 12 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 26 |
4 files changed, 47 insertions, 6 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py index 68e7512..cb74bd3 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -79,6 +79,9 @@ class BeautifulSoup(Tag): NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" + def __copy__(self): + return type(self)(self.encode(), builder=self.builder) + def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, **kwargs): diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 4eaaaec..ab5793c 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -244,9 +244,9 @@ class Element(html5lib.treebuilders._base.Node): def reparentChildren(self, new_parent): """Move all of this tag's children into another tag.""" - print "MOVE", self.element.contents - print "FROM", self.element - print "TO", new_parent.element + # print "MOVE", self.element.contents + # print "FROM", self.element + # print "TO", new_parent.element element = self.element new_parent_element = new_parent.element # Determine what this tag's next_element will be once all the children @@ -297,9 +297,9 @@ class Element(html5lib.treebuilders._base.Node): element.contents = [] element.next_element = final_next_element - print "DONE WITH MOVE" - print "FROM", self.element - print "TO", new_parent_element + # print "DONE WITH MOVE" + # print "FROM", self.element + # print "TO", new_parent_element def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) diff --git a/bs4/element.py b/bs4/element.py index 0486da2..c70ad5a 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -815,6 +815,18 @@ class Tag(PageElement): parserClass = _alias("parser_class") # BS3 + def __copy__(self): + """A copy of a Tag is a new Tag, unconnected to the parse tree. + Its contents are a copy of the old Tag's contents. + """ + clone = type(self)(None, self.builder, self.name, self.namespace, + self.nsprefix, self.attrs) + for attr in ('can_be_empty_element', 'hidden'): + setattr(clone, attr, getattr(self, attr)) + for child in self.contents: + clone.append(child.__copy__()) + return clone + @property def is_empty_element(self): """Is this tag an empty-element tag? (aka a self-closing tag) diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 22d4b4f..2371591 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1330,6 +1330,32 @@ class TestPersistence(SoupTest): self.assertEqual(s1, s2) self.assertTrue(isinstance(s2, Comment)) + def test_copy_entire_soup(self): + html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end" + soup = self.soup(html) + soup_copy = copy.copy(soup) + self.assertEqual(soup, soup_copy) + + def test_copy_tag_copies_contents(self): + html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end" + soup = self.soup(html) + div = soup.div + div_copy = copy.copy(div) + + # The two tags look the same, and evaluate to equal. + self.assertEqual(unicode(div), unicode(div_copy)) + self.assertEqual(div, div_copy) + + # But they're not the same object. + self.assertFalse(div is div_copy) + + # And they don't have the same relation to the parse tree. The + # copy is not associated with a parse tree at all. + self.assertEqual(None, div_copy.parent) + self.assertEqual(None, div_copy.previous_element) + self.assertEqual(None, div_copy.find(string='Bar').next_element) + self.assertNotEqual(None, div.find(string='Bar').next_element) + class TestSubstitutions(SoupTest): def test_default_formatter_is_minimal(self): |