diff options
author | Leonard Richardson <leonardr@segfault.org> | 2021-06-01 19:58:18 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2021-06-01 19:58:18 -0400 |
commit | 70f546b1e689a70e2f103795efce6d261a3dadf7 (patch) | |
tree | e803f70d1e1e5625c8f31e5495201f0be462cb3d | |
parent | a00624d7fc2e29b41b286f46844cb75f4d96ff63 (diff) |
The 'replace_with()' method now takes a variable number of arguments,
and can be used to replace a single element with a sequence of elements.
Patch by Bill Chandos.
-rw-r--r-- | CHANGELOG | 10 |
-rw-r--r-- | bs4/element.py | 15 |
-rw-r--r-- | bs4/tests/test_tree.py | 31 |
-rw-r--r-- | doc/source/index.rst | 25 |
4 files changed, 65 insertions, 16 deletions
@@ -35,6 +35,10 @@ guarantee that this will hold for the next release. a boolean attribute. In a future release, I plan to also give this behavior to the 'html' formatter. Patch by Isaac Muse. [bug=1915424] +* The 'replace_with()' method now takes a variable number of arguments, + and can be used to replace a single element with a sequence of elements. + Patch by Bill Chandos. [rev=605] + * Corrected output when the namespace prefix associated with a namespaced attribute is the empty string, as opposed to None. [bug=1915583] @@ -47,10 +51,10 @@ guarantee that this will hold for the next release. the <template> tag, which may contain both TemplateString objects and Comment objects. [bug=1913406] -* The html.parser tree builder can now handles named entities +* The html.parser tree builder can now handle named entities found in the HTML5 spec in much the same way that the html5lib - tree builder does. Note that the lxml tree builder still handles - named entities differently. [bug=1924908] + tree builder does. Note that the lxml HTML tree builder doesn't handle + named entities this way. [bug=1924908] * Added a second way to pass specify encodings to UnicodeDammit and EncodingDetector, based on the order of precedence defined in the diff --git a/bs4/element.py b/bs4/element.py index 3428e21..e7867a9 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -296,25 +296,26 @@ class PageElement(object): getText = get_text text = property(get_text) - def replace_with(self, replace_with): - """Replace this PageElement with another one, keeping the rest of the - tree the same. + def replace_with(self, *args): + """Replace this PageElement with one or more PageElements, keeping the + rest of the tree the same. - :param replace_with: A PageElement. + :param args: One or more PageElements. :return: `self`, no longer part of the tree. 
""" if self.parent is None: raise ValueError( "Cannot replace one element with another when the " "element to be replaced is not part of a tree.") - if replace_with is self: + if len(args) == 1 and args[0] is self: return - if replace_with is self.parent: + if any(x is self.parent for x in args): raise ValueError("Cannot replace a Tag with its parent.") old_parent = self.parent my_index = self.parent.index(self) self.extract(_self_index=my_index) - old_parent.insert(my_index, replace_with) + for idx, replace_with in enumerate(args, start=my_index): + old_parent.insert(idx, replace_with) return self replaceWith = replace_with # BS3 diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 875befe..26004ce 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1130,6 +1130,37 @@ class TestTreeModification(SoupTest): self.assertEqual(no.next_element, "no") self.assertEqual(no.next_sibling, " business") + def test_replace_with_errors(self): + # Can't replace a tag that's not part of a tree. + a_tag = Tag(name="a") + self.assertRaises(ValueError, a_tag.replace_with, "won't work") + + # Can't replace a tag with its parent. + a_tag = self.soup("<a><b></b></a>").a + self.assertRaises(ValueError, a_tag.b.replace_with, a_tag) + + # Or with a list that includes its parent. 
+ self.assertRaises(ValueError, a_tag.b.replace_with, + "string1", a_tag, "string2") + + def test_replace_with_multiple(self): + data = "<a><b></b><c></c></a>" + soup = self.soup(data) + d_tag = soup.new_tag("d") + d_tag.string = "Text In D Tag" + e_tag = soup.new_tag("e") + f_tag = soup.new_tag("f") + a_string = "Random Text" + soup.c.replace_with(d_tag, e_tag, a_string, f_tag) + self.assertEqual( + "<a><b></b><d>Text In D Tag</d><e></e>Random Text<f></f></a>", + soup.decode() + ) + assert soup.b.next_element == d_tag + assert d_tag.string.next_element==e_tag + assert e_tag.next_element.string == a_string + assert e_tag.next_element.next_element == f_tag + def test_replace_first_child(self): data = "<a><b></b><c></c></a>" soup = self.soup(data) diff --git a/doc/source/index.rst b/doc/source/index.rst index 88b8475..01cb6df 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -2097,22 +2097,35 @@ whether something has been decomposed, you can check its ------------------ ``PageElement.replace_with()`` removes a tag or string from the tree, -and replaces it with the tag or string of your choice:: +and replaces it with one or more tags or strings of your choice:: markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>' soup = BeautifulSoup(markup, 'html.parser') a_tag = soup.a new_tag = soup.new_tag("b") - new_tag.string = "example.net" + new_tag.string = "example.com" a_tag.i.replace_with(new_tag) a_tag - # <a href="http://example.com/">I linked to <b>example.net</b></a> + # <a href="http://example.com/">I linked to <b>example.com</b></a> -``replace_with()`` returns the tag or string that was replaced, so + bold_tag = soup.new_tag("b") + bold_tag.string = "example" + i_tag = soup.new_tag("i") + i_tag.string = "net" + a_tag.b.replace_with(bold_tag, ".", i_tag) + + a_tag + # <a href="http://example.com/">I linked to <b>example</b>.<i>net</i></a> + +``replace_with()`` returns the tag or string that got replaced, so that you can examine it 
or add it back to another part of the tree. +`The ability to pass multiple arguments into replace_with() is new +in Beautiful Soup 4.10.0.` + + ``wrap()`` ---------- @@ -2126,7 +2139,7 @@ returns the new wrapper:: soup.p.wrap(soup.new_tag("div")) # <div><p><b>I wish I was bold.</b></p></div> -This method is new in Beautiful Soup 4.0.5. +`This method is new in Beautiful Soup 4.0.5.` ``unwrap()`` --------------------------- @@ -2177,7 +2190,7 @@ You can call ``Tag.smooth()`` to clean up the parse tree by consolidating adjace # A one, a two # </p> -The ``smooth()`` method is new in Beautiful Soup 4.8.0. +`This method is new in Beautiful Soup 4.8.0.` Output ====== |