summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bs4/doc/source/index.rst 25
-rw-r--r-- bs4/element.py 50
-rw-r--r-- bs4/tests/test_tree.py 26
3 files changed, 60 insertions, 41 deletions
diff --git a/bs4/doc/source/index.rst b/bs4/doc/source/index.rst
index 3cf96f0..7847fd3 100644
--- a/bs4/doc/source/index.rst
+++ b/bs4/doc/source/index.rst
@@ -156,6 +156,7 @@ Beautiful Soup 4 is published through PyPi, so you can install it with
and the same package works on Python 2 and Python 3.
:kbd:`$ easy_install beautifulsoup4`
+
:kbd:`$ pip install beautifulsoup4`
(The ``BeautifulSoup`` package is probably `not` what you want. That's
@@ -180,15 +181,14 @@ Be sure to install a good parser!
Beautiful Soup uses a plugin system that supports a number of popular
Python parsers. If no third-party parsers are installed, Beautiful
Soup uses the HTML parser that comes with Python. In recent releases
-of Python (2.7.2 and 3.2.2), this parser works pretty well at handling
-bad HTML. In older releases, it's not so good.
+of Python (2.7.2 and 3.2.2), this parser is excellent at handling bad
+HTML. Unfortunately, in older releases, it's not very good at all.
Even if you're using a recent release of Python, I recommend you
-install the `lxml parser <http://lxml.de/>`_ if possible. It's much
-faster than Python's built-in parser. It works with both Python 2 and
-Python 3, and it parses HTML and XML very well. Beautiful Soup will
-detect that you have lxml installed, and use it instead of Python's
-built-in parser.
+install the `lxml parser <http://lxml.de/>`_ if you can. Its
+reliability is good on both HTML and XML, and it's much faster than
+Python's built-in parser. Beautiful Soup will detect that you have
+lxml installed, and use it instead of Python's built-in parser.
Depending on your setup, you might install lxml with one of these commands:
@@ -1512,7 +1512,7 @@ Only the first argument, the tag name, is required.
``Tag.insert()`` is just like ``Tag.append()``, except the new element
doesn't necessarily go at the end of its parent's
-``... contents``. It'll be inserted at whatever numeric position you
+``.contents``. It'll be inserted at whatever numeric position you
say. It works just like ``.insert()`` on a Python list::
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
@@ -1528,20 +1528,20 @@ say. It works just like ``.insert()`` on a Python list::
``insert_before()`` and ``insert_after()``
------------------------------------------
-The ``insert_before()`` method moves a tag or string so that it
-immediately precedes something else in the parse tree::
+The ``insert_before()`` method inserts a tag or string immediately
+before something else in the parse tree::
soup = BeautifulSoup("<b>stop</b>")
tag = soup.new_tag("i")
tag.string = "Don't"
- tag.insert_before(soup.b.string)
+ soup.b.string.insert_before(tag)
soup.b
# <b><i>Don't</i>stop</b>
The ``insert_after()`` method moves a tag or string so that it
immediately follows something else in the parse tree::
- soup.new_string(" ever ").insert_after(soup.b.i)
+ soup.b.i.insert_after(soup.new_string(" ever "))
soup.b
# <b><i>Don't</i> ever stop</b>
soup.b.contents
@@ -2246,6 +2246,7 @@ major Linux distributions::
It's also published through PyPi as `BeautifulSoup`.::
:kbd:`$ easy_install BeautifulSoup`
+
:kbd:`$ pip install BeautifulSoup`
You can also `download a tarball of Beautiful Soup 3.2.0
diff --git a/bs4/element.py b/bs4/element.py
index 478d285..257cdbb 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -190,33 +190,43 @@ class PageElement(object):
"""Appends the given tag to the contents of this tag."""
self.insert(len(self.contents), tag)
- def insert_before(self, successor):
- """Makes this element the immediate predecessor of the given element.
+ def insert_before(self, predecessor):
+ """Makes the given element the immediate predecessor of this one.
- The two elements will have the same parent, and this element
- will be immediately before the given one.
+ The two elements will have the same parent, and the given element
+ will be immediately before this one.
"""
- parent = successor.parent
+ if self is predecessor:
+ raise ValueError("Can't insert an element before itself.")
+ parent = self.parent
if parent is None:
raise ValueError(
- "Destination has no parent, so 'before' has no meaning.")
- self.extract()
- index = parent.index(successor)
- parent.insert(index, self)
-
- def insert_after(self, predecessor):
- """Makes this element the immediate successor of the given element.
-
- The two elements will have the same parent, and this element
- will be immediately after the given one.
+ "Element has no parent, so 'before' has no meaning.")
+ # Extract first so that the index won't be screwed up if they
+ # are siblings.
+ if isinstance(predecessor, PageElement):
+ predecessor.extract()
+ index = parent.index(self)
+ parent.insert(index, predecessor)
+
+ def insert_after(self, successor):
+ """Makes the given element the immediate successor of this one.
+
+ The two elements will have the same parent, and the given element
+ will be immediately after this one.
"""
- parent = predecessor.parent
+ if self is successor:
+ raise ValueError("Can't insert an element after itself.")
+ parent = self.parent
if parent is None:
raise ValueError(
- "Destination has no parent, so 'after' has no meaning.")
- self.extract()
- index = parent.index(predecessor)
- parent.insert(index+1, self)
+ "Element has no parent, so 'after' has no meaning.")
+ # Extract first so that the index won't be screwed up if they
+ # are siblings.
+ if isinstance(successor, PageElement):
+ successor.extract()
+ index = parent.index(self)
+ parent.insert(index+1, successor)
def find_next(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the first item that matches the given criteria and
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 1d6d612..2c47aa5 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -663,6 +663,15 @@ class TestTreeModification(SoupTest):
self.assertEqual(new_text.next_sibling, None)
self.assertEqual(new_text.next_element, soup.c)
+ def test_insert_string(self):
+ soup = self.soup("<a></a>")
+ soup.a.insert(0, "bar")
+ soup.a.insert(0, "foo")
+ # The strings were added to the tag.
+ self.assertEqual(["foo", "bar"], soup.a.contents)
+ # And they were converted to NavigableStrings.
+ self.assertEqual(soup.a.contents[0].next_element, "bar")
+
def test_insert_tag(self):
builder = self.default_builder
soup = self.soup(
@@ -704,23 +713,22 @@ class TestTreeModification(SoupTest):
def test_insert_before(self):
soup = self.soup("<a>foo</a><b>bar</b>")
- soup.new_string("BAZ").insert_before(soup.b)
- soup.new_string("QUUX").insert_before(soup.a)
+ soup.b.insert_before("BAZ")
+ soup.a.insert_before("QUUX")
self.assertEqual(
soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
- soup.b.insert_before(soup.a)
+ soup.a.insert_before(soup.b)
self.assertEqual(
soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
-
def test_insert_after(self):
soup = self.soup("<a>foo</a><b>bar</b>")
- soup.new_string("BAZ").insert_after(soup.b)
- soup.new_string("QUUX").insert_after(soup.a)
+ soup.b.insert_after("BAZ")
+ soup.a.insert_after("QUUX")
self.assertEqual(
soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
- soup.a.insert_after(soup.b)
+ soup.b.insert_after(soup.a)
self.assertEqual(
soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
@@ -729,16 +737,16 @@ class TestTreeModification(SoupTest):
tag = soup.new_tag("a")
string = soup.new_string("")
self.assertRaises(ValueError, string.insert_after, tag)
-
self.assertRaises(ValueError, soup.insert_after, tag)
+ self.assertRaises(ValueError, tag.insert_after, tag)
def test_insert_before_raises_valueerror_if_before_has_no_meaning(self):
soup = self.soup("")
tag = soup.new_tag("a")
string = soup.new_string("")
self.assertRaises(ValueError, string.insert_before, tag)
-
self.assertRaises(ValueError, soup.insert_before, tag)
+ self.assertRaises(ValueError, tag.insert_before, tag)
def test_replace_with(self):
soup = self.soup(