diff options
-rw-r--r-- | bs4/doc/source/index.rst | 25 | ||||
-rw-r--r-- | bs4/element.py | 50 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 26 |
3 files changed, 60 insertions, 41 deletions
diff --git a/bs4/doc/source/index.rst b/bs4/doc/source/index.rst index 3cf96f0..7847fd3 100644 --- a/bs4/doc/source/index.rst +++ b/bs4/doc/source/index.rst @@ -156,6 +156,7 @@ Beautiful Soup 4 is published through PyPi, so you can install it with and the same package works on Python 2 and Python 3. :kbd:`$ easy_install beautifulsoup4` + :kbd:`$ pip install beautifulsoup4` (The ``BeautifulSoup`` package is probably `not` what you want. That's @@ -180,15 +181,14 @@ Be sure to install a good parser! Beautiful Soup uses a plugin system that supports a number of popular Python parsers. If no third-party parsers are installed, Beautiful Soup uses the HTML parser that comes with Python. In recent releases -of Python (2.7.2 and 3.2.2), this parser works pretty well at handling -bad HTML. In older releases, it's not so good. +of Python (2.7.2 and 3.2.2), this parser is excellent at handling bad +HTML. Unfortunately, in older releases, it's not very good at all. Even if you're using a recent release of Python, I recommend you -install the `lxml parser <http://lxml.de/>`_ if possible. It's much -faster than Python's built-in parser. It works with both Python 2 and -Python 3, and it parses HTML and XML very well. Beautiful Soup will -detect that you have lxml installed, and use it instead of Python's -built-in parser. +install the `lxml parser <http://lxml.de/>`_ if you can. Its +reliability is good on both HTML and XML, and it's much faster than +Python's built-in parser. Beautiful Soup will detect that you have +lxml installed, and use it instead of Python's built-in parser. Depending on your setup, you might install lxml with one of these commands: @@ -1512,7 +1512,7 @@ Only the first argument, the tag name, is required. ``Tag.insert()`` is just like ``Tag.append()``, except the new element doesn't necessarily go at the end of its parent's -``... contents``. It'll be inserted at whatever numeric position you +``.contents``. 
It'll be inserted at whatever numeric position you say. It works just like ``.insert()`` on a Python list:: markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>' @@ -1528,20 +1528,20 @@ say. It works just like ``.insert()`` on a Python list:: ``insert_before()`` and ``insert_after()`` ------------------------------------------ -The ``insert_before()`` method moves a tag or string so that it -immediately precedes something else in the parse tree:: +The ``insert_before()`` method inserts a tag or string immediately +before something else in the parse tree:: soup = BeautifulSoup("<b>stop</b>") tag = soup.new_tag("i") tag.string = "Don't" - tag.insert_before(soup.b.string) + soup.b.string.insert_before(tag) soup.b # <b><i>Don't</i>stop</b> The ``insert_after()`` method moves a tag or string so that it immediately follows something else in the parse tree:: - soup.new_string(" ever ").insert_after(soup.b.i) + soup.b.i.insert_after(soup.new_string(" ever ")) soup.b # <b><i>Don't</i> ever stop</b> soup.b.contents @@ -2246,6 +2246,7 @@ major Linux distributions:: It's also published through PyPi as `BeautifulSoup`.:: :kbd:`$ easy_install BeautifulSoup` + :kbd:`$ pip install BeautifulSoup` You can also `download a tarball of Beautiful Soup 3.2.0 diff --git a/bs4/element.py b/bs4/element.py index 478d285..257cdbb 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -190,33 +190,43 @@ class PageElement(object): """Appends the given tag to the contents of this tag.""" self.insert(len(self.contents), tag) - def insert_before(self, successor): - """Makes this element the immediate predecessor of the given element. + def insert_before(self, predecessor): + """Makes the given element the immediate predecessor of this one. - The two elements will have the same parent, and this element - will be immediately before the given one. + The two elements will have the same parent, and the given element + will be immediately before this one. 
""" - parent = successor.parent + if self is predecessor: + raise ValueError("Can't insert an element before itself.") + parent = self.parent if parent is None: raise ValueError( - "Destination has no parent, so 'before' has no meaning.") - self.extract() - index = parent.index(successor) - parent.insert(index, self) - - def insert_after(self, predecessor): - """Makes this element the immediate successor of the given element. - - The two elements will have the same parent, and this element - will be immediately after the given one. + "Element has no parent, so 'before' has no meaning.") + # Extract first so that the index won't be screwed up if they + # are siblings. + if isinstance(predecessor, PageElement): + predecessor.extract() + index = parent.index(self) + parent.insert(index, predecessor) + + def insert_after(self, successor): + """Makes the given element the immediate successor of this one. + + The two elements will have the same parent, and the given element + will be immediately after this one. """ - parent = predecessor.parent + if self is successor: + raise ValueError("Can't insert an element after itself.") + parent = self.parent if parent is None: raise ValueError( - "Destination has no parent, so 'after' has no meaning.") - self.extract() - index = parent.index(predecessor) - parent.insert(index+1, self) + "Element has no parent, so 'after' has no meaning.") + # Extract first so that the index won't be screwed up if they + # are siblings. 
+ if isinstance(successor, PageElement): + successor.extract() + index = parent.index(self) + parent.insert(index+1, successor) def find_next(self, name=None, attrs={}, text=None, **kwargs): """Returns the first item that matches the given criteria and diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 1d6d612..2c47aa5 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -663,6 +663,15 @@ class TestTreeModification(SoupTest): self.assertEqual(new_text.next_sibling, None) self.assertEqual(new_text.next_element, soup.c) + def test_insert_string(self): + soup = self.soup("<a></a>") + soup.a.insert(0, "bar") + soup.a.insert(0, "foo") + # The strings were added to the tag. + self.assertEqual(["foo", "bar"], soup.a.contents) + # And they were converted to NavigableStrings. + self.assertEqual(soup.a.contents[0].next_element, "bar") + def test_insert_tag(self): builder = self.default_builder soup = self.soup( @@ -704,23 +713,22 @@ class TestTreeModification(SoupTest): def test_insert_before(self): soup = self.soup("<a>foo</a><b>bar</b>") - soup.new_string("BAZ").insert_before(soup.b) - soup.new_string("QUUX").insert_before(soup.a) + soup.b.insert_before("BAZ") + soup.a.insert_before("QUUX") self.assertEqual( soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>")) - soup.b.insert_before(soup.a) + soup.a.insert_before(soup.b) self.assertEqual( soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ")) - def test_insert_after(self): soup = self.soup("<a>foo</a><b>bar</b>") - soup.new_string("BAZ").insert_after(soup.b) - soup.new_string("QUUX").insert_after(soup.a) + soup.b.insert_after("BAZ") + soup.a.insert_after("QUUX") self.assertEqual( soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ")) - soup.a.insert_after(soup.b) + soup.b.insert_after(soup.a) self.assertEqual( soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ")) @@ -729,16 +737,16 @@ class TestTreeModification(SoupTest): tag = soup.new_tag("a") 
string = soup.new_string("") self.assertRaises(ValueError, string.insert_after, tag) - self.assertRaises(ValueError, soup.insert_after, tag) + self.assertRaises(ValueError, tag.insert_after, tag) def test_insert_before_raises_valueerror_if_before_has_no_meaning(self): soup = self.soup("") tag = soup.new_tag("a") string = soup.new_string("") self.assertRaises(ValueError, string.insert_before, tag) - self.assertRaises(ValueError, soup.insert_before, tag) + self.assertRaises(ValueError, tag.insert_before, tag) def test_replace_with(self): soup = self.soup( |