diff options
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/element.py | 15 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 17 | ||||
-rw-r--r-- | doc/source/index.rst | 16 |
4 files changed, 48 insertions, 3 deletions
@@ -2,6 +2,9 @@ * Fixed a bug that sometimes created disconnected trees. +* Added the missing renderContents method from Beautiful Soup 3. Also + added an encode_contents() method to go along with decode_contents(). + = 4.0.3 (20120403) = * Fixed a typo that caused some versions of Python 3 to convert the diff --git a/bs4/element.py b/bs4/element.py index bd4c3aa..7935cb1 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -1036,6 +1036,21 @@ class Tag(PageElement): s.append("\n") return ''.join(s) + def encode_contents( + self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING, + formatter="minimal"): + """Renders the contents of this tag as a bytestring.""" + contents = self.decode_contents(indent_level, encoding, formatter) + return contents.encode(encoding) + + # Old method for BS3 compatibility + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + if not prettyPrint: + indentLevel = None + return self.encode_contents( + indent_level=indentLevel, encoding=encoding) + #Soup methods def find(self, name=None, attrs={}, recursive=True, text=None, diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 4d114b7..76e6bf7 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1297,6 +1297,23 @@ class TestEncoding(SoupTest): self.assertRaises( UnicodeEncodeError, soup.encode, "ascii", errors="strict") + def test_decode_contents(self): + html = u"<b>\N{SNOWMAN}</b>" + soup = self.soup(html) + self.assertEquals(u"\N{SNOWMAN}", soup.b.decode_contents()) + + def test_encode_contents(self): + html = u"<b>\N{SNOWMAN}</b>" + soup = self.soup(html) + self.assertEquals( + u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( + encoding="utf8")) + + def test_deprecated_renderContents(self): + html = u"<b>\N{SNOWMAN}</b>" + soup = self.soup(html) + self.assertEquals(u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) + class TestNavigableStringSubclasses(SoupTest): def test_cdata(self): diff --git a/doc/source/index.rst b/doc/source/index.rst index 0b85924..5016fb0 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -2201,9 +2201,9 @@ that are part of the HTML5 standard, so it has the best claim on being the "correct" way, but all three techniques are legitimate. Differences between parsers can affect your script. If you're planning -on distributing your script to other people, you might want to specify -in the ``BeautifulSoup`` constructor which parser you used during -development. That will reduce the chances that your users parse a +on distributing your script to other people, or running it on multiple +machines, you should specify a parser in the ``BeautifulSoup`` +constructor. That will reduce the chances that your users parse a document differently from the way you parse it. Encodings @@ -2503,12 +2503,21 @@ probably using Python's built-in HTML parser, which sometimes skips tags it doesn't understand. Solution: :ref:`Install lxml or html5lib. <parser-installation>` +If your script works on one computer but not another, it's probably +because the two computers have different sets of parser libraries +available. For instance, you may have developed the script on a +computer that has lxml installed, and then tried to run it on a +computer that only has html5lib installed. See `Differences between +parsers`_ for why this matters, and fix the problem by mentioning a +specific parser library in the ``BeautifulSoup`` constructor. + ``KeyError: [attr]`` - Caused by accessing ``tag['attr']`` when the tag in question doesn't define the ``attr`` attribute. The most common errors are ``KeyError: 'href'`` and ``KeyError: 'class'``. Use ``tag.get('attr')`` if you're not sure ``attr`` is defined, just as you would with a Python dictionary. + Parsing XML ----------- @@ -2616,6 +2625,7 @@ use that instead. See `Installing a parser`_ for a comparison. Method names ^^^^^^^^^^^^ +* ``renderContents`` -> ``encode_contents`` * ``replaceWith`` -> ``replace_with`` * ``replaceWithChildren`` -> ``replace_with_children`` * ``findAll`` -> ``find_all`` |