summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 4
-rw-r--r-- bs4/element.py 16
-rw-r--r-- bs4/tests/test_tree.py 9
3 files changed, 26 insertions, 3 deletions
diff --git a/NEWS.txt b/NEWS.txt
index ed2d89d..17abf99 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -13,6 +13,10 @@
* Fixed yet another bug that caused a disconnected tree when html5lib
copied an element from one part of the tree to another. [bug=1270611]
+* __repr__ now returns an ASCII bytestring in Python 2, and a Unicode
+ string in Python 3, instead of a UTF8-encoded bytestring in both
+ versions. [bug=1420131]
+
* The select() method now supports selector grouping. Patch by
Francisco Canas [bug=1191917]
diff --git a/bs4/element.py b/bs4/element.py
index 5d895ab..3c32c17 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -983,15 +983,25 @@ class Tag(PageElement):
as defined in __eq__."""
return not self == other
- def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ def __repr__(self, encoding="unicode-escape"):
"""Renders this tag as a string."""
- return self.encode(encoding)
+ if PY3K:
+ # "The return value must be a string object", i.e. Unicode
+ return self.decode()
+ else:
+ # "The return value must be a string object", i.e. a bytestring.
+ # By convention, the return value of __repr__ should also be
+ # an ASCII string.
+ return self.encode(encoding)
def __unicode__(self):
return self.decode()
def __str__(self):
- return self.encode()
+ if PY3K:
+ return self.decode()
+ else:
+ return self.encode()
if PY3K:
__str__ = __repr__ = __unicode__
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 7c44c48..bab73c6 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -19,6 +19,7 @@ from bs4.builder import (
HTMLParserTreeBuilder,
)
from bs4.element import (
+ PY3K,
CData,
Comment,
Doctype,
@@ -1490,6 +1491,14 @@ class TestEncoding(SoupTest):
self.assertEqual(
u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
+ def test_repr(self):
+ html = u"<b>\N{SNOWMAN}</b>"
+ soup = self.soup(html)
+ if PY3K:
+ self.assertEqual(html, repr(soup))
+ else:
+ self.assertEqual(b'<b>\\u2603</b>', repr(soup))
+
class TestNavigableStringSubclasses(SoupTest):
def test_cdata(self):