From 0ce55d794c2ed142f70674d2b48e8294ca8b1d22 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Thu, 25 Jun 2015 07:35:32 -0400
Subject: __repr__ now returns an ASCII bytestring in Python 2, and a Unicode string in Python 3, instead of a UTF8-encoded bytestring in both versions. [bug=1420131]

---
 bs4/element.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'bs4/element.py')

diff --git a/bs4/element.py b/bs4/element.py
index 5d895ab..3c32c17 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -983,15 +983,25 @@ class Tag(PageElement):
         as defined in __eq__."""
         return not self == other
 
-    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+    def __repr__(self, encoding="unicode-escape"):
         """Renders this tag as a string."""
-        return self.encode(encoding)
+        if PY3K:
+            # "The return value must be a string object", i.e. Unicode
+            return self.decode()
+        else:
+            # "The return value must be a string object", i.e. a bytestring.
+            # By convention, the return value of __repr__ should also be
+            # an ASCII string.
+            return self.encode(encoding)
 
     def __unicode__(self):
         return self.decode()
 
     def __str__(self):
-        return self.encode()
+        if PY3K:
+            return self.decode()
+        else:
+            return self.encode()
 
     if PY3K:
         __str__ = __repr__ = __unicode__
-- 
cgit v1.2.3