diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:55:20 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:55:20 -0500 |
commit | 1a50d9623831990ae0a78ea3a7e66fa098fe92ac (patch) | |
tree | d31578ac86c753c6e3427f574408a1ad960d80ac /bs4/element.py | |
parent | ffcebc274b84b85a0b8c93c2aca8756df4baa236 (diff) |
By default, turn unrecognized characters into numeric XML entity refs.
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 9 |
1 file changed, 6 insertions, 3 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index a0f64ba..513407c 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -745,9 +745,12 @@ class Tag(PageElement):
     __str__ = __repr__ = __unicode__
 
     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
-               indent_level=None, formatter="minimal"):
-        return self.decode(indent_level, encoding,
-                           formatter).encode(encoding)
+               indent_level=None, formatter="minimal",
+               errors="xmlcharrefreplace"):
+        # Turn the data structure into Unicode, then encode the
+        # Unicode.
+        u = self.decode(indent_level, encoding, formatter)
+        return u.encode(encoding, errors=errors)
 
     def decode(self, indent_level=None,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
```