diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-26 21:26:15 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-26 21:26:15 -0500 |
commit | d7056f49c8bb3a448cec2f1a6f2de55e93c8e8d6 (patch) | |
tree | cc75bdadec0060288b1b76213c3ac2de34d9ab68 /beautifulsoup/element.py | |
parent | ef5770589595e80cbd9690b64504a2166b3558fd (diff) |
First stab at HTML entity replacement.
Diffstat (limited to 'beautifulsoup/element.py')
-rw-r--r-- | beautifulsoup/element.py | 27 |
1 files changed, 19 insertions, 8 deletions
```diff
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index 23f8c33..f3a59d4 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -561,11 +561,14 @@ class Tag(PageElement, EntitySubstitution):
         return self.encode()
 
     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
-               pretty_print=False, indent_level=0):
-        return self.decode(pretty_print, indent_level, encoding).encode(encoding)
+               pretty_print=False, indent_level=0,
+               replace_with_html_entities=False):
+        return self.decode(pretty_print, indent_level, encoding,
+                           replace_with_html_entities).encode(encoding)
 
     def decode(self, pretty_print=False, indent_level=0,
-               eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               replace_with_html_entities=False):
         """Returns a string or Unicode representation of this tag and
         its contents. To get Unicode, pass None for encoding."""
 
@@ -597,7 +600,8 @@ class Tag(PageElement, EntitySubstitution):
             space = (' ' * (indentTag-1))
             indentContents = indentTag + 1
         contents = self.decodeContents(pretty_print, indentContents,
-                                       eventual_encoding)
+                                       eventual_encoding,
+                                       replace_with_html_entities)
         if self.hidden:
             s = contents
         else:
@@ -635,11 +639,15 @@ class Tag(PageElement, EntitySubstitution):
         return self.encode(encoding, True)
 
     def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
-                       pretty_print=False, indent_level=0):
-        return self.decodeContents(pretty_print, indent_level).encode(encoding)
+                       pretty_print=False, indent_level=0,
+                       replace_With_html_entities=False):
+        return self.decodeContents(
+            pretty_print, indent_level, replace_with_html_entities).encode(
+            encoding)
 
     def decodeContents(self, pretty_print=False, indent_level=0,
-                       eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+                       eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                       replace_with_html_entities=False):
         """Renders the contents of this tag as a string in the given encoding.
         If encoding is None, returns a Unicode string.."""
         s=[]
@@ -648,10 +656,13 @@ class Tag(PageElement, EntitySubstitution):
             if isinstance(c, NavigableString):
                 text = c.decodeGivenEventualEncoding(eventual_encoding)
             elif isinstance(c, Tag):
-                s.append(c.decode(pretty_print, indent_level, eventual_encoding))
+                s.append(c.decode(pretty_print, indent_level, eventual_encoding,
+                                  replace_with_html_entities))
             if text and pretty_print:
                 text = text.strip()
             if text:
+                if replace_with_html_entities:
+                    text = self.substitute_html(text)
                 if pretty_print:
                     s.append(" " * (indent_level-1))
                 s.append(text)
```