summaryrefslogtreecommitdiff
path: root/beautifulsoup
diff options
context:
space:
mode:
Diffstat (limited to 'beautifulsoup')
-rw-r--r-- beautifulsoup/__init__.py 6
-rw-r--r-- beautifulsoup/dammit.py 4
-rw-r--r-- beautifulsoup/element.py 27
3 files changed, 25 insertions, 12 deletions
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index cee55e7..f4c2a95 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -263,7 +263,8 @@ class BeautifulSoup(Tag):
self.currentData.append(data)
def decode(self, pretty_print=False, indent_level=0,
- eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+ eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+ replace_with_html_entities=False):
"""Returns a string or Unicode representation of this document.
To get Unicode, pass None for encoding."""
if self.is_xml:
@@ -275,7 +276,8 @@ class BeautifulSoup(Tag):
else:
prefix = u''
return prefix + super(BeautifulSoup, self).decode(
- pretty_print, indent_level, eventual_encoding)
+ pretty_print, indent_level, eventual_encoding,
+ replace_with_html_entities)
class StopParsing(Exception):
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py
index 9833bd4..31dfa95 100644
--- a/beautifulsoup/dammit.py
+++ b/beautifulsoup/dammit.py
@@ -37,8 +37,8 @@ class EntitySubstitution(object):
for codepoint, name in codepoint2name.items():
if codepoint == 34:
# There's no point in turning the quotation mark into
- # &quot;, unless it happens in an attribute value, which
- # is done elsewhere.
+ # &quot;, unless it happens within an attribute value, which
+ # is handled elsewhere.
continue;
character = unichr(codepoint)
characters.append(character)
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index 23f8c33..f3a59d4 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -561,11 +561,14 @@ class Tag(PageElement, EntitySubstitution):
return self.encode()
def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
- pretty_print=False, indent_level=0):
- return self.decode(pretty_print, indent_level, encoding).encode(encoding)
+ pretty_print=False, indent_level=0,
+ replace_with_html_entities=False):
+ return self.decode(pretty_print, indent_level, encoding,
+ replace_with_html_entities).encode(encoding)
def decode(self, pretty_print=False, indent_level=0,
- eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+ eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+ replace_with_html_entities=False):
"""Returns a string or Unicode representation of this tag and
its contents. To get Unicode, pass None for encoding."""
@@ -597,7 +600,8 @@ class Tag(PageElement, EntitySubstitution):
space = (' ' * (indentTag-1))
indentContents = indentTag + 1
contents = self.decodeContents(pretty_print, indentContents,
- eventual_encoding)
+ eventual_encoding,
+ replace_with_html_entities)
if self.hidden:
s = contents
else:
@@ -635,11 +639,15 @@ class Tag(PageElement, EntitySubstitution):
return self.encode(encoding, True)
def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
- pretty_print=False, indent_level=0):
- return self.decodeContents(pretty_print, indent_level).encode(encoding)
+ pretty_print=False, indent_level=0,
+ replace_with_html_entities=False):
+ return self.decodeContents(
+ pretty_print, indent_level, encoding,
+ replace_with_html_entities).encode(encoding)
def decodeContents(self, pretty_print=False, indent_level=0,
- eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+ eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+ replace_with_html_entities=False):
"""Renders the contents of this tag as a string in the given
encoding. If encoding is None, returns a Unicode string.."""
s=[]
@@ -648,10 +656,13 @@ class Tag(PageElement, EntitySubstitution):
if isinstance(c, NavigableString):
text = c.decodeGivenEventualEncoding(eventual_encoding)
elif isinstance(c, Tag):
- s.append(c.decode(pretty_print, indent_level, eventual_encoding))
+ s.append(c.decode(pretty_print, indent_level, eventual_encoding,
+ replace_with_html_entities))
if text and pretty_print:
text = text.strip()
if text:
+ if replace_with_html_entities:
+ text = self.substitute_html(text)
if pretty_print:
s.append(" " * (indent_level-1))
s.append(text)