diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-26 23:05:10 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-26 23:05:10 -0500 |
commit | cb369db2e4a16816ae78ff6eff7d8d3393141569 (patch) | |
tree | c4e1e47b01cf081e70434d65b3495194c5671470 | |
parent | 247785aa53358fa64e9bd5f799c4c9a1609489f0 (diff) |
Refactored the code that makes a string output-ready.
-rw-r--r-- | beautifulsoup/element.py | 48 |
1 files changed, 28 insertions, 20 deletions
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py index bbecdbd..3515d50 100644 --- a/beautifulsoup/element.py +++ b/beautifulsoup/element.py @@ -11,7 +11,7 @@ from util import isList DEFAULT_OUTPUT_ENCODING = "utf-8" -class PageElement(object): +class PageElement(EntitySubstitution): """Contains the navigational information for some part of the page (either a tag or a piece of text)""" @@ -334,6 +334,9 @@ class PageElement(object): class NavigableString(unicode, PageElement): + PREFIX = '' + SUFFIX = '' + def __new__(cls, value): """Create a new NavigableString. @@ -358,29 +361,35 @@ class NavigableString(unicode, PageElement): else: raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - def decodeGivenEventualEncoding(self, eventual_encoding): - return self + def output_ready(self, substitute_html_entities=False): + if substitute_html_entities: + output = self.substitute_html(self) + else: + output = self + return self.PREFIX + output + self.SUFFIX + class CData(NavigableString): - def decodeGivenEventualEncoding(self, eventual_encoding): - return u'<![CDATA[' + self + u']]>' + PREFIX = u'<![CDATA[' + SUFFIX = u']]>' + class ProcessingInstruction(NavigableString): - def decodeGivenEventualEncoding(self, eventual_encoding): - output = self - if u'%SOUP-ENCODING%' in output: - output = self.substituteEncoding(output, eventual_encoding) - return u'<?' + output + u'?>' + PREFIX = u'<?' + SUFFIX = u'?>' + class Comment(NavigableString): - def decodeGivenEventualEncoding(self, eventual_encoding): - return u'<!--' + self + u'-->' + + PREFIX = u'<!--' + SUFFIX = u'-->' class Declaration(NavigableString): - def decodeGivenEventualEncoding(self, eventual_encoding): - return u'<!' + self + u'>' + PREFIX = u'<!' + SUFFIX = u'!>' + class Doctype(NavigableString): @@ -394,10 +403,11 @@ class Doctype(NavigableString): return Doctype(value) - def decodeGivenEventualEncoding(self, eventual_encoding): - return u'<!DOCTYPE ' + self + u'>' + PREFIX = u'<!DOCTYPE ' + SUFFIX = u'>' + -class Tag(PageElement, EntitySubstitution): +class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" @@ -654,15 +664,13 @@ class Tag(PageElement, EntitySubstitution): for c in self: text = None if isinstance(c, NavigableString): - text = c.decodeGivenEventualEncoding(eventual_encoding) + text = c.output_ready(substitute_html_entities) elif isinstance(c, Tag): s.append(c.decode(pretty_print, indent_level, eventual_encoding, substitute_html_entities)) if text and pretty_print: text = text.strip() if text: - if substitute_html_entities: - text = self.substitute_html(text) if pretty_print: s.append(" " * (indent_level-1)) s.append(text) |