summary | refs | log | tree | commit | diff
path: root/beautifulsoup/element.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-21 17:11:56 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-21 17:11:56 -0500
commit: 384c5cd07b027b1a53d59d81fd3dc7661cbc2ab1 (patch)
tree: 3de877db036706e3f11981a4d33968dea4a020e1 /beautifulsoup/element.py
parent: 274d94dc13ffeb80c587f68bbad267f4f5199b9e (diff)
Switched Tag.decode to use EntitySubstitution.substitute_xml.
Diffstat (limited to 'beautifulsoup/element.py')
-rw-r--r-- beautifulsoup/element.py 53
1 files changed, 9 insertions, 44 deletions
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index a70813d..6101641 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -4,6 +4,7 @@ try:
from htmlentitydefs import name2codepoint
except ImportError:
name2codepoint = {}
+from beautifulsoup.dammit import EntitySubstitution
from util import isString, isList
@@ -417,7 +418,7 @@ class Doctype(NavigableString):
def decodeGivenEventualEncoding(self, eventualEncoding):
return u'<!DOCTYPE ' + self + u'>'
-class Tag(PageElement, Entities):
+class Tag(PageElement, EntitySubstitution):
"""Represents a found HTML tag with its attributes and contents."""
@@ -574,15 +575,6 @@ class Tag(PageElement, Entities):
"""Renders this tag as a string."""
return self.decode(eventualEncoding=encoding)
- BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
- + ")")
-
- def _sub_entity(self, x):
- """Used with a regular expression to substitute the
- appropriate XML entity for an XML special character."""
- return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
-
def __unicode__(self):
return self.decode()
@@ -601,45 +593,18 @@ class Tag(PageElement, Entities):
attrs = []
if self.attrs:
for key, val in self.attrs:
- fmt = '%s="%s"'
- if isString(val):
+ if val is None:
+ decoded = key
+ else:
+ if not isString(val):
+ val = str(val)
if (self.contains_substitutions
and eventualEncoding is not None
and '%SOUP-ENCODING%' in val):
val = self.substituteEncoding(val, eventualEncoding)
- # The attribute value either:
- #
- # * Contains no embedded double quotes or single quotes.
- # No problem: we enclose it in double quotes.
- # * Contains embedded single quotes. No problem:
- # double quotes work here too.
- # * Contains embedded double quotes. No problem:
- # we enclose it in single quotes.
- # * Embeds both single _and_ double quotes. This
- # can't happen naturally, but it can happen if
- # you modify an attribute value after parsing
- # the document. Now we have a bit of a
- # problem. We solve it by enclosing the
- # attribute in single quotes, and escaping any
- # embedded single quotes to XML entities.
- if '"' in val:
- fmt = "%s='%s'"
- if "'" in val:
- # TODO: replace with apos when
- # appropriate.
- val = val.replace("'", "&squot;")
-
- # Now we're okay w/r/t quotes. But the attribute
- # value might also contain angle brackets, or
- # ampersands that aren't part of entities. We need
- # to escape those to XML entities too.
- val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
- if val is None:
- # Handle boolean attributes.
- decoded = key
- else:
- decoded = fmt % (key, val)
+ # Set destination_is_xml based on something...
+ decoded = key + '=' + self.substitute_xml(val, True, False)
attrs.append(decoded)
close = ''
closeTag = ''