diff options
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py index c199cd5..a733cad 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -81,6 +81,8 @@ class EntitySubstitution(object): "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" ")") + AMPERSAND_OR_BRACKET = re.compile("([<>&])") + @classmethod def _substitute_html_entity(cls, matchobj): entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) @@ -134,6 +136,28 @@ class EntitySubstitution(object): def substitute_xml(cls, value, make_quoted_attribute=False): """Substitute XML entities for special XML characters. + :param value: A string to be substituted. The less-than sign + will become <, the greater-than sign will become >, + and any ampersands will become &. If you want ampersands + that appear to be part of an entity definition to be left + alone, use substitute_xml_containing_entities() instead. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets and ampersands. + value = cls.AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_xml_containing_entities( + cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + :param value: A string to be substituted. The less-than sign will become <, the greater-than sign will become >, and any ampersands that are not part of an entity defition will @@ -151,6 +175,7 @@ class EntitySubstitution(object): value = cls.quoted_attribute_value(value) return value + @classmethod def substitute_html(cls, s): """Replace certain Unicode characters with named HTML entities. |