diff options
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 37 |
1 files changed, 30 insertions, 7 deletions
diff --git a/bs4/element.py b/bs4/element.py index 997378a..efc6ec7 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -22,6 +22,19 @@ def _alias(attr): return alias +class NamespacedAttribute(unicode): + + def __new__(cls, prefix, name, namespace=None): + if name is None: + obj = unicode.__new__(cls, prefix) + else: + obj = unicode.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj + + class PageElement(object): """Contains the navigational information for some part of the page (either a tag or a piece of text)""" @@ -500,15 +513,15 @@ class Doctype(NavigableString): return Doctype(value) PREFIX = u'<!DOCTYPE ' - SUFFIX = u'>' + SUFFIX = u'>\n' class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" - def __init__(self, parser=None, builder=None, name=None, attrs=None, - parent=None, previous=None): + def __init__(self, parser=None, builder=None, name=None, namespace=None, + nsprefix=None, attrs=None, parent=None, previous=None): "Basic constructor." if parser is None: @@ -520,6 +533,8 @@ class Tag(PageElement): if name is None: raise ValueError("No value provided for new tag's name.") self.name = name + self.namespace = namespace + self.nsprefix = nsprefix if attrs is None: attrs = {} else: @@ -659,6 +674,9 @@ class Tag(PageElement): def has_attr(self, key): return key in self.attrs + def __hash__(self): + return str(self).__hash__() + def __getitem__(self, key): """tag[key] returns the value of the 'key' attribute for the tag, and throws an exception if it's not there.""" @@ -779,7 +797,7 @@ class Tag(PageElement): and '%SOUP-ENCODING%' in val): val = self.substitute_encoding(val, eventual_encoding) - decoded = (key + '=' + decoded = (str(key) + '=' + EntitySubstitution.substitute_xml(val, True)) attrs.append(decoded) close = '' @@ -789,6 +807,10 @@ class Tag(PageElement): else: closeTag = '</%s>' % self.name + prefix = '' + if self.nsprefix: + prefix = self.nsprefix + ":" + pretty_print = (indent_level is not None) if pretty_print: space = (' ' * (indent_level - 1)) @@ -809,7 +831,8 @@ class Tag(PageElement): attribute_string = ' ' + ' '.join(attrs) if pretty_print: s.append(space) - s.append('<%s%s%s>' % (self.name, attribute_string, close)) + s.append('<%s%s%s%s>' % ( + prefix, self.name, attribute_string, close)) if pretty_print: s.append("\n") s.append(contents) @@ -986,7 +1009,7 @@ class SoupStrainer(object): searchTag = search_tag def search(self, markup): - #print 'looking for %s in %s' % (self, markup) + # print 'looking for %s in %s' % (self, markup) found = None # If given a list of items, scan it for a text element that # matches. @@ -1012,7 +1035,7 @@ class SoupStrainer(object): return found def _matches(self, markup, match_against): - #print "Matching %s against %s" % (markup, match_against) + # print "Matching %s against %s" % (markup, match_against) result = False if isinstance(markup, list) or isinstance(markup, tuple): |