diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-23 13:55:51 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-23 13:55:51 -0500 |
commit | 97b54c4bdbee0f109c444b50d8102ae8d7abb7c4 (patch) | |
tree | 8feb3c4387fa5dc67c810f76c9a831ebf523898d /bs4/element.py | |
parent | 328204928bd22ca9e8aeac0a3208645d9f82f264 (diff) | |
parent | deaeb40977719ea821a62f41d75e2c9f48559094 (diff) |
The namespace stuff seems to work, and it's definitely an improvement on the status quo, so in it goes.
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 37 |
1 file changed, 30 insertions, 7 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index 997378a..efc6ec7 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -22,6 +22,19 @@ def _alias(attr):
     return alias
 
 
+class NamespacedAttribute(unicode):
+
+    def __new__(cls, prefix, name, namespace=None):
+        if name is None:
+            obj = unicode.__new__(cls, prefix)
+        else:
+            obj = unicode.__new__(cls, prefix + ":" + name)
+        obj.prefix = prefix
+        obj.name = name
+        obj.namespace = namespace
+        return obj
+
+
 class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
@@ -500,15 +513,15 @@ class Doctype(NavigableString):
         return Doctype(value)
 
     PREFIX = u'<!DOCTYPE '
-    SUFFIX = u'>'
+    SUFFIX = u'>\n'
 
 
 class Tag(PageElement):
 
     """Represents a found HTML tag with its attributes and contents."""
 
-    def __init__(self, parser=None, builder=None, name=None, attrs=None,
-                 parent=None, previous=None):
+    def __init__(self, parser=None, builder=None, name=None, namespace=None,
+                 nsprefix=None, attrs=None, parent=None, previous=None):
         "Basic constructor."
 
         if parser is None:
@@ -520,6 +533,8 @@ class Tag(PageElement):
         if name is None:
             raise ValueError("No value provided for new tag's name.")
         self.name = name
+        self.namespace = namespace
+        self.nsprefix = nsprefix
         if attrs is None:
             attrs = {}
         else:
@@ -659,6 +674,9 @@ class Tag(PageElement):
     def has_attr(self, key):
         return key in self.attrs
 
+    def __hash__(self):
+        return str(self).__hash__()
+
     def __getitem__(self, key):
         """tag[key] returns the value of the 'key' attribute for the tag,
         and throws an exception if it's not there."""
@@ -779,7 +797,7 @@ class Tag(PageElement):
                     and '%SOUP-ENCODING%' in val):
                     val = self.substitute_encoding(val, eventual_encoding)
 
-                decoded = (key + '='
+                decoded = (str(key) + '='
                            + EntitySubstitution.substitute_xml(val, True))
             attrs.append(decoded)
         close = ''
@@ -789,6 +807,10 @@ class Tag(PageElement):
         else:
             closeTag = '</%s>' % self.name
 
+        prefix = ''
+        if self.nsprefix:
+            prefix = self.nsprefix + ":"
+
         pretty_print = (indent_level is not None)
         if pretty_print:
             space = (' ' * (indent_level - 1))
@@ -809,7 +831,8 @@ class Tag(PageElement):
                 attribute_string = ' ' + ' '.join(attrs)
             if pretty_print:
                 s.append(space)
-            s.append('<%s%s%s>' % (self.name, attribute_string, close))
+            s.append('<%s%s%s%s>' % (
+                    prefix, self.name, attribute_string, close))
             if pretty_print:
                 s.append("\n")
             s.append(contents)
@@ -986,7 +1009,7 @@ class SoupStrainer(object):
     searchTag = search_tag
 
     def search(self, markup):
-        #print 'looking for %s in %s' % (self, markup)
+        # print 'looking for %s in %s' % (self, markup)
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
@@ -1012,7 +1035,7 @@ class SoupStrainer(object):
         return found
 
     def _matches(self, markup, match_against):
-        #print "Matching %s against %s" % (markup, match_against)
+        # print "Matching %s against %s" % (markup, match_against)
         result = False
 
         if isinstance(markup, list) or isinstance(markup, tuple):
```