diff options
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 33 |
1 files changed, 27 insertions, 6 deletions
diff --git a/bs4/element.py b/bs4/element.py index 997378a..fdb90e0 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -22,6 +22,20 @@ def _alias(attr): return alias +class NamespacedAttribute(object): + + def __init__(self, namespace_abbreviation, name, namespace): + self.namespace_abbreviation = namespace_abbreviation + self.name = name + self.namespace = namespace + + def __str__(self): + name = self.name + if self.namespace_abbreviation: + name = self.namespace_abbreviation + ":" + name + return name + + class PageElement(object): """Contains the navigational information for some part of the page (either a tag or a piece of text)""" @@ -507,8 +521,8 @@ class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" - def __init__(self, parser=None, builder=None, name=None, attrs=None, - parent=None, previous=None): + def __init__(self, parser=None, builder=None, name=None, namespace=None, + nsprefix=None, attrs=None, parent=None, previous=None): "Basic constructor." if parser is None: @@ -520,6 +534,8 @@ class Tag(PageElement): if name is None: raise ValueError("No value provided for new tag's name.") self.name = name + self.namespace = namespace + self.nsprefix = nsprefix if attrs is None: attrs = {} else: @@ -779,7 +795,7 @@ class Tag(PageElement): and '%SOUP-ENCODING%' in val): val = self.substitute_encoding(val, eventual_encoding) - decoded = (key + '=' + decoded = (str(key) + '=' + EntitySubstitution.substitute_xml(val, True)) attrs.append(decoded) close = '' @@ -789,6 +805,10 @@ class Tag(PageElement): else: closeTag = '</%s>' % self.name + prefix = '' + if self.nsprefix: + prefix = self.nsprefix + ":" + pretty_print = (indent_level is not None) if pretty_print: space = (' ' * (indent_level - 1)) @@ -809,7 +829,8 @@ class Tag(PageElement): attribute_string = ' ' + ' '.join(attrs) if pretty_print: s.append(space) - s.append('<%s%s%s>' % (self.name, attribute_string, close)) + s.append('<%s%s%s%s>' % ( + prefix, self.name, attribute_string, close)) if pretty_print: s.append("\n") s.append(contents) @@ -986,7 +1007,7 @@ class SoupStrainer(object): searchTag = search_tag def search(self, markup): - #print 'looking for %s in %s' % (self, markup) + # print 'looking for %s in %s' % (self, markup) found = None # If given a list of items, scan it for a text element that # matches. @@ -1012,7 +1033,7 @@ class SoupStrainer(object): return found def _matches(self, markup, match_against): - #print "Matching %s against %s" % (markup, match_against) + # print "Matching %s against %s" % (markup, match_against) result = False if isinstance(markup, list) or isinstance(markup, tuple): |