summary | refs | log | tree | commit | diff
path: root/bs4/element.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- bs4/element.py 33
1 files changed, 27 insertions, 6 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 997378a..fdb90e0 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -22,6 +22,20 @@ def _alias(attr):
return alias
+class NamespacedAttribute(object):  # An attribute name bundled with a namespace abbreviation and a namespace.
+ # Holds the three constructor arguments unchanged; str() gives "abbreviation:name", or the bare name.
+ def __init__(self, namespace_abbreviation, name, namespace):
+ self.namespace_abbreviation = namespace_abbreviation  # prefix used by __str__; a falsy value means "no prefix"
+ self.name = name  # the name without any prefix
+ self.namespace = namespace  # stored only; nothing in this class reads it back
+
+ def __str__(self):  # Return the name, prefixed with "<abbreviation>:" when an abbreviation is set.
+ name = self.name
+ if self.namespace_abbreviation:  # None and '' both skip the prefix
+ name = self.namespace_abbreviation + ":" + name
+ return name
+
+
class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
@@ -507,8 +521,8 @@ class Tag(PageElement):
"""Represents a found HTML tag with its attributes and contents."""
- def __init__(self, parser=None, builder=None, name=None, attrs=None,
- parent=None, previous=None):
+ def __init__(self, parser=None, builder=None, name=None, namespace=None,
+ nsprefix=None, attrs=None, parent=None, previous=None):
"Basic constructor."
if parser is None:
@@ -520,6 +534,8 @@ class Tag(PageElement):
if name is None:
raise ValueError("No value provided for new tag's name.")
self.name = name
+ self.namespace = namespace
+ self.nsprefix = nsprefix
if attrs is None:
attrs = {}
else:
@@ -779,7 +795,7 @@ class Tag(PageElement):
and '%SOUP-ENCODING%' in val):
val = self.substitute_encoding(val, eventual_encoding)
- decoded = (key + '='
+ decoded = (str(key) + '='
+ EntitySubstitution.substitute_xml(val, True))
attrs.append(decoded)
close = ''
@@ -789,6 +805,10 @@ class Tag(PageElement):
else:
closeTag = '</%s>' % self.name
+ prefix = ''
+ if self.nsprefix:
+ prefix = self.nsprefix + ":"
+
pretty_print = (indent_level is not None)
if pretty_print:
space = (' ' * (indent_level - 1))
@@ -809,7 +829,8 @@ class Tag(PageElement):
attribute_string = ' ' + ' '.join(attrs)
if pretty_print:
s.append(space)
- s.append('<%s%s%s>' % (self.name, attribute_string, close))
+ s.append('<%s%s%s%s>' % (
+ prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)
@@ -986,7 +1007,7 @@ class SoupStrainer(object):
searchTag = search_tag
def search(self, markup):
- #print 'looking for %s in %s' % (self, markup)
+ # print 'looking for %s in %s' % (self, markup)
found = None
# If given a list of items, scan it for a text element that
# matches.
@@ -1012,7 +1033,7 @@ class SoupStrainer(object):
return found
def _matches(self, markup, match_against):
- #print "Matching %s against %s" % (markup, match_against)
+ # print "Matching %s against %s" % (markup, match_against)
result = False
if isinstance(markup, list) or isinstance(markup, tuple):