summary | refs | log | tree | commit | diff
path: root/bs4/element.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- bs4/element.py | 37
1 files changed, 30 insertions, 7 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 997378a..efc6ec7 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -22,6 +22,19 @@ def _alias(attr):
return alias
+class NamespacedAttribute(unicode):
+
+ def __new__(cls, prefix, name, namespace=None):
+ if name is None:
+ obj = unicode.__new__(cls, prefix)
+ else:
+ obj = unicode.__new__(cls, prefix + ":" + name)
+ obj.prefix = prefix
+ obj.name = name
+ obj.namespace = namespace
+ return obj
+
+
class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
@@ -500,15 +513,15 @@ class Doctype(NavigableString):
return Doctype(value)
PREFIX = u'<!DOCTYPE '
- SUFFIX = u'>'
+ SUFFIX = u'>\n'
class Tag(PageElement):
"""Represents a found HTML tag with its attributes and contents."""
- def __init__(self, parser=None, builder=None, name=None, attrs=None,
- parent=None, previous=None):
+ def __init__(self, parser=None, builder=None, name=None, namespace=None,
+ nsprefix=None, attrs=None, parent=None, previous=None):
"Basic constructor."
if parser is None:
@@ -520,6 +533,8 @@ class Tag(PageElement):
if name is None:
raise ValueError("No value provided for new tag's name.")
self.name = name
+ self.namespace = namespace
+ self.nsprefix = nsprefix
if attrs is None:
attrs = {}
else:
@@ -659,6 +674,9 @@ class Tag(PageElement):
def has_attr(self, key):
return key in self.attrs
+ def __hash__(self):
+ return str(self).__hash__()
+
def __getitem__(self, key):
"""tag[key] returns the value of the 'key' attribute for the tag,
and throws an exception if it's not there."""
@@ -779,7 +797,7 @@ class Tag(PageElement):
and '%SOUP-ENCODING%' in val):
val = self.substitute_encoding(val, eventual_encoding)
- decoded = (key + '='
+ decoded = (str(key) + '='
+ EntitySubstitution.substitute_xml(val, True))
attrs.append(decoded)
close = ''
@@ -789,6 +807,10 @@ class Tag(PageElement):
else:
closeTag = '</%s>' % self.name
+ prefix = ''
+ if self.nsprefix:
+ prefix = self.nsprefix + ":"
+
pretty_print = (indent_level is not None)
if pretty_print:
space = (' ' * (indent_level - 1))
@@ -809,7 +831,8 @@ class Tag(PageElement):
attribute_string = ' ' + ' '.join(attrs)
if pretty_print:
s.append(space)
- s.append('<%s%s%s>' % (self.name, attribute_string, close))
+ s.append('<%s%s%s%s>' % (
+ prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)
@@ -986,7 +1009,7 @@ class SoupStrainer(object):
searchTag = search_tag
def search(self, markup):
- #print 'looking for %s in %s' % (self, markup)
+ # print 'looking for %s in %s' % (self, markup)
found = None
# If given a list of items, scan it for a text element that
# matches.
@@ -1012,7 +1035,7 @@ class SoupStrainer(object):
return found
def _matches(self, markup, match_against):
- #print "Matching %s against %s" % (markup, match_against)
+ # print "Matching %s against %s" % (markup, match_against)
result = False
if isinstance(markup, list) or isinstance(markup, tuple):