From e1b321db7331752a3aea8dd7070dd0db4c60c51d Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 16 Feb 2012 16:33:40 -0500 Subject: It's a start, at least. --- bs4/element.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'bs4/element.py') diff --git a/bs4/element.py b/bs4/element.py index 513407c..926fb8f 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -22,6 +22,20 @@ def _alias(attr): return alias +class NamespacedAttribute(object): + + def __init__(self, namespace_abbreviation, name, namespace): + self.namespace_abbreviation = namespace_abbreviation + self.name = name + self.namespace = namespace + + def __str__(self): + name = self.name + if self.namespace_abbreviation: + name = self.namespace_abbreviation + ":" + name + return name + + class PageElement(object): """Contains the navigational information for some part of the page (either a tag or a piece of text)""" @@ -507,8 +521,8 @@ class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" - def __init__(self, parser=None, builder=None, name=None, attrs=None, - parent=None, previous=None): + def __init__(self, parser=None, builder=None, name=None, namespace=None, + attrs=None, parent=None, previous=None): "Basic constructor." if parser is None: @@ -520,6 +534,7 @@ class Tag(PageElement): if name is None: raise ValueError("No value provided for new tag's name.") self.name = name + self.namespace = namespace if attrs is None: attrs = {} else: @@ -779,7 +794,7 @@ class Tag(PageElement): and '%SOUP-ENCODING%' in val): val = self.substitute_encoding(val, eventual_encoding) - decoded = (key + '=' + decoded = (str(key) + '=' + EntitySubstitution.substitute_xml(val, True)) attrs.append(decoded) close = '' -- cgit v1.2.3 From 2ccae07967bb15f6bad6ba262411ac47bcbb98e7 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 21 Feb 2012 12:19:56 -0500 Subject: Added nsprefix argument to the tag class. 
--- bs4/element.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'bs4/element.py') diff --git a/bs4/element.py b/bs4/element.py index 653bb05..73f225e 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -522,7 +522,7 @@ class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" def __init__(self, parser=None, builder=None, name=None, namespace=None, - attrs=None, parent=None, previous=None): + nsprefix=None, attrs=None, parent=None, previous=None): "Basic constructor." if parser is None: @@ -535,6 +535,7 @@ class Tag(PageElement): raise ValueError("No value provided for new tag's name.") self.name = name self.namespace = namespace + self.nsprefix = nsprefix if attrs is None: attrs = {} else: -- cgit v1.2.3 From 2b6af1e6204461e89338ae452c3bc742d0d1fa0f Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 21 Feb 2012 13:41:58 -0500 Subject: Have lxml invert namespace maps as they come in and set each tag's prefix appropriately. --- bs4/element.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'bs4/element.py') diff --git a/bs4/element.py b/bs4/element.py index 73f225e..ab30951 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -805,6 +805,10 @@ class Tag(PageElement): else: closeTag = '</%s>' % self.name + prefix = '' + if self.nsprefix: + prefix = self.nsprefix + ":" + pretty_print = (indent_level is not None) if pretty_print: space = (' ' * (indent_level - 1)) @@ -825,7 +829,8 @@ class Tag(PageElement): attribute_string = ' ' + ' '.join(attrs) if pretty_print: s.append(space) - s.append('<%s%s%s>' % (self.name, attribute_string, close)) + s.append('<%s%s%s%s>' % ( + prefix, self.name, attribute_string, close)) if pretty_print: s.append("\n") s.append(contents) -- cgit v1.2.3 From 2f72913160bedb509a8042693328d139e7c6b945 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 23 Feb 2012 11:16:45 -0500 Subject: Namespaced attributes are equal if they correspond to the same string. 
--- bs4/element.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'bs4/element.py') diff --git a/bs4/element.py b/bs4/element.py index fdb90e0..7e5810a 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -24,11 +24,22 @@ def _alias(attr): class NamespacedAttribute(object): - def __init__(self, namespace_abbreviation, name, namespace): + def __init__(self, namespace_abbreviation, name, namespace=None): self.namespace_abbreviation = namespace_abbreviation self.name = name self.namespace = namespace + def __eq__(self, other): + if isinstance(other, NamespacedAttribute): + return ( + self.namespace_abbreviation == other.namespace_abbreviation + and self.name == other.name + and self.namespace == other.namespace) + elif isinstance(other, basestring): + return str(self) == other + else: + return super(NamespacedAttribute, self).__eq__(other) + def __str__(self): name = self.name if self.namespace_abbreviation: -- cgit v1.2.3 From b7749c50a2c96ccf6982cfa1ca02d883e31e0af9 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 23 Feb 2012 11:56:40 -0500 Subject: Bumped version number. 
--- bs4/element.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) (limited to 'bs4/element.py') diff --git a/bs4/element.py b/bs4/element.py index 7e5810a..c2c4e2e 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -22,29 +22,14 @@ def _alias(attr): return alias -class NamespacedAttribute(object): +class NamespacedAttribute(unicode): - def __init__(self, namespace_abbreviation, name, namespace=None): - self.namespace_abbreviation = namespace_abbreviation - self.name = name - self.namespace = namespace - - def __eq__(self, other): - if isinstance(other, NamespacedAttribute): - return ( - self.namespace_abbreviation == other.namespace_abbreviation - and self.name == other.name - and self.namespace == other.namespace) - elif isinstance(other, basestring): - return str(self) == other - else: - return super(NamespacedAttribute, self).__eq__(other) - - def __str__(self): - name = self.name - if self.namespace_abbreviation: - name = self.namespace_abbreviation + ":" + name - return name + def __new__(cls, prefix, name, namespace=None): + obj = unicode.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj class PageElement(object): @@ -686,6 +671,9 @@ class Tag(PageElement): def has_attr(self, key): return key in self.attrs + def __hash__(self): + return str(self).__hash__() + def __getitem__(self, key): """tag[key] returns the value of the 'key' attribute for the tag, and throws an exception if it's not there.""" -- cgit v1.2.3 From fcefebe15290b9ff44934efa73fb07c70ebf5171 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 23 Feb 2012 12:23:12 -0500 Subject: Fixed handling of the closing of namespaced tags. 
--- bs4/element.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'bs4/element.py') diff --git a/bs4/element.py b/bs4/element.py index c2c4e2e..efc6ec7 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -25,7 +25,10 @@ def _alias(attr): class NamespacedAttribute(unicode): def __new__(cls, prefix, name, namespace=None): - obj = unicode.__new__(cls, prefix + ":" + name) + if name is None: + obj = unicode.__new__(cls, prefix) + else: + obj = unicode.__new__(cls, prefix + ":" + name) obj.prefix = prefix obj.name = name obj.namespace = namespace @@ -510,7 +513,7 @@ class Doctype(NavigableString): return Doctype(value) PREFIX = u'<!DOCTYPE ' - SUFFIX = u'>' + SUFFIX = u'>\n' class Tag(PageElement): -- cgit v1.2.3