diff options
-rw-r--r-- | bs4/element.py | 100 |
1 files changed, 50 insertions, 50 deletions
diff --git a/bs4/element.py b/bs4/element.py index 2375544..f6a6879 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -69,13 +69,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([ "string-escape", "string_escape", ]) - + class NamespacedAttribute(str): """A namespaced string (e.g. 'xml:lang') that remembers the namespace ('xml') and the name ('lang') that were used to create it. """ - + def __new__(cls, prefix, name=None, namespace=None): if not name: # This is the default namespace. Its name "has no value" @@ -146,14 +146,14 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): return match.group(1) + encoding return self.CHARSET_RE.sub(rewrite, self.original_value) - + class PageElement(object): """Contains the navigational information for some part of the page: that is, its current location in the parse tree. NavigableString, Tag, etc. are all subclasses of PageElement. """ - + def setup(self, parent=None, previous_element=None, next_element=None, previous_sibling=None, next_sibling=None): """Sets up the initial relations between this element and @@ -163,7 +163,7 @@ class PageElement(object): :param previous_element: The element parsed immediately before this one. - + :param next_element: The element parsed immediately before this one. @@ -257,11 +257,11 @@ class PageElement(object): default = object() def _all_strings(self, strip=False, types=default): """Yield all strings of certain classes, possibly stripping them. - + This is implemented differently in Tag and NavigableString. """ raise NotImplementedError() - + @property def stripped_strings(self): """Yield all strings in this PageElement, stripping them first. @@ -294,11 +294,11 @@ class PageElement(object): strip, types=types)]) getText = get_text text = property(get_text) - + def replace_with(self, *args): - """Replace this PageElement with one or more PageElements, keeping the + """Replace this PageElement with one or more PageElements, keeping the rest of the tree the same. - + :param args: One or more PageElements. :return: `self`, no longer part of the tree. """ @@ -410,7 +410,7 @@ class PageElement(object): This works the same way as `list.insert`. :param position: The numeric position that should be occupied - in `self.children` by the new PageElement. + in `self.children` by the new PageElement. :param new_child: A PageElement. """ if new_child is None: @@ -546,7 +546,7 @@ class PageElement(object): "Element has no parent, so 'after' has no meaning.") if any(x is self for x in args): raise ValueError("Can't insert an element after itself.") - + offset = 0 for successor in args: # Extract first so that the index won't be screwed up if they @@ -912,7 +912,7 @@ class PageElement(object): :rtype: bool """ return getattr(self, '_decomposed', False) or False - + # Old non-property versions of the generators, for backwards # compatibility with BS3. def nextGenerator(self): @@ -936,7 +936,7 @@ class NavigableString(str, PageElement): When Beautiful Soup parses the markup <b>penguin</b>, it will create a NavigableString for the string "penguin". - """ + """ PREFIX = '' SUFFIX = '' @@ -1059,10 +1059,10 @@ class PreformattedString(NavigableString): as comments (the Comment class) and CDATA blocks (the CData class). """ - + PREFIX = '' SUFFIX = '' - + def output_ready(self, formatter=None): """Make this string ready for output by adding any subclass-specific prefix or suffix. @@ -1144,7 +1144,7 @@ class Stylesheet(NavigableString): """ pass - + class Script(NavigableString): """A NavigableString representing an executable script (probably Javascript). @@ -1250,7 +1250,7 @@ class Tag(PageElement): if ((not builder or builder.store_line_numbers) and (sourceline is not None or sourcepos is not None)): self.sourceline = sourceline - self.sourcepos = sourcepos + self.sourcepos = sourcepos if attrs is None: attrs = {} elif attrs: @@ -1308,7 +1308,7 @@ class Tag(PageElement): self.interesting_string_types = builder.string_containers[self.name] else: self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES - + parserClass = _alias("parser_class") # BS3 def __copy__(self): @@ -1329,7 +1329,7 @@ class Tag(PageElement): for child in self.contents: clone.append(child.__copy__()) return clone - + @property def is_empty_element(self): """Is this tag an empty-element tag? (aka a self-closing tag) @@ -1433,7 +1433,7 @@ class Tag(PageElement): i.contents = [] i._decomposed = True i = n - + def clear(self, decompose=False): """Wipe out all children of this PageElement by calling extract() on them. @@ -1521,7 +1521,7 @@ class Tag(PageElement): if not isinstance(value, list): value = [value] return value - + def has_attr(self, key): """Does this PageElement have an attribute with the given name?""" return key in self.attrs @@ -1608,7 +1608,7 @@ class Tag(PageElement): def __repr__(self, encoding="unicode-escape"): """Renders this PageElement as a string. - :param encoding: The encoding to use (Python 2 only). + :param encoding: The encoding to use (Python 2 only). TODO: This is now ignored and a warning should be issued if a value is provided. :return: A (Unicode) string. @@ -1770,7 +1770,7 @@ class Tag(PageElement): a Unicode string will be returned. :param formatter: A Formatter object, or a string naming one of the standard formatters. - :return: A Unicode string (if encoding==None) or a bytestring + :return: A Unicode string (if encoding==None) or a bytestring (otherwise). """ if encoding is None: @@ -1826,7 +1826,7 @@ class Tag(PageElement): if pretty_print and not preserve_whitespace: s.append("\n") return ''.join(s) - + def encode_contents( self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): @@ -1948,7 +1948,7 @@ class Tag(PageElement): Beautiful Soup will use the prefixes it encountered while parsing the document. - :param kwargs: Keyword arguments to be passed into SoupSieve's + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.select() method. :return: A Tag. @@ -1970,7 +1970,7 @@ class Tag(PageElement): :param limit: After finding this number of results, stop looking. - :param kwargs: Keyword arguments to be passed into SoupSieve's + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.select() method. :return: A ResultSet of Tags. @@ -2026,7 +2026,7 @@ class SoupStrainer(object): :param attrs: A dictionary of filters on attribute values. :param string: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. - """ + """ if string is None and 'text' in kwargs: string = kwargs.pop('text') warnings.warn( @@ -2125,7 +2125,7 @@ class SoupStrainer(object): # looking at a tag with a different name. if markup and not markup.prefix and self.name != markup.name: return False - + call_function_with_tag_data = ( isinstance(self.name, Callable) and not isinstance(markup_name, Tag)) @@ -2211,7 +2211,7 @@ class SoupStrainer(object): if self._matches(' '.join(markup), match_against): return True return False - + if match_against is True: # True matches any non-None value. return markup is not None @@ -2255,11 +2255,11 @@ class SoupStrainer(object): return True else: return False - + # Beyond this point we might need to run the test twice: once against # the tag's name and once against its prefixed name. match = False - + if not match and isinstance(match_against, str): # Exact string match match = markup == match_against @@ -2314,7 +2314,7 @@ class SoupSieveProxy(object): self.element = element def _ns(self, ns): - """Normalize a dictionary of namespaces.""" + """Normalize a dictionary of namespaces.""" if ns is None: ns = self.element._namespaces return ns @@ -2347,7 +2347,7 @@ class SoupSieveProxy(object): :param flags: Flags to be passed into Soup Sieve's soupsieve.select_one() method. - :param kwargs: Keyword arguments to be passed into SoupSieve's + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.select_one() method. :return: A Tag, or None if the selector has no match. @@ -2356,7 +2356,7 @@ class SoupSieveProxy(object): return soupsieve.select_one( select, self.element, self._ns(namespaces), flags, **kwargs ) - + def select(self, select, namespaces=None, limit=0, flags=0, **kwargs): """Perform a CSS selection operation on the current element. @@ -2375,13 +2375,13 @@ class SoupSieveProxy(object): :param flags: Flags to be passed into Soup Sieve's soupsieve.select() method. - - :param kwargs: Keyword arguments to be passed into SoupSieve's + + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.select() method. :return: A ResultSet of Tag objects. :rtype: bs4.element.ResultSet - """ + """ if limit is None: limit = 0 @@ -2411,17 +2411,17 @@ class SoupSieveProxy(object): :param flags: Flags to be passed into Soup Sieve's soupsieve.iselect() method. - - :param kwargs: Keyword arguments to be passed into SoupSieve's + + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.iselect() method. :return: A generator :rtype: types.GeneratorType - """ + """ return soupsieve.iselect( select, self.element, self._ns(namespaces), limit, flags, **kwargs ) - + def closest(self, select, namespaces=None, flags=0, **kwargs): """Find the element closest to this one that matches the given selector. @@ -2439,13 +2439,13 @@ class SoupSieveProxy(object): :param flags: Flags to be passed into Soup Sieve's soupsieve.closest() method. - - :param kwargs: Keyword arguments to be passed into SoupSieve's + + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.closest() method. :return: A PageElement, or None if there is no match. :rtype: bs4.element.Tag | bs4.element.NavigableString - """ + """ return soupsieve.closest( select, self.element, self._ns(namespaces), flags, **kwargs ) @@ -2466,8 +2466,8 @@ class SoupSieveProxy(object): :param flags: Flags to be passed into Soup Sieve's soupsieve.match() method. - - :param kwargs: Keyword arguments to be passed into SoupSieve's + + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.match() method. :return: True if this element matches the selector; False otherwise. @@ -2476,7 +2476,7 @@ class SoupSieveProxy(object): return soupsieve.match( select, self.element, self._ns(namespaces), flags, **kwargs ) - + def filter(self, select, namespaces=None, flags=0, **kwargs): """Filter this element's direct children based on the given CSS selector. @@ -2493,8 +2493,8 @@ class SoupSieveProxy(object): :param flags: Flags to be passed into Soup Sieve's soupsieve.filter() method. - - :param kwargs: Keyword arguments to be passed into SoupSieve's + + :param kwargs: Keyword arguments to be passed into SoupSieve's soupsieve.filter() method. :return: A ResultSet of Tag objects. |