diff options
-rw-r--r-- | AUTHORS.txt | 10 | ||||
-rw-r--r-- | NEWS.txt | 4 | ||||
-rw-r--r-- | bs4/element.py | 20 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 11 |
4 files changed, 33 insertions, 12 deletions
diff --git a/AUTHORS.txt b/AUTHORS.txt index e093cd6..2ac8fcc 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -11,12 +11,16 @@ of UnicodeDammit. Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful Soup 4 working under Python 3. +Simon Willison wrote soupselect, which was used to make Beautiful Soup +support CSS selectors. + Sam Ruby helped with a lot of edge cases. Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his work in solving the nestable tags conundrum. -The following people have contributed patches to Beautiful Soup: +An incomplete list of people who have contributed patches to Beautiful +Soup: Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang, Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris @@ -26,8 +30,8 @@ The following people have contributed patches to Beautiful Soup: Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn Webster, Paul Wright, Danny Yoo -The following people made suggestions or found bugs or found ways to -break Beautiful Soup: +An incomplete list of people who made suggestions or found bugs or +found ways to break Beautiful Soup: Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel, Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes, @@ -1,3 +1,7 @@ += 4.0.0b10 () = + +* Added support for CSS selectors, taken from the soupselect project. + = 4.0.0b9 (20110228) = * Fixed the string representation of DOCTYPEs that have both a public diff --git a/bs4/element.py b/bs4/element.py index 3baafe3..584e171 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -446,9 +446,11 @@ class PageElement(object): combination. 
""" if operator == '=': - # string representation of attribute is equal to value + # string representation of `attribute` is equal to `value` return lambda el: el._attr_value_as_string(attribute) == value elif operator == '~': + # space-separated list representation of `attribute` + # contains `value` def _includes_value(element): attribute_value = element.get(attribute, []) if not isinstance(attribute_value, list): @@ -456,17 +458,19 @@ class PageElement(object): return value in attribute_value return _includes_value elif operator == '^': - # string representation of attribute starts with value - return lambda el: el._attr_value_as_string(attribute, '').startswith(value) + # string representation of `attribute` starts with `value` + return lambda el: el._attr_value_as_string( + attribute, '').startswith(value) elif operator == '$': - # string representation of attribute ends with value - return lambda el: el._attr_value_as_string(attribute, '').endswith(value) + # string representation of `attribute` ends with `value` + return lambda el: el._attr_value_as_string( + attribute, '').endswith(value) elif operator == '*': - # string representation of attribute contains value + # string representation of `attribute` contains `value` return lambda el: value in el._attr_value_as_string(attribute, '') elif operator == '|': - # string representation of attribute is either exactly - # value or starts with value- + # string representation of `attribute` is either exactly + # `value` or starts with `value` and then a dash. 
def _is_or_starts_with_dash(element): attribute_value = element._attr_value_as_string(attribute, '') return (attribute_value == value or attribute_value.startswith( diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index bfc4218..3f32736 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1301,7 +1301,7 @@ class TestNavigableStringSubclasses(SoupTest): self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n") -class TestSoupSelector(SoupTest): +class TestSoupSelector(TreeTest): HTML = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> @@ -1528,3 +1528,12 @@ class TestSoupSelector(SoupTest): ('[blah]', []), ('p[blah]', []), ) + + def test_select_on_element(self): + # Other tests operate on the tree; this operates on an element + # within the tree. + inner = self.soup.find("div", id="main") + selected = inner.select("div") + # The <div id="inner"> tag was selected. The <div id="footer"> + # tag was not. + self.assertSelectsIDs(selected, ['inner']) |