diff options
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/element.py | 18 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 22 |
3 files changed, 30 insertions, 13 deletions
@@ -8,6 +8,9 @@ * The select() method now supports selector grouping. Patch by Francisco Canas [bug=1191917] +* The select() method can now find tags whose names contain + dashes. Patch by Francisco Canas [bug=1276211] + * Restored the helpful syntax error that happens when you try to import the Python 2 edition of Beautiful Soup under Python 3. [bug=1213387] diff --git a/bs4/element.py b/bs4/element.py index 197722d..1127c7a 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -548,17 +548,17 @@ class PageElement(object): # Methods for supporting CSS selectors. - tag_name_re = re.compile('^[a-z0-9]+$') - - # /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/ - # \---/ \---/\-------------/ \-------/ - # | | | | - # | | | The value - # | | ~,|,^,$,* or = - # | Attribute + tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$') + + # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/ + # \---------------------------/ \---/\-------------/ \-------/ + # | | | | + # | | | The value + # | | ~,|,^,$,* or = + # | Attribute # Tag attribselect_re = re.compile( - r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' + + r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' + r'=?"?(?P<value>[^\]"]*)"?\]$' ) diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 8f629d9..c9d1dcd 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1534,7 +1534,7 @@ class TestSoupSelector(TreeTest): <link rel="stylesheet" href="blah.css" type="text/css" id="l1"> </head> <body> - +<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag> <div id="main" class="fancy"> <div id="inner"> <h1 id="header1">An H1</h1> @@ -1552,6 +1552,7 @@ class TestSoupSelector(TreeTest): <a href="#" id="s2a1">span2a1</a> </span> <span class="span3"></span> +<custom-dashed-tag class="dashed" id="dash2"/> </span> </div> <x id="xid"> @@ -1617,6 +1618,20 @@ class TestSoupSelector(TreeTest): def test_invalid_tag(self): self.assertRaises(ValueError, self.soup.select, 'tag%t') + def test_select_dashed_tag_ids(self): + self.assertSelects('custom-dashed-tag', ['dash1', 'dash2']) + + def test_select_dashed_by_id(self): + dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]') + self.assertEqual(dashed[0].name, 'custom-dashed-tag') + self.assertEqual(dashed[0]['id'], 'dash2') + + def test_dashed_tag_text(self): + self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.') + + def test_select_dashed_matches_find_all(self): + self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag')) + def test_header_tags(self): self.assertSelectMultiple( ('h1', ['header1']), @@ -1724,7 +1739,7 @@ class TestSoupSelector(TreeTest): ('[href$=".css"]', ['l1']), ('link[href$=".css"]', ['l1']), ('link[id$="1"]', ['l1']), - ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']), + ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']), ('div[id$="1"]', []), ('[id$="noending"]', []), ) @@ -1738,7 +1753,6 @@ class TestSoupSelector(TreeTest): ('[rel*="notstyle"]', []), ('link[rel*="notstyle"]', []), ('link[href*="bla"]', ['l1']), - ('a[href*="http://"]', ['bob', 'me']), ('[href*="http://"]', ['bob', 'me']), ('[id*="p"]', ['pmulti', 'p1']), ('div[id*="m"]', ['main']), @@ -1747,7 +1761,7 @@ class TestSoupSelector(TreeTest): ('[href*=".css"]', ['l1']), ('link[href*=".css"]', ['l1']), ('link[id*="1"]', ['l1']), - ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']), + ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']), ('div[id*="1"]', []), ('[id*="noending"]', []), # New for this test |