summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt | 3
-rw-r--r-- bs4/element.py | 18
-rw-r--r-- bs4/tests/test_tree.py | 22
3 files changed, 30 insertions, 13 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 2a3bb1e..d45e1c4 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -8,6 +8,9 @@
* The select() method now supports selector grouping. Patch by
Francisco Canas [bug=1191917]
+* The select() method can now find tags whose names contain
+ dashes. Patch by Francisco Canas [bug=1276211]
+
* Restored the helpful syntax error that happens when you try to
import the Python 2 edition of Beautiful Soup under Python
3. [bug=1213387]
diff --git a/bs4/element.py b/bs4/element.py
index 197722d..1127c7a 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -548,17 +548,17 @@ class PageElement(object):
# Methods for supporting CSS selectors.
- tag_name_re = re.compile('^[a-z0-9]+$')
-
- # /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
- # \---/ \---/\-------------/ \-------/
- # | | | |
- # | | | The value
- # | | ~,|,^,$,* or =
- # | Attribute
+ tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$')
+
+ # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
+ # \---------------------------/ \---/\-------------/ \-------/
+ # | | | |
+ # | | | The value
+ # | | ~,|,^,$,* or =
+ # | Attribute
# Tag
attribselect_re = re.compile(
- r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
+ r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
r'=?"?(?P<value>[^\]"]*)"?\]$'
)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 8f629d9..c9d1dcd 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1534,7 +1534,7 @@ class TestSoupSelector(TreeTest):
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
-
+<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
@@ -1552,6 +1552,7 @@ class TestSoupSelector(TreeTest):
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
+<custom-dashed-tag class="dashed" id="dash2"/>
</span>
</div>
<x id="xid">
@@ -1617,6 +1618,20 @@ class TestSoupSelector(TreeTest):
def test_invalid_tag(self):
self.assertRaises(ValueError, self.soup.select, 'tag%t')
+ def test_select_dashed_tag_ids(self):
+ self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])
+
+ def test_select_dashed_by_id(self):
+ dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
+ self.assertEqual(dashed[0].name, 'custom-dashed-tag')
+ self.assertEqual(dashed[0]['id'], 'dash2')
+
+ def test_dashed_tag_text(self):
+ self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
+
+ def test_select_dashed_matches_find_all(self):
+ self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
+
def test_header_tags(self):
self.assertSelectMultiple(
('h1', ['header1']),
@@ -1724,7 +1739,7 @@ class TestSoupSelector(TreeTest):
('[href$=".css"]', ['l1']),
('link[href$=".css"]', ['l1']),
('link[id$="1"]', ['l1']),
- ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']),
+ ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
('div[id$="1"]', []),
('[id$="noending"]', []),
)
@@ -1738,7 +1753,6 @@ class TestSoupSelector(TreeTest):
('[rel*="notstyle"]', []),
('link[rel*="notstyle"]', []),
('link[href*="bla"]', ['l1']),
- ('a[href*="http://"]', ['bob', 'me']),
('[href*="http://"]', ['bob', 'me']),
('[id*="p"]', ['pmulti', 'p1']),
('div[id*="m"]', ['main']),
@@ -1747,7 +1761,7 @@ class TestSoupSelector(TreeTest):
('[href*=".css"]', ['l1']),
('link[href*=".css"]', ['l1']),
('link[id*="1"]', ['l1']),
- ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']),
+ ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
('div[id*="1"]', []),
('[id*="noending"]', []),
# New for this test