summary | refs | log | tree | commit | diff
path: root/bs4/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests')
-rw-r--r-- bs4/tests/test_html5lib.py 14
-rw-r--r-- bs4/tests/test_htmlparser.py 25
-rw-r--r-- bs4/tests/test_lxml.py 15
3 files changed, 43 insertions, 11 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 371463a..6446f84 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -168,3 +168,17 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
for form in soup.find_all('form'):
inputs.extend(form.find_all('input'))
self.assertEqual(len(inputs), 1)
+
+ def test_tracking_line_numbers(self):
+ # The html5lib TreeBuilder keeps track of line number and
+ # position of each element.
+ markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
+ soup = self.soup(markup)
+ self.assertEqual(2, soup.p.sourceline)
+ self.assertEqual(5, soup.p.sourcepos)
+ self.assertEqual("sourceline", soup.p.find('sourceline').name)
+
+ # You can deactivate this behavior.
+ soup = self.soup(markup, store_line_numbers=False)
+ self.assertEqual("sourceline", soup.p.sourceline.name)
+ self.assertEqual("sourcepos", soup.p.sourcepos.name)
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index c6a6691..7be6493 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -38,17 +38,20 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
def test_tracking_line_numbers(self):
- # Unlike other TreeBuilders, the html.parser TreeBuilder
- # keeps track of line number and position of each element.
- soup = self.soup(
- "\n <p>\n\n<lineno>\n<b>text</b></lineno><offset></p>",
- store_line_numbers=True
- )
- self.assertEqual(2, soup.p.lineno)
- self.assertEqual(3, soup.p.offset)
- self.assertEqual("lineno", soup.p.find('lineno').name)
-
-
+ # The html.parser TreeBuilder keeps track of line number and
+ # position of each element.
+ markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
+ soup = self.soup(markup)
+ self.assertEqual(2, soup.p.sourceline)
+ self.assertEqual(3, soup.p.sourcepos)
+ self.assertEqual("sourceline", soup.p.find('sourceline').name)
+
+ # You can deactivate this behavior.
+ soup = self.soup(markup, store_line_numbers=False)
+ self.assertEqual("sourceline", soup.p.sourceline.name)
+ self.assertEqual("sourcepos", soup.p.sourcepos.name)
+
+
class TestHTMLParserSubclass(SoupTest):
def test_error(self):
"""Verify that our HTMLParser subclass implements error() in a way
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 3b7858f..f96e4ae 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -71,6 +71,21 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
self.assertEqual(u"<b/>", unicode(soup.b))
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
+ def test_tracking_line_numbers(self):
+ # The lxml TreeBuilder cannot keep track of line numbers from
+ # the original markup. Even if you ask for line numbers, we
+ # don't have 'em.
+ #
+ # This means that if you have a tag like <sourceline> or
+ # <sourcepos>, attribute access will find it rather than
+ # giving you a numeric answer.
+ soup = self.soup(
+ "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
+ store_line_numbers=True
+ )
+ self.assertEqual("sourceline", soup.p.sourceline.name)
+ self.assertEqual("sourcepos", soup.p.sourcepos.name)
+
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its XML tree builder.")