diff options
Diffstat (limited to 'bs4/tests')
-rw-r--r-- | bs4/tests/test_html5lib.py | 14 | ||||
-rw-r--r-- | bs4/tests/test_htmlparser.py | 25 | ||||
-rw-r--r-- | bs4/tests/test_lxml.py | 15 |
3 files changed, 43 insertions, 11 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 371463a..6446f84 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -168,3 +168,17 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
         for form in soup.find_all('form'):
             inputs.extend(form.find_all('input'))
         self.assertEqual(len(inputs), 1)
+
+    def test_tracking_line_numbers(self):
+        # The html.parser TreeBuilder keeps track of line number and
+        # position of each element.
+        markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
+        soup = self.soup(markup)
+        self.assertEqual(2, soup.p.sourceline)
+        self.assertEqual(5, soup.p.sourcepos)
+        self.assertEqual("sourceline", soup.p.find('sourceline').name)
+
+        # You can deactivate this behavior.
+        soup = self.soup(markup, store_line_numbers=False)
+        self.assertEqual("sourceline", soup.p.sourceline.name)
+        self.assertEqual("sourcepos", soup.p.sourcepos.name)
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index c6a6691..7be6493 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -38,17 +38,20 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         self.assertSoupEquals("foo &# bar", "foo &# bar")
 
     def test_tracking_line_numbers(self):
-        # Unlike other TreeBuilders, the html.parser TreeBuilder
-        # keeps track of line number and position of each element.
-        soup = self.soup(
-            "\n <p>\n\n<lineno>\n<b>text</b></lineno><offset></p>",
-            store_line_numbers=True
-        )
-        self.assertEqual(2, soup.p.lineno)
-        self.assertEqual(3, soup.p.offset)
-        self.assertEqual("lineno", soup.p.find('lineno').name)
-
-
+        # The html.parser TreeBuilder keeps track of line number and
+        # position of each element.
+        markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
+        soup = self.soup(markup)
+        self.assertEqual(2, soup.p.sourceline)
+        self.assertEqual(3, soup.p.sourcepos)
+        self.assertEqual("sourceline", soup.p.find('sourceline').name)
+
+        # You can deactivate this behavior.
+        soup = self.soup(markup, store_line_numbers=False)
+        self.assertEqual("sourceline", soup.p.sourceline.name)
+        self.assertEqual("sourcepos", soup.p.sourcepos.name)
+
+
 class TestHTMLParserSubclass(SoupTest):
     def test_error(self):
         """Verify that our HTMLParser subclass implements error() in a way
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 3b7858f..f96e4ae 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -71,6 +71,21 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         self.assertEqual(u"<b/>", unicode(soup.b))
         self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
 
+    def test_tracking_line_numbers(self):
+        # The lxml TreeBuilder cannot keep track of line numbers from
+        # the original markup. Even if you ask for line numbers, we
+        # don't have 'em.
+        #
+        # This means that if you have a tag like <sourceline> or
+        # <sourcepos>, attribute access will find it rather than
+        # giving you a numeric answer.
+        soup = self.soup(
+            "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
+            store_line_numbers=True
+        )
+        self.assertEqual("sourceline", soup.p.sourceline.name)
+        self.assertEqual("sourcepos", soup.p.sourcepos.name)
+
 @skipIf(
     not LXML_PRESENT,
     "lxml seems not to be present, not testing its XML tree builder.")