diff options
author | Leonard Richardson <leonardr@segfault.org> | 2019-07-21 14:58:16 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2019-07-21 14:58:16 -0400 |
commit | b2294f4f05d9e8583613560986f8aa64b18866b9 (patch) | |
tree | 5af13a59eca15ea082cb46ea286bc9c5b91996da /bs4/testing.py | |
parent | 819fa4255063d6b8d16f62469afa6c6e504f284a (diff) |
Adapt Chris Mayo's code to track line number and position when using html.parser.
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- | bs4/testing.py | 17 |
1 files changed, 17 insertions, 0 deletions
```diff
diff --git a/bs4/testing.py b/bs4/testing.py
index 9f12e8d..3e8d15b 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -790,6 +790,23 @@ Hello, world!
 
         soup = self.soup(BAD_DOCUMENT)
         self.linkage_validator(soup)
 
+    def test_tracking_line_numbers(self):
+        # In general, TreeBuilders do not keep track of
+        # line numbers from the original markup. Even if you
+        # ask for line numbers, we don't have 'em.
+        #
+        # This means that if you have a tag like <lineno> or <offset>,
+        # tag.lineno will find it rather than giving you a numeric
+        # answer.
+        #
+        # See HTMLParserTreeBuilderSmokeTest for a situation
+        # where the parser _does_ keep track of the line numbers.
+        soup = self.soup(
+            "\n <p>\n\n<lineno>\n<b>text</b></lineno><offset></p>",
+            store_line_numbers=True
+        )
+        self.assertEqual("lineno", soup.p.lineno.name)
+        self.assertEqual("offset", soup.p.offset.name)
 
 class XMLTreeBuilderSmokeTest(object):
```