diff options
author | Leonard Richardson <leonardr@segfault.org> | 2019-07-21 14:58:16 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2019-07-21 14:58:16 -0400 |
commit | b2294f4f05d9e8583613560986f8aa64b18866b9 (patch) | |
tree | 5af13a59eca15ea082cb46ea286bc9c5b91996da /bs4/builder/_htmlparser.py | |
parent | 819fa4255063d6b8d16f62469afa6c6e504f284a (diff) |
Adapt Chris Mayo's code to track line number and position when using html.parser.
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- | bs4/builder/_htmlparser.py | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 56b8b91..e2c87c1 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -99,7 +99,10 @@ class BeautifulSoupHTMLParser(HTMLParser):
             attr_dict[key] = value
             attrvalue = '""'
         #print "START", name
-        tag = self.soup.handle_starttag(name, None, None, attr_dict)
+        lineno, offset = self.getpos()
+        tag = self.soup.handle_starttag(
+            name, None, None, attr_dict, lineno=lineno, offset=offset
+        )
         if tag and tag.is_empty_element and handle_empty_element:
             # Unlike other parsers, html.parser doesn't send separate end tag
             # events for empty-element tags. (It's handled in
@@ -214,6 +217,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
     NAME = HTMLPARSER
     features = [NAME, HTML, STRICT]
 
+    # The html.parser knows which line number and position in the
+    # original file is the source of a document.
+    TRACKS_LINE_NUMBERS = True
+
     def __init__(self, parser_args=None, parser_kwargs=None, **kwargs):
         super(HTMLParserTreeBuilder, self).__init__(**kwargs)
         parser_args = parser_args or []
```