summary | refs | log | tree | commit | diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 56b8b91..e2c87c1 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -99,7 +99,10 @@ class BeautifulSoupHTMLParser(HTMLParser):
attr_dict[key] = value
attrvalue = '""'
#print "START", name
- tag = self.soup.handle_starttag(name, None, None, attr_dict)
+ lineno, offset = self.getpos()
+ tag = self.soup.handle_starttag(
+ name, None, None, attr_dict, lineno=lineno, offset=offset
+ )
if tag and tag.is_empty_element and handle_empty_element:
# Unlike other parsers, html.parser doesn't send separate end tag
# events for empty-element tags. (It's handled in
@@ -214,6 +217,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
NAME = HTMLPARSER
features = [NAME, HTML, STRICT]
+ # The html.parser reports the line number and offset in the
+ # original document at which each start tag was found.
+ TRACKS_LINE_NUMBERS = True
+
def __init__(self, parser_args=None, parser_kwargs=None, **kwargs):
super(HTMLParserTreeBuilder, self).__init__(**kwargs)
parser_args = parser_args or []