summary | refs | log | tree | commit | diff
path: root/bs4/builder/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2019-07-21 14:58:16 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2019-07-21 14:58:16 -0400
commit: b2294f4f05d9e8583613560986f8aa64b18866b9 (patch)
tree: 5af13a59eca15ea082cb46ea286bc9c5b91996da /bs4/builder/__init__.py
parent: 819fa4255063d6b8d16f62469afa6c6e504f284a (diff)
Adapt Chris Mayo's code to track line number and position when using html.parser.
Diffstat (limited to 'bs4/builder/__init__.py')
-rw-r--r-- bs4/builder/__init__.py 24
1 files changed, 21 insertions, 3 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index e087f07..e28242b 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -99,8 +99,13 @@ class TreeBuilder(object):
DEFAULT_PRESERVE_WHITESPACE_TAGS = set()
USE_DEFAULT = object()
+
+ # Most parsers don't keep track of line numbers.
+ TRACKS_LINE_NUMBERS = False
- def __init__(self, multi_valued_attributes=USE_DEFAULT, preserve_whitespace_tags=USE_DEFAULT):
+ def __init__(self, multi_valued_attributes=USE_DEFAULT,
+ preserve_whitespace_tags=USE_DEFAULT,
+ store_line_numbers=USE_DEFAULT):
"""Constructor.
:param multi_valued_attributes: If this is set to None, the
@@ -113,7 +118,17 @@ class TreeBuilder(object):
probably doesn't make sense to an end-user, so the argument name
is `multi_valued_attributes`.
- :param preserve_whitespace_tags:
+ :param preserve_whitespace_tags: A list of tags to treat
+ the way <pre> tags are treated in HTML. Tags in this list
+ will have their whitespace preserved when output.
+
+ :param store_line_numbers: If the parser keeps track of the
+ line numbers and positions of the original markup, that
+ information will, by default, be stored in each corresponding
+ `Tag` object. You can turn this off by passing
+ store_line_numbers=False. If the parser you're using doesn't
+ keep track of this information, then setting store_line_numbers=True
+ will do nothing.
"""
self.soup = None
if multi_valued_attributes is self.USE_DEFAULT:
@@ -122,7 +137,10 @@ class TreeBuilder(object):
if preserve_whitespace_tags is self.USE_DEFAULT:
preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
self.preserve_whitespace_tags = preserve_whitespace_tags
-
+ if store_line_numbers == self.USE_DEFAULT:
+ store_line_numbers = self.TRACKS_LINE_NUMBERS
+ self.store_line_numbers = store_line_numbers
+
def initialize_soup(self, soup):
"""The BeautifulSoup object has been initialized and is now
being associated with the TreeBuilder.