diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-08-13 10:44:46 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-08-13 10:44:46 -0400 |
commit | 064439f3b2decfb55b4e118ac4d41851d13c4c6f (patch) | |
tree | da0e8007734a025df3e3d024a74a3bfb2f5f3e5a /bs4/element.py | |
parent | 020b385300c75e4c3ab0c45532de272b27945c5a (diff) |
* Fixed yet another problem with the html5lib tree builder, caused by
html5lib's tendency to rearrange the tree during
parsing. [bug=1189267]
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 13 |
1 files changed, 8 insertions, 5 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index e10e100..caa855e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -255,13 +255,16 @@ class PageElement(object):
         self.previous_sibling = self.next_sibling = None
         return self
 
-    def _last_descendant(self, is_initialized=True):
+    def _last_descendant(self, is_initialized=True, accept_self=True):
         "Finds the last element beneath this object to be parsed."
         if is_initialized and self.next_sibling:
-            return self.next_sibling.previous_element
-        last_child = self
-        while isinstance(last_child, Tag) and last_child.contents:
-            last_child = last_child.contents[-1]
+            last_child = self.next_sibling.previous_element
+        else:
+            last_child = self
+            while isinstance(last_child, Tag) and last_child.contents:
+                last_child = last_child.contents[-1]
+        if not accept_self and last_child == self:
+            last_child = None
         return last_child
     # BS3: Not part of the API!
     _lastRecursiveChild = _last_descendant
```