summary | refs | log | tree | commit | diff
path: root/bs4/element.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2013-08-13 10:44:46 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-08-13 10:44:46 -0400
commit: 064439f3b2decfb55b4e118ac4d41851d13c4c6f (patch)
tree: da0e8007734a025df3e3d024a74a3bfb2f5f3e5a /bs4/element.py
parent: 020b385300c75e4c3ab0c45532de272b27945c5a (diff)
* Fixed yet another problem with the html5lib tree builder, caused by
html5lib's tendency to rearrange the tree during parsing. [bug=1189267]
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- bs4/element.py 13
1 file changed, 8 insertions, 5 deletions
diff --git a/bs4/element.py b/bs4/element.py
index e10e100..caa855e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -255,13 +255,16 @@ class PageElement(object):
self.previous_sibling = self.next_sibling = None
return self
- def _last_descendant(self, is_initialized=True):
+ def _last_descendant(self, is_initialized=True, accept_self=True):
"Finds the last element beneath this object to be parsed."
if is_initialized and self.next_sibling:
- return self.next_sibling.previous_element
- last_child = self
- while isinstance(last_child, Tag) and last_child.contents:
- last_child = last_child.contents[-1]
+ last_child = self.next_sibling.previous_element
+ else:
+ last_child = self
+ while isinstance(last_child, Tag) and last_child.contents:
+ last_child = last_child.contents[-1]
+ if not accept_self and last_child == self:
+ last_child = None
return last_child
# BS3: Not part of the API!
_lastRecursiveChild = _last_descendant