summaryrefslogtreecommitdiff
path: root/bs4
diff options
context:
space:
mode:
Diffstat (limited to 'bs4')
-rw-r--r--bs4/builder/_html5lib.py6
-rw-r--r--bs4/testing.py10
2 files changed, 16 insertions, 0 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 7de36ae..6446c2e 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -161,6 +161,12 @@ class Element(html5lib.treebuilders._base.Node):
# immediately after the parent, if it has no children.)
if self.element.contents:
most_recent_element = self.element._last_descendant(False)
+ elif self.element.next_element is not None:
+ # Something from further ahead in the parse tree is
+ # being inserted into this earlier element. This is
+ # very annoying because it means an expensive search
+ # for the last element in the tree.
+ most_recent_element = self.soup._last_descendant()
else:
most_recent_element = self.element
diff --git a/bs4/testing.py b/bs4/testing.py
index fd4495a..ce207cf 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -254,6 +254,16 @@ class HTMLTreeBuilderSmokeTest(object):
self.assertEqual("p", soup.h2.string.next_element.name)
self.assertEqual("p", soup.p.name)
+ def test_head_tag_between_head_and_body(self):
+ "Prevent recurrence of a bug in the html5lib treebuilder."
+ content = """<html><head></head>
+ <link></link>
+ <body>foo</body>
+</html>
+"""
+ soup = self.soup(content)
+ self.assertNotEqual(None, soup.html.body)
+
def test_basic_namespaces(self):
"""Parsers don't need to *understand* namespaces, but at the
very least they should not choke on namespaces or lose