diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/builder/_html5lib.py | 6 | ||||
-rw-r--r-- | bs4/testing.py | 10 |
2 files changed, 16 insertions, 0 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 7de36ae..6446c2e 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -161,6 +161,12 @@ class Element(html5lib.treebuilders._base.Node):
             # immediately after the parent, if it has no children.)
             if self.element.contents:
                 most_recent_element = self.element._last_descendant(False)
+            elif self.element.next_element is not None:
+                # Something from further ahead in the parse tree is
+                # being inserted into this earlier element. This is
+                # very annoying because it means an expensive search
+                # for the last element in the tree.
+                most_recent_element = self.soup._last_descendant()
             else:
                 most_recent_element = self.element
 
diff --git a/bs4/testing.py b/bs4/testing.py
index fd4495a..ce207cf 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -254,6 +254,16 @@ class HTMLTreeBuilderSmokeTest(object):
         self.assertEqual("p", soup.h2.string.next_element.name)
         self.assertEqual("p", soup.p.name)
 
+    def test_head_tag_between_head_and_body(self):
+        "Prevent recurrence of a bug in the html5lib treebuilder."
+        content = """<html><head></head>
+  <link></link>
+  <body>foo</body>
+</html>
+"""
+        soup = self.soup(content)
+        self.assertNotEqual(None, soup.html.body)
+
     def test_basic_namespaces(self):
         """Parsers don't need to *understand* namespaces, but at the
         very least they should not choke on namespaces or lose