diff options
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 6 | ||||
-rw-r--r-- | bs4/testing.py | 10 |
3 files changed, 19 insertions, 0 deletions
@@ -1,5 +1,8 @@
 = 4.3.3 (Unreleased) =
 
+* Fixed yet another problem that caused the html5lib tree builder to
+  create a disconnected parse tree. [bug=1237763]
+
 * Restored the helpful syntax error that happens when you try to
   import the Python 2 edition of Beautiful Soup under Python 3.
   [bug=1213387]
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 7de36ae..6446c2e 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -161,6 +161,12 @@ class Element(html5lib.treebuilders._base.Node):
             # immediately after the parent, if it has no children.)
             if self.element.contents:
                 most_recent_element = self.element._last_descendant(False)
+            elif self.element.next_element is not None:
+                # Something from further ahead in the parse tree is
+                # being inserted into this earlier element. This is
+                # very annoying because it means an expensive search
+                # for the last element in the tree.
+                most_recent_element = self.soup._last_descendant()
             else:
                 most_recent_element = self.element
 
diff --git a/bs4/testing.py b/bs4/testing.py
index fd4495a..ce207cf 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -254,6 +254,16 @@ class HTMLTreeBuilderSmokeTest(object):
         self.assertEqual("p", soup.h2.string.next_element.name)
         self.assertEqual("p", soup.p.name)
 
+    def test_head_tag_between_head_and_body(self):
+        "Prevent recurrence of a bug in the html5lib treebuilder."
+        content = """<html><head></head>
+  <link></link>
+  <body>foo</body>
+</html>
+"""
+        soup = self.soup(content)
+        self.assertNotEqual(None, soup.html.body)
+
     def test_basic_namespaces(self):
         """Parsers don't need to *understand* namespaces, but at the very
         least they should not choke on namespaces or lose