From caeb168dc47470607b3cd091e1d35db45c089385 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 9 Feb 2012 12:15:36 -0500 Subject: Patched over a bug in html5lib (?) that was crashing Beautiful Soup on certain kinds of markup. [bug=838800] --- NEWS.txt | 3 +++ bs4/builder/_html5lib.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS.txt b/NEWS.txt index d03c442..b1df902 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -20,6 +20,9 @@ * Unicode, Dammit now detects the encoding in HTML 5-style <meta> tags like <meta charset="utf-8" />. [bug=837268] +* Patched over a bug in html5lib (?) that was crashing Beautiful Soup + on certain kinds of markup. [bug=838800] + * Fixed a bug that wrecked the tree if you replaced an element with an empty string. [bug=728697] diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 11fcc0d..4b80870 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -195,7 +195,10 @@ class Element(html5lib.treebuilders._base.Node): def removeChild(self, node): index = self._nodeIndex(node.parent, node) - del node.parent.element.contents[index] + # XXX This if statement is problematic: + # https://bugs.launchpad.net/beautifulsoup/+bug/838800 + if index is not None: + del node.parent.element.contents[index] node.element.parent = None node.element.extract() node.parent = None -- cgit v1.2.3