diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2013-06-03 10:33:03 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2013-06-03 10:33:03 -0400 |
commit | 4cc522e01dae3fab54170060beef1389d528f6b6 (patch) | |
tree | 509368036b44969526becf745093fb7327676e02 /bs4/builder/_html5lib.py | |
parent | aff6cac088db63a65415f2d239e9c8bf07001e73 (diff) |
Improved performance for html5lib.
Diffstat (limited to 'bs4/builder/_html5lib.py')
-rw-r--r-- | bs4/builder/_html5lib.py | 36 |
1 files changed, 25 insertions, 11 deletions
```diff
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 3bbc9a9..79f618a 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -123,17 +123,31 @@ class Element(html5lib.treebuilders._base.Node):
         self.namespace = namespace
 
     def appendChild(self, node):
-        if (node.element.__class__ == NavigableString and self.element.contents
+        string_child = child = None
+        if isinstance(node, basestring):
+            # Some other piece of code decided to pass in a string
+            # instead of creating a TextElement object to contain the
+            # string.
+            string_child = child = node
+        elif node.element.__class__ == NavigableString:
+            string_child = child = node.element
+        else:
+            child = node.element
+
+        if (string_child and self.element.contents
             and self.element.contents[-1].__class__ == NavigableString):
-            # Concatenate new text onto old text node
-            # XXX This has O(n^2) performance, for input like
+            # We are appending a string onto another string.
+            # TODO This has O(n^2) performance, for input like
             # "a</a>a</a>a</a>..."
             old_element = self.element.contents[-1]
-            new_element = self.soup.new_string(old_element + node.element)
+            new_element = self.soup.new_string(old_element + string_child)
             old_element.replace_with(new_element)
-            self.soup._most_recent_element = new_element
+            self.soup._most_recent_element = new_element
         else:
-            self.soup.object_was_parsed(node.element, parent=self.element)
+            if isinstance(node, basestring):
+                # Create a brand new NavigableString from this string.
+                child = self.soup.new_string(node)
+            self.soup.object_was_parsed(child, parent=self.element)
 
     def getAttributes(self):
         return AttrList(self.element)
@@ -162,11 +176,11 @@ class Element(html5lib.treebuilders._base.Node):
     attributes = property(getAttributes, setAttributes)
 
     def insertText(self, data, insertBefore=None):
-        text = TextNode(self.soup.new_string(data), self.soup)
         if insertBefore:
-            self.insertBefore(text, insertBefore)
+            text = TextNode(self.soup.new_string(data), self.soup)
+            self.insertBefore(data, insertBefore)
         else:
-            self.appendChild(text)
+            self.appendChild(data)
 
     def insertBefore(self, node, refNode):
         index = self.element.index(refNode.element)
@@ -182,6 +196,7 @@ class Element(html5lib.treebuilders._base.Node):
 
     def removeChild(self, node):
         node.element.extract()
+        pass
 
     def reparentChildren(self, newParent):
         while self.element.contents:
@@ -191,8 +206,7 @@ class Element(html5lib.treebuilders._base.Node):
                 newParent.appendChild(
                     Element(child, self.soup, namespaces["html"]))
             else:
-                newParent.appendChild(
-                    TextNode(child, self.soup))
+                newParent.appendChild(child)
 
     def cloneNode(self):
         tag = self.soup.new_tag(self.element.name, self.namespace)
```