summary | refs | log | tree | commit | diff
path: root/bs4/diagnose.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2013-05-15 16:50:27 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2013-05-15 16:50:27 -0400
commit: 202d6407adc5ce81e461b9a85e5930b53f717901 (patch)
tree: 1ef1f06e33bb34190d93844c12b0136cee31cb83 /bs4/diagnose.py
parent: 87c7f93d7feb8bd982dbbc42d814352b60be344d (diff)
How about actually parsing the same markup with different parsers.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- bs4/diagnose.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 4b5f6e4..25fda5c 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -8,6 +8,7 @@ import random
import time
import traceback
import sys
+import cProfile
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
@@ -153,11 +154,12 @@ def benchmark_parsers(num_elements=100000):
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
data = rdoc(num_elements)
print "Generated a large invalid HTML document (%d bytes)." % len(data)
+
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
try:
a = time.time()
- soup = BeautifulSoup(data, )
+ soup = BeautifulSoup(data, parser)
b = time.time()
success = True
except Exception, e: