How about actually parsing the same markup with different parsers?

author: Leonard Richardson <leonard.richardson@canonical.com> 2013-05-15 16:50:27 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2013-05-15 16:50:27 -0400
commit: 202d6407adc5ce81e461b9a85e5930b53f717901 (patch)
tree: 1ef1f06e33bb34190d93844c12b0136cee31cb83 /bs4/diagnose.py
parent: 87c7f93d7feb8bd982dbbc42d814352b60be344d (diff)
1 files changed, 3 insertions, 1 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 4b5f6e4..25fda5c 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -8,6 +8,7 @@ import random
 import time
 import traceback
 import sys
+import cProfile
 
 def diagnose(data):
     """Diagnostic suite for isolating common problems."""
@@ -153,11 +154,12 @@ def benchmark_parsers(num_elements=100000):
     print "Comparative parser benchmark on Beautiful Soup %s" % __version__
     data = rdoc(num_elements)
     print "Generated a large invalid HTML document (%d bytes)." % len(data)
+    
     for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
         success = False
         try:
             a = time.time()
-            soup = BeautifulSoup(data, )
+            soup = BeautifulSoup(data, parser)
             b = time.time()
             success = True
         except Exception, e:
author	Leonard Richardson <leonard.richardson@canonical.com>	2013-05-15 16:50:27 -0400
committer	Leonard Richardson <leonard.richardson@canonical.com>	2013-05-15 16:50:27 -0400
commit	202d6407adc5ce81e461b9a85e5930b53f717901 (patch)
tree	1ef1f06e33bb34190d93844c12b0136cee31cb83 /bs4/diagnose.py
parent	87c7f93d7feb8bd982dbbc42d814352b60be344d (diff)