diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2013-05-15 16:50:27 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2013-05-15 16:50:27 -0400 |
commit | 202d6407adc5ce81e461b9a85e5930b53f717901 (patch) | |
tree | 1ef1f06e33bb34190d93844c12b0136cee31cb83 /bs4/diagnose.py | |
parent | 87c7f93d7feb8bd982dbbc42d814352b60be344d (diff) |
How about actually parsing the same markup with different parsers.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- | bs4/diagnose.py | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 4b5f6e4..25fda5c 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -8,6 +8,7 @@ import random
 import time
 import traceback
 import sys
+import cProfile
 
 def diagnose(data):
     """Diagnostic suite for isolating common problems."""
@@ -153,11 +154,12 @@ def benchmark_parsers(num_elements=100000):
     print "Comparative parser benchmark on Beautiful Soup %s" % __version__
     data = rdoc(num_elements)
     print "Generated a large invalid HTML document (%d bytes)." % len(data)
+
     for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
         success = False
         try:
             a = time.time()
-            soup = BeautifulSoup(data, )
+            soup = BeautifulSoup(data, parser)
             b = time.time()
             success = True
         except Exception, e:
```