diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2013-06-03 08:02:06 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2013-06-03 08:02:06 -0400 |
commit | 1235765f2f6009428c0b1c60e56656851b9334ce (patch) | |
tree | 7a1aced6826c1703890dc6258b70e9150ae2d9ec /bs4/diagnose.py | |
parent | 4a84a3088fc653e390eac5f5572d0d21a697d86f (diff) |
Let's get some profiling going.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- | bs4/diagnose.py | 23 |
1 file changed, 22 insertions, 1 deletion
```diff
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index f9bff28..b6eaa8d 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -1,10 +1,15 @@
 """Diagnostic functions, mainly for use when doing tech support."""
+import cProfile
 from StringIO import StringIO
 from HTMLParser import HTMLParser
+import bs4
 from bs4 import BeautifulSoup, __version__
 from bs4.builder import builder_registry
+
 import os
+import pstats
 import random
+import tempfile
 import time
 import traceback
 import sys
@@ -174,5 +179,21 @@ def benchmark_parsers(num_elements=100000):
     b = time.time()
     print "Raw lxml parsed the markup in %.2fs." % (b-a)
 
+def profile(num_elements=100000, parser="lxml"):
+
+    filehandle = tempfile.NamedTemporaryFile()
+    filename = filehandle.name
+
+    data = rdoc(num_elements)
+    vars = dict(bs4=bs4, data=data, parser=parser)
+    cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
+
+    stats = pstats.Stats(filename)
+    stats.strip_dirs()
+    cumulative = stats.sort_stats("cumulative")
+    total = stats.sort_stats("time")
+    import pdb; pdb.set_trace()
+
 if __name__ == '__main__':
-    diagnose(sys.stdin.read())
+    #diagnose(sys.stdin.read())
+    profile()
```