summaryrefslogtreecommitdiff
path: root/bs4/diagnose.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2013-06-03 08:02:06 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2013-06-03 08:02:06 -0400
commit: 1235765f2f6009428c0b1c60e56656851b9334ce (patch)
tree: 7a1aced6826c1703890dc6258b70e9150ae2d9ec /bs4/diagnose.py
parent: 4a84a3088fc653e390eac5f5572d0d21a697d86f (diff)
Let's get some profiling going.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- bs4/diagnose.py 23
1 file changed, 22 insertions, 1 deletion
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index f9bff28..b6eaa8d 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -1,10 +1,15 @@
"""Diagnostic functions, mainly for use when doing tech support."""
+import cProfile
from StringIO import StringIO
from HTMLParser import HTMLParser
+import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
+
import os
+import pstats
import random
+import tempfile
import time
import traceback
import sys
@@ -174,5 +179,21 @@ def benchmark_parsers(num_elements=100000):
b = time.time()
print "Raw lxml parsed the markup in %.2fs." % (b-a)
+def profile(num_elements=100000, parser="lxml"):
+    """Profile Beautiful Soup while it parses a generated document.
+
+    Builds a document with rdoc(num_elements), runs
+    bs4.BeautifulSoup(data, parser) under cProfile, and prints the
+    collected statistics twice: first ordered by cumulative time,
+    then by internal time.
+
+    :param num_elements: Argument passed through to rdoc().
+    :param parser: Parser name passed to the BeautifulSoup constructor.
+    """
+    data = rdoc(num_elements)
+    namespace = dict(bs4=bs4, data=data, parser=parser)
+
+    # The temporary file only carries the raw stats from cProfile to
+    # pstats; the with-block removes it even if profiling fails.
+    with tempfile.NamedTemporaryFile() as filehandle:
+        cProfile.runctx(
+            'bs4.BeautifulSoup(data, parser)', namespace, namespace,
+            filehandle.name)
+        stats = pstats.Stats(filehandle.name)
+
+    stats.strip_dirs()
+    # sort_stats() re-sorts the same Stats object in place and returns
+    # it, so each ordering has to be printed before the next sort.
+    stats.sort_stats("cumulative").print_stats(50)
+    stats.sort_stats("time").print_stats(50)
+
if __name__ == '__main__':
- diagnose(sys.stdin.read())
+ # Stdin diagnosis is disabled while profile() is the entry point: diagnose(sys.stdin.read())
+ profile()