summary | refs | log | tree | commit | diff
path: root/bs4/diagnose.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2013-06-03 09:54:49 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2013-06-03 09:54:49 -0400
commit aff6cac088db63a65415f2d239e9c8bf07001e73 (patch)
tree 24d832c97f1cdd835d2ab1ed81ba83e94d360623 /bs4/diagnose.py
parent 74380c3685d0fe730bcb06e63d8591e65b557df5 (diff)
Added raw html5lib to the list of parsers that get tested.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- bs4/diagnose.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index a2b405b..ad79d8a 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -179,6 +179,13 @@ def benchmark_parsers(num_elements=100000):
b = time.time()
print "Raw lxml parsed the markup in %.2fs." % (b-a)
+ import html5lib
+ parser = html5lib.HTMLParser()
+ a = time.time()
+ parser.parse(data)
+ b = time.time()
+ print "Raw html5lib parsed the markup in %.2fs." % (b-a)
+
def profile(num_elements=100000, parser="lxml"):
filehandle = tempfile.NamedTemporaryFile()
@@ -196,4 +203,5 @@ def profile(num_elements=100000, parser="lxml"):
if __name__ == '__main__':
#diagnose(sys.stdin.read())
- profile(parser="lxml")
+ profile(1000, parser="html5lib")
+ # benchmark_parsers()