diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-05-07 14:12:10 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-05-07 14:12:10 -0400 |
commit | 39efcb4b7ab30145b3733ba820f3c0df0da35ace (patch) | |
tree | ac8be4a47b4c16b936f94f25fa39a174872e80ce /bs4/diagnose.py | |
parent | 07bafa37e866876563ecd729c6a2adaa6d6d01ff (diff) |
Fixed up diagnose() and added it to the docs.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- | bs4/diagnose.py | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py index daaf523..e336633 100644 --- a/bs4/diagnose.py +++ b/bs4/diagnose.py @@ -1,13 +1,22 @@ """Diagnostic functions, mainly for use when doing tech support.""" from StringIO import StringIO from HTMLParser import HTMLParser -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, __version__ from bs4.builder import builder_registry +import os import traceback import sys def diagnose(data): """Diagnostic suite for isolating common problems.""" + print "Diagnostic running on Beautiful Soup %s" % __version__ + print "Python version %s" % sys.version + + if hasattr(data, 'read'): + data = data.read() + elif os.path.exists(data): + print '"%s" looks like a filename. Reading data from the file.' % data + data = open(data).read() basic_parsers = ["html.parser", "html5lib", "lxml"] for name in basic_parsers: for builder in builder_registry.builders: @@ -21,18 +30,25 @@ def diagnose(data): if 'lxml' in basic_parsers: basic_parsers.append(["lxml", "xml"]) + from lxml import etree + print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) + + if 'html5lib' in basic_parsers: + import html5lib + print "Found html5lib version %s" % html5lib.__version__ + print for parser in basic_parsers: - print "Trying to parse your data with %s" % parser + print "Trying to parse your markup with %s" % parser success = False try: soup = BeautifulSoup(data, parser) success = True except Exception, e: - print "%s could not parse the document." % parser + print "%s could not parse the markup." % parser traceback.print_exc() if success: - print "Here's what %s did with the document:" % parser + print "Here's what %s did with the markup:" % parser print soup.prettify() print "-" * 80 |