summary | refs | log | tree | commit | diff
path: root/bs4/diagnose.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2013-05-15 11:10:12 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2013-05-15 11:10:12 -0400
commit: 87c7f93d7feb8bd982dbbc42d814352b60be344d (patch)
tree: 3905df101f631ca66055ab2af50c36d34227268b /bs4/diagnose.py
parent: ada530f6bc24bf4e536c1c859d798b836ec0799c (diff)
parent: c3cc17f0dda7d378890a12fd8b5c29de9f923dab (diff)
Merge.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- bs4/diagnose.py 15
1 file changed, 10 insertions, 5 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index d4e657c..4b5f6e4 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -14,11 +14,6 @@ def diagnose(data):
print "Diagnostic running on Beautiful Soup %s" % __version__
print "Python version %s" % sys.version
- if hasattr(data, 'read'):
- data = data.read()
- elif os.path.exists(data):
- print '"%s" looks like a filename. Reading data from the file.' % data
- data = open(data).read()
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
for builder in builder_registry.builders:
@@ -38,6 +33,16 @@ def diagnose(data):
if 'html5lib' in basic_parsers:
import html5lib
print "Found html5lib version %s" % html5lib.__version__
+
+ if hasattr(data, 'read'):
+ data = data.read()
+ elif os.path.exists(data):
+ print '"%s" looks like a filename. Reading data from the file.' % data
+ data = open(data).read()
+ elif data.startswith("http:") or data.startswith("https:"):
+ print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
+ print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+ return
print
for parser in basic_parsers: