Added diagnostic case for attempting to parse a URL as HTML.

author: Leonard Richardson <leonardr@segfault.org> 2013-05-14 09:28:14 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-05-14 09:28:14 -0400
commit: 613f46a1885a007093d8218e97024b7e94edb9dc (patch)
tree: bdc1e8c30f336db1b725ccfca8ff41b8eafbe352 /bs4/diagnose.py
parent: ef34d42104b7b207527c4b808855aba40a2263d3 (diff)
1 file changed, 10 insertions, 5 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index e336633..69f739c 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -12,11 +12,6 @@ def diagnose(data):
     print "Diagnostic running on Beautiful Soup %s" % __version__
     print "Python version %s" % sys.version
 
-    if hasattr(data, 'read'):
-        data = data.read()
-    elif os.path.exists(data):
-        print '"%s" looks like a filename. Reading data from the file.' % data
-        data = open(data).read()
     basic_parsers = ["html.parser", "html5lib", "lxml"]
     for name in basic_parsers:
         for builder in builder_registry.builders:
@@ -36,6 +31,16 @@ def diagnose(data):
     if 'html5lib' in basic_parsers:
         import html5lib
         print "Found html5lib version %s" % html5lib.__version__
+
+    if hasattr(data, 'read'):
+        data = data.read()
+    elif os.path.exists(data):
+        print '"%s" looks like a filename. Reading data from the file.' % data
+        data = open(data).read()
+    elif data.startswith("http:") or data.startswith("https:"):
+        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
+        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+        return
     print
 
     for parser in basic_parsers:
author	Leonard Richardson <leonardr@segfault.org>	2013-05-14 09:28:14 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2013-05-14 09:28:14 -0400
commit	613f46a1885a007093d8218e97024b7e94edb9dc (patch)
tree	bdc1e8c30f336db1b725ccfca8ff41b8eafbe352 /bs4/diagnose.py
parent	ef34d42104b7b207527c4b808855aba40a2263d3 (diff)