diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-05-14 09:28:14 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-05-14 09:28:14 -0400 |
commit | 613f46a1885a007093d8218e97024b7e94edb9dc (patch) | |
tree | bdc1e8c30f336db1b725ccfca8ff41b8eafbe352 /bs4/diagnose.py | |
parent | ef34d42104b7b207527c4b808855aba40a2263d3 (diff) |
Added diagnostic case for attempting to parse a URL as HTML.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- | bs4/diagnose.py | 15 |
1 files changed, 10 insertions, 5 deletions
```diff
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index e336633..69f739c 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -12,11 +12,6 @@ def diagnose(data):
     print "Diagnostic running on Beautiful Soup %s" % __version__
     print "Python version %s" % sys.version
 
-    if hasattr(data, 'read'):
-        data = data.read()
-    elif os.path.exists(data):
-        print '"%s" looks like a filename. Reading data from the file.' % data
-        data = open(data).read()
     basic_parsers = ["html.parser", "html5lib", "lxml"]
     for name in basic_parsers:
         for builder in builder_registry.builders:
@@ -36,6 +31,16 @@ def diagnose(data):
     if 'html5lib' in basic_parsers:
         import html5lib
         print "Found html5lib version %s" % html5lib.__version__
+
+    if hasattr(data, 'read'):
+        data = data.read()
+    elif os.path.exists(data):
+        print '"%s" looks like a filename. Reading data from the file.' % data
+        data = open(data).read()
+    elif data.startswith("http:") or data.startswith("https:"):
+        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
+        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+        return
     print
 
     for parser in basic_parsers:
```