1 files changed, 18 insertions, 1 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 7b5964a..0dded3a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -17,12 +17,13 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.2.1"
+__version__ = "4.3.0"
 __copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
 __license__ = "MIT"
 
 __all__ = ['BeautifulSoup']
 
+import os
 import re
 import warnings
 
@@ -162,6 +163,22 @@ class BeautifulSoup(Tag):
 
         if hasattr(markup, 'read'):        # It's a file-type object.
             markup = markup.read()
+        elif len(markup) <= 256:
+            # Print out warnings for a couple of beginner problems
+            # involving passing non-markup to Beautiful Soup.
+            # Beautiful Soup will still parse the input as markup,
+            # just in case that's what the user really wants.
+            if os.path.exists(markup):
+                warnings.warn(
+                    '"%s" looks like a filename, not markup. You should probably open a filehandle and pass the filehandle into Beautiful Soup.' % markup)
+            if markup[:5] == "http:" or markup[:6] == "https:":
+                # TODO: This is ugly but I couldn't get it to work in
+                # Python 3 otherwise.
+                if ((isinstance(markup, bytes) and not b' ' in markup)
+                    or (isinstance(markup, unicode) and not u' ' in markup)):
+                    warnings.warn(
+                        '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
+
         for (self.markup, self.original_encoding, self.declared_html_encoding,
          self.contains_replacement_characters) in (
             self.builder.prepare_markup(markup, from_encoding)):