summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 4
-rw-r--r-- bs4/__init__.py 14
-rw-r--r-- bs4/tests/test_soup.py 16
3 files changed, 33 insertions, 1 deletion
diff --git a/CHANGELOG b/CHANGELOG
index f610bc7..7eee93d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,10 @@
namespaced attribute is the empty string, as opposed to
None. [bug=1915583]
+* Improve the warning issued when a directory name (as opposed to
+ the name of a regular file) is passed as markup into the BeautifulSoup
+ constructor. [bug=1913628]
+
= 4.9.3 (20201003)
* Implemented a significant performance optimization to the process of
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 8f78809..e33f62a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -321,14 +321,26 @@ class BeautifulSoup(Tag):
else:
possible_filename = markup
is_file = False
+ is_directory = False
try:
is_file = os.path.exists(possible_filename)
+ if is_file:
+ is_directory = os.path.isdir(possible_filename)
except Exception, e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
pass
- if is_file:
+ if is_directory:
+ warnings.warn(
+ '"%s" looks like a directory name, not markup. You may'
+ ' want to open a file found in this directory and pass'
+ ' the filehandle into Beautiful Soup.' % (
+ self._decode_markup(markup)
+ ),
+ MarkupResemblesLocatorWarning
+ )
+ elif is_file:
warnings.warn(
'"%s" looks like a filename, not markup. You should'
' probably open this file and pass the filehandle into'
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index f21edfa..0603ce7 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -3,6 +3,7 @@
from pdb import set_trace
import logging
+import os
import unittest
import sys
import tempfile
@@ -291,6 +292,21 @@ class TestWarnings(SoupTest):
soup = self.soup(filename)
self.assertEqual([], w)
+ def test_directory_warning(self):
+ try:
+ filename = tempfile.mkdtemp()
+ with warnings.catch_warnings(record=True) as w:
+ soup = self.soup(filename)
+ warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
+ self.assertTrue("looks like a directory" in str(warning.message))
+ finally:
+ os.rmdir(filename)
+
+ # The directory no longer exists, so Beautiful Soup will no longer issue the warning.
+ with warnings.catch_warnings(record=True) as w:
+ soup = self.soup(filename)
+ self.assertEqual([], w)
+
def test_url_warning_with_bytes_url(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup(b"http://www.crummybytes.com/")