2 files changed, 41 insertions, 8 deletions
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
index 224c9d8..4af4b0c 100644
--- a/bs4/tests/__init__.py
+++ b/bs4/tests/__init__.py
@@ -22,7 +22,11 @@ from bs4.element import (
     Tag
 )
 
-from bs4.builder import HTMLParserTreeBuilder
+from bs4.builder import (
+    DetectsXMLParsedAsHTML,
+    HTMLParserTreeBuilder,
+    XMLParsedAsHTMLWarning,
+)
 default_builder = HTMLParserTreeBuilder
 
 BAD_DOCUMENT = """A bare string
@@ -422,16 +426,43 @@ class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
 <head><title>Hello.</title></head>
 <body>Goodbye.</body>
 </html>"""
-        soup = self.soup(markup)
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(markup)
         assert soup.encode("utf-8").replace(b"\n", b"") == markup.replace(b"\n", b"")
 
+        # No warning was issued about parsing an XML document as HTML,
+        # because XHTML is both.
+        assert w == []
+
+
     def test_namespaced_html(self):
-        """When a namespaced XML document is parsed as HTML it should
-        be treated as HTML with weird tag names.
-        """
+        # When a namespaced XML document is parsed as HTML it should
+        # be treated as HTML with weird tag names.
         markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
-        soup = self.soup(markup)
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(markup)
+
         assert 2 == len(soup.find_all("ns1:foo"))
+
+        # N.B. No "you're parsing XML as HTML" warning was issued,
+        # because there was no XML declaration.
+        assert [] == w
+
+    def test_detect_xml_parsed_as_html(self):
+        # A warning is issued when parsing an XML document as HTML,
+        # but basic stuff should still work.
+        markup = b"""<?xml version="1.0" encoding="utf-8"?><tag>string</tag>"""
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(markup)
+            assert soup.tag.string == 'string'
+        [warning] = w
+        assert isinstance(warning.message, XMLParsedAsHTMLWarning)
+        assert str(warning.message) == XMLParsedAsHTMLWarning.MESSAGE
+
+        # NOTE: the warning is not issued if the document appears to
+        # be XHTML (tested with test_real_xhtml_document) or if
+        # there is no XML declaration (tested with
+        # test_namespaced_html, also in this class).
         
     def test_processing_instruction(self):
         # We test both Unicode and bytestring to verify that
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index 5912cf5..bfcfa1f 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -4,7 +4,10 @@ trees."""
 from pdb import set_trace
 import pickle
 import warnings
-from bs4.builder import HTMLParserTreeBuilder
+from bs4.builder import (
+    HTMLParserTreeBuilder,
+    XMLParsedAsHTMLWarning,
+)
 from bs4.builder._htmlparser import BeautifulSoupHTMLParser
 from . import SoupTest, HTMLTreeBuilderSmokeTest
 
@@ -120,7 +123,6 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
             expect = b"<div>%s</div>" % output_element
             assert with_element == expect
 
-
 class TestHTMLParserSubclass(SoupTest):
     def test_error(self):
         """Verify that our HTMLParser subclass implements error() in a way