From d055e7ab92a65e641f19989f0d92a0fdcc4fdc4c Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Wed, 18 Jul 2018 22:22:19 -0400
Subject: Fixed a bug where find_all() was not working when asked to find a tag with a namespaced name in an XML document that was parsed as HTML. [bug=1723783]

---
 bs4/testing.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'bs4/testing.py')

diff --git a/bs4/testing.py b/bs4/testing.py
index 641663c..5b0eb8f 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -150,6 +150,14 @@ class HTMLTreeBuilderSmokeTest(object):
             soup.encode("utf-8").replace(b"\n", b""),
             markup.replace(b"\n", b""))
 
+    def test_namespaced_html(self):
+        """When a namespaced XML document is parsed as HTML it should
+        be treated as HTML with weird tag names.
+        """
+        markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
+        soup = self.soup(markup)
+        self.assertEqual(2, len(soup.find_all("ns1:foo")))
+
     def test_processing_instruction(self):
         # We test both Unicode and bytestring to verify that
         # process_markup correctly sets processing_instruction_class
@@ -625,14 +633,14 @@ class XMLTreeBuilderSmokeTest(object):
             soup.encode("utf-8"), markup)
 
     def test_nested_namespaces(self):
-        doc = """<?xml version="1.0" encoding="utf-8"?>
+        doc = b"""<?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <parent xmlns="http://ns1/">
 <child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
 <grandchild ns3:attr="value" xmlns="http://ns4/"/>
 </child>
 </parent>"""
-        soup = BeautifulSoup(doc, "lxml-xml")
+        soup = self.soup(doc)
         self.assertEqual(doc, soup.encode())
 
     def test_formatter_processes_script_tag_for_xml_documents(self):
-- 
cgit v1.2.3