diff options
author | Leonard Richardson <leonardr@segfault.org> | 2018-07-18 22:22:19 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2018-07-18 22:22:19 -0400 |
commit | d055e7ab92a65e641f19989f0d92a0fdcc4fdc4c (patch) | |
tree | ac78c0eb01e7793a8557a8b448022745b90232e2 /bs4/testing.py | |
parent | 68b55626839a8a0ea9e750fff546e201d144f96c (diff) |
Fixed a bug where find_all() was not working when asked to find a
tag with a namespaced name in an XML document that was parsed as
HTML. [bug=1723783]
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- | bs4/testing.py | 12 |
1 file changed, 10 insertions, 2 deletions
```diff
diff --git a/bs4/testing.py b/bs4/testing.py
index 641663c..5b0eb8f 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -150,6 +150,14 @@ class HTMLTreeBuilderSmokeTest(object):
             soup.encode("utf-8").replace(b"\n", b""),
             markup.replace(b"\n", b""))
 
+    def test_namespaced_html(self):
+        """When a namespaced XML document is parsed as HTML it should
+        be treated as HTML with weird tag names.
+        """
+        markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
+        soup = self.soup(markup)
+        self.assertEqual(2, len(soup.find_all("ns1:foo")))
+
     def test_processing_instruction(self):
         # We test both Unicode and bytestring to verify that
         # process_markup correctly sets processing_instruction_class
@@ -625,14 +633,14 @@ class XMLTreeBuilderSmokeTest(object):
             soup.encode("utf-8"), markup)
 
     def test_nested_namespaces(self):
-        doc = """<?xml version="1.0" encoding="utf-8"?>
+        doc = b"""<?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 <parent xmlns="http://ns1/">
 <child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
 <grandchild ns3:attr="value" xmlns="http://ns4/"/>
 </child>
 </parent>"""
-        soup = BeautifulSoup(doc, "lxml-xml")
+        soup = self.soup(doc)
         self.assertEqual(doc, soup.encode())
 
     def test_formatter_processes_script_tag_for_xml_documents(self):
```