From febd6cea8cdd665a6f13c7040c3ff7d60d350e41 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sat, 21 Jul 2018 12:18:17 -0400 Subject: Fixed a problem where the html.parser tree builder interpreted a string like '&foo ' as the character entity '&foo;' [bug=1728706] --- bs4/testing.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'bs4/testing.py') diff --git a/bs4/testing.py b/bs4/testing.py index 5b0eb8f..bbcc271 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -319,6 +319,14 @@ Hello, world! def test_angle_brackets_in_attribute_values_are_escaped(self): self.assertSoupEquals('', '') + def test_strings_resembling_character_entity_references(self): + # "&T" and "&p" look like incomplete character entities, but they are + # not. + self.assertSoupEquals( + u"

<p>&bull; AT&T is in the s&p 500</p>

", + u"

<p>\u2022 AT&T is in the s&p 500</p>

" + ) + def test_entities_in_attributes_converted_to_unicode(self): expect = u'

' self.assertSoupEquals('

', expect) -- cgit v1.2.3