summary | refs | log | tree | commit | diff
path: root/bs4/testing.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-03-01 13:37:42 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-03-01 13:37:42 -0500
commit: e533b599457f713944a17d29739308f09bfd5aef (patch)
tree: 70fa05e8de9e6457c0df6bfb594ac3cf04abd38d /bs4/testing.py
parent: 483286bfbb40bfabe4c48c9f31c59ef7449d64bb (diff)
In HTML5-style <meta charset="foo"> tags, the value of the "charset" attribute is now replaced with the appropriate encoding on output. [bug=942714]
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- bs4/testing.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/bs4/testing.py b/bs4/testing.py
index a3e0b38..1b73160 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -368,6 +368,24 @@ class HTMLTreeBuilderSmokeTest(object):
# For the rest of the story, see TestSubstitutions in
# test_tree.py.
+ def test_html5_style_meta_tag_reflects_current_encoding(self):
+ # An HTML5-style <meta> tag whose charset attribute declares
+ # that the document is encoded in Shift-JIS.
+ meta_tag = ('<meta id="encoding" charset="x-sjis" />')
+
+ # Build a document incorporating that meta tag and pass it to self.soup().
+ shift_jis_html = (
+ '<html><head>\n%s\n'
+ '<meta http-equiv="Content-language" content="ja"/>'
+ '</head><body>Shift-JIS markup goes here.') % meta_tag
+ soup = self.soup(shift_jis_html)
+
+ # In the resulting soup, the charset value must be the %SOUP-ENCODING%
+ # placeholder and the tag must report that it contains substitutions.
+ parsed_meta = soup.find('meta', id="encoding")
+ self.assertEqual('%SOUP-ENCODING%', parsed_meta['charset'])
+ self.assertEqual(True, parsed_meta.contains_substitutions)
+
class XMLTreeBuilderSmokeTest(object):
def test_docstring_generated(self):