diff options
author | Leonard Richardson <leonardr@segfault.org> | 2021-10-11 16:37:41 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2021-10-11 16:37:41 -0400 |
commit | 283a27a0af4c70d91695a146b7a6cbc82dc28098 (patch) | |
tree | 97ef8be25ff87e82c446eaa4eb462a82e9988ae9 /bs4/tests/test_navigablestring.py | |
parent | 242a340e5cf8c13449c9a4d73cf55194536a27d1 (diff) |
Added special string classes, RubyParenthesisString and RubyTextString,
to make it possible to treat ruby text specially in get_text() calls.
[bug=1941980]
Diffstat (limited to 'bs4/tests/test_navigablestring.py')
-rw-r--r-- | bs4/tests/test_navigablestring.py | 14 |
1 files changed, 14 insertions, 0 deletions
```diff
diff --git a/bs4/tests/test_navigablestring.py b/bs4/tests/test_navigablestring.py
index 2b76392..649acc0 100644
--- a/bs4/tests/test_navigablestring.py
+++ b/bs4/tests/test_navigablestring.py
@@ -6,6 +6,8 @@ from bs4.element import (
     Declaration,
     Doctype,
     NavigableString,
+    RubyParenthesisString,
+    RubyTextString,
     Script,
     Stylesheet,
     TemplateString,
@@ -128,3 +130,15 @@ class TestNavigableStringSubclasses(SoupTest):
         soup = self.soup(markup)
         assert markup == soup.template.encode("utf8")
 
+    def test_ruby_strings(self):
+        markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
+        soup = self.soup(markup)
+        assert isinstance(soup.rp.string, RubyParenthesisString)
+        assert isinstance(soup.rt.string, RubyTextString)
+
+        # Just as a demo, here's what this means for get_text usage.
+        assert "漢字" == soup.get_text(strip=True)
+        assert "漢(kan)字(ji)" == soup.get_text(
+            strip=True,
+            types=(NavigableString, RubyTextString, RubyParenthesisString)
+        )
```