summary | refs | log | tree | commit | diff
path: root/bs4/tests/test_navigablestring.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2021-10-11 16:37:41 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-10-11 16:37:41 -0400
commit: 283a27a0af4c70d91695a146b7a6cbc82dc28098 (patch)
tree: 97ef8be25ff87e82c446eaa4eb462a82e9988ae9 /bs4/tests/test_navigablestring.py
parent: 242a340e5cf8c13449c9a4d73cf55194536a27d1 (diff)
Added special string classes, RubyParenthesisString and RubyTextString,
to make it possible to treat ruby text specially in get_text() calls. [bug=1941980]
Diffstat (limited to 'bs4/tests/test_navigablestring.py')
-rw-r--r-- bs4/tests/test_navigablestring.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/bs4/tests/test_navigablestring.py b/bs4/tests/test_navigablestring.py
index 2b76392..649acc0 100644
--- a/bs4/tests/test_navigablestring.py
+++ b/bs4/tests/test_navigablestring.py
@@ -6,6 +6,8 @@ from bs4.element import (
Declaration,
Doctype,
NavigableString,
+ RubyParenthesisString,
+ RubyTextString,
Script,
Stylesheet,
TemplateString,
@@ -128,3 +130,15 @@ class TestNavigableStringSubclasses(SoupTest):
soup = self.soup(markup)
assert markup == soup.template.encode("utf8")
+ def test_ruby_strings(self):
+ markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
+ soup = self.soup(markup)
+ assert isinstance(soup.rp.string, RubyParenthesisString)
+ assert isinstance(soup.rt.string, RubyTextString)
+
+ # Just as a demo, here's what this means for get_text usage.
+ assert "漢字" == soup.get_text(strip=True)
+ assert "漢(kan)字(ji)" == soup.get_text(
+ strip=True,
+ types=(NavigableString, RubyTextString, RubyParenthesisString)
+ )