From 283a27a0af4c70d91695a146b7a6cbc82dc28098 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Mon, 11 Oct 2021 16:37:41 -0400 Subject: Added special string classes, RubyParenthesisString and RubyTextString, to make it possible to treat ruby text specially in get_text() calls. [bug=1941980] --- bs4/tests/test_navigablestring.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'bs4/tests/test_navigablestring.py') diff --git a/bs4/tests/test_navigablestring.py b/bs4/tests/test_navigablestring.py index 2b76392..649acc0 100644 --- a/bs4/tests/test_navigablestring.py +++ b/bs4/tests/test_navigablestring.py @@ -6,6 +6,8 @@ from bs4.element import ( Declaration, Doctype, NavigableString, + RubyParenthesisString, + RubyTextString, Script, Stylesheet, TemplateString, @@ -128,3 +130,15 @@ class TestNavigableStringSubclasses(SoupTest): soup = self.soup(markup) assert markup == soup.template.encode("utf8") + def test_ruby_strings(self): + markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>" + soup = self.soup(markup) + assert isinstance(soup.rp.string, RubyParenthesisString) + assert isinstance(soup.rt.string, RubyTextString) + + # Just as a demo, here's what this means for get_text usage. + assert "漢字" == soup.get_text(strip=True) + assert "漢(kan)字(ji)" == soup.get_text( + strip=True, + types=(NavigableString, RubyTextString, RubyParenthesisString) + ) -- cgit v1.2.3