summaryrefslogtreecommitdiff
path: root/bs4/element.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2020-04-05 15:43:58 -0400
committerLeonard Richardson <leonardr@segfault.org>2020-04-05 15:43:58 -0400
commita6f897b213bb08f0d8d8a1528937541c280abbd6 (patch)
tree866d3392a854ea27a172e9b456b2160307e39363 /bs4/element.py
parentddadf13ef66122d75eadaf7f10e0937429e6a3a6 (diff)
Embedded CSS and Javascript is now stored in distinct Stylesheet and
Script tags, which are ignored by methods like get_text(). This feature is not supported by the html5lib treebuilder. [bug=1868861]
Diffstat (limited to 'bs4/element.py')
-rw-r--r--bs4/element.py29
1 files changed, 28 insertions, 1 deletions
diff --git a/bs4/element.py b/bs4/element.py
index e403839..8c553cd 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -992,6 +992,33 @@ class Doctype(PreformattedString):
SUFFIX = u'>\n'
+class Stylesheet(NavigableString):
+ """A NavigableString representing an stylesheet (probably
+ CSS).
+
+ Used to distinguish embedded stylesheets from textual content.
+ """
+ pass
+
+
+class Script(NavigableString):
+ """A NavigableString representing an executable script (probably
+ Javascript).
+
+ Used to distinguish executable code from textual content.
+ """
+ pass
+
+
+class TemplateString(NavigableString):
+ """A NavigableString representing a string found inside an HTML
+ template embedded in a larger document.
+
+ Used to distinguish such strings from the main body of the document.
+ """
+ pass
+
+
class Tag(PageElement):
"""Represents an HTML or XML tag that is part of a parse tree, along
with its attributes and contents.
@@ -1211,7 +1238,7 @@ class Tag(PageElement):
a subclass not found in this list will be ignored. By
default, this means only NavigableString and CData objects
will be considered. So no comments, processing instructions,
- etc.
+ stylesheets, etc.
:return: A string.
"""