From a6f897b213bb08f0d8d8a1528937541c280abbd6 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonardr@segfault.org>
Date: Sun, 5 Apr 2020 15:43:58 -0400
Subject: Embedded CSS and Javascript is now stored in distinct Stylesheet and 
  Script tags, which are ignored by methods like get_text(). This   feature is
 not supported by the html5lib treebuilder. [bug=1868861]

---
 bs4/builder/_html5lib.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'bs4/builder/_html5lib.py')

diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 32a0856..b36189d 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -39,7 +39,18 @@ except ImportError, e:
     new_html5lib = True
 
 class HTML5TreeBuilder(HTMLTreeBuilder):
-    """Use html5lib to build a tree."""
+    """Use html5lib to build a tree.
+
+    Note that this TreeBuilder does not support some features common
+    to HTML TreeBuilders. Some of these features could theoretically
+    be implemented, but at the very least it's quite difficult,
+    because html5lib moves the parse tree around as it's being built.
+
+    * This TreeBuilder doesn't use different subclasses of NavigableString
+      based on the name of the tag in which the string was found.
+
+    * You can't use a SoupStrainer to parse only part of a document.
+    """
 
     NAME = "html5lib"
 
@@ -116,6 +127,9 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
                 "", "html.parser", store_line_numbers=store_line_numbers,
                 **kwargs
             )
+        # TODO: What are **kwargs exactly? Should they be passed in
+        # here in addition to/instead of being passed to the BeautifulSoup
+        # constructor?
         super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
 
         # This will be set later to an html5lib.html5parser.HTMLParser
-- 
cgit v1.2.3