# Probe for the optional HTML5 tree builder. HTML5LIB_PRESENT records whether
# the import worked; the skipIf decorator on the test class reads it.
try:
    from bs4.builder import HTML5TreeBuilder
except ImportError:
    # The old `except ImportError, e:` spelling is Python 2-only syntax and
    # the bound exception was never used, so the plain form is used instead.
    HTML5LIB_PRESENT = False
else:
    HTML5LIB_PRESENT = True
from bs4.element import Comment, SoupStrainer
import test_htmlparser
import unittest
from bs4.testing import skipIf
@skipIf(
not HTML5LIB_PRESENT,
"html5lib seems not to be present, not testing its tree builder.")
class TestHTML5Builder(test_htmlparser.TestHTMLParserTreeBuilder):
"""See `BuilderSmokeTest`."""
@property
def default_builder(self):
    """Return a newly constructed HTML5TreeBuilder on every access."""
    builder = HTML5TreeBuilder()
    return builder
def test_soupstrainer(self):
# Parses `markup` with parse_only set to a SoupStrainer for "b", then
# asserts that the decoded soup equals self.document_for(markup)
# (presumably the unfiltered document -- confirm against document_for).
# The html5lib tree builder does not support SoupStrainers.
strainer = SoupStrainer("b")
# NOTE(review): the markup literal below spans three physical lines inside
# ordinary double quotes, which is not valid Python. It looks as if inline
# HTML tags were stripped from it; restore it from the upstream file.
markup = "
A bold statement.
"
soup = self.soup(markup,
parse_only=strainer)
self.assertEqual(
soup.decode(), self.document_for(markup))
def test_bare_string(self):
    # A bare string is turned into some kind of HTML document or
    # fragment recognizable as the original string.
    #
    # In this case, html5lib puts a tag around the bare string.
    # NOTE(review): the tag name is missing from the sentence above, and its
    # second line had lost its leading '#', leaving a stray non-code line in
    # the method body. Confirm the intended tag name against upstream.
    self.assertSoupEquals(
        "A bare string", "A bare string")
def test_correctly_nested_tables(self):
# Exercises table nesting in three steps: build `markup` from adjacent
# string literals, check it against an expected rendering with
# assertSoupEquals, then pass a second document to assertSoupEquals alone.
# NOTE(review): almost every literal in this method is split across
# physical lines inside single or double quotes, which is not valid Python.
# Only the text fragments ("Here's another table:", "foo", "Foo", "Bar",
# "Baz") survive; the surrounding HTML appears to have been stripped.
# Restore the literals from the upstream file before relying on this test.
markup = ('
'
'
'
"
Here's another table:"
'
'
'
foo
'
'
')
# Two-argument form: the markup to parse, then the rendering it is compared to.
self.assertSoupEquals(
markup,
'
Here\'s another table:'
'
foo
'
'
')
# One-argument form: a single document built from adjacent literals.
self.assertSoupEquals(
"
Foo
"
"
Bar
"
"
Baz
")
def test_literal_in_textarea(self):
    # The contents of the parsed document's textarea are expected to be a
    # single literal string.
    # NOTE(review): the markup is an empty string here, so no textarea can
    # come out of it; the literal has probably lost its HTML content.
    # Confirm against the upstream file.
    source = ''
    parsed = self.soup(source)
    actual = parsed.textarea.contents
    expected = ["Junk like tags and <&<&"]
    self.assertEqual(actual, expected)
def test_collapsed_whitespace(self):
"""Whitespace is preserved even in tags that don't require it."""
# NOTE(review): the first literal below is split across two physical lines
# inside double quotes (not valid Python) and the second is empty. Both
# look like markup whose tags were stripped; restore them from upstream.
self.assertSoupEquals("
")
self.assertSoupEquals("")
def test_cdata_where_its_ok(self):
# In html5lib 0.9.0, all CDATA sections are converted into
# comments. In a later version (unreleased as of this
# writing), CDATA sections in tags like