"""Tests to ensure that the lxml tree builder generates good trees.""" import re try: from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML LXML_PRESENT = True except ImportError, e: LXML_PRESENT = False from bs4 import BeautifulSoup from bs4.element import Comment, Doctype, SoupStrainer from bs4.testing import skipIf from bs4.tests import test_htmlparser from bs4.testing import skipIf @skipIf( not LXML_PRESENT, "lxml seems not to be present, not testing its tree builder.") class TestLXMLTreeBuilder(test_htmlparser.TestHTMLParserTreeBuilder): """A smoke test for the LXML tree builder. Subclass this to test some other HTML tree builder. Subclasses of this test ensure that all of Beautiful Soup's tree builders generate more or less the same trees. It's okay for trees to differ--just override the appropriate test method to demonstrate how one tree builder differs from the LXML builder. But in general, all HTML tree builders should generate trees that make most of these tests pass. """ @property def default_builder(self): return LXMLTreeBuilder() def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. # # In this case, lxml puts a
tag around the bare string. self.assertSoupEquals( "A bare string", "
A bare string
") def test_cdata_where_its_ok(self): # lxml strips CDATA sections, no matter where they occur. markup = "