diff options
Diffstat (limited to 'src/beautifulsoup/builder')
-rw-r--r-- | src/beautifulsoup/builder/__init__.py | 13 | ||||
-rw-r--r-- | src/beautifulsoup/builder/html5lib_builder.py | 3 | ||||
-rw-r--r-- | src/beautifulsoup/builder/lxml_builder.py | 5 |
3 files changed, 21 insertions, 0 deletions
diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py index cf54c9c..b7db8db 100644 --- a/src/beautifulsoup/builder/__init__.py +++ b/src/beautifulsoup/builder/__init__.py @@ -26,6 +26,19 @@ class TreeBuilder(Entities): def feed(self, markup): raise NotImplementedError() + def test_fragment_to_document(self, fragment): + """Wrap an HTML fragment to make it look like a document. + + Different parsers do this differently. For instance, html5lib + introduces an empty <head> tag, and lxml + doesn't. Abstracting this away lets us write simple tests + which run HTML fragments through the parser and compare the + results against other HTML fragments. + + This method should not be used outside of tests. + """ + return fragment + class SAXTreeBuilder(TreeBuilder): """A Beautiful Soup treebuilder that listens for SAX events.""" diff --git a/src/beautifulsoup/builder/html5lib_builder.py b/src/beautifulsoup/builder/html5lib_builder.py index b4ef4de..80c3e6d 100644 --- a/src/beautifulsoup/builder/html5lib_builder.py +++ b/src/beautifulsoup/builder/html5lib_builder.py @@ -21,4 +21,7 @@ class HTML5TreeBuilder(SAXTreeBuilder, HTMLTreeBuilder): walker = treewalkers.getTreeWalker('dom') dom2sax(doc, self) + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return u'<html><head></head><body>%s</body></html>' % fragment diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py index 4949fea..3e1de5f 100644 --- a/src/beautifulsoup/builder/lxml_builder.py +++ b/src/beautifulsoup/builder/lxml_builder.py @@ -29,3 +29,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder): self.soup.endData() self.soup.handle_data(content) self.soup.endData(Comment) + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return u'<html><body>%s</body></html>' % fragment + |