diff options
-rw-r--r-- | beautifulsoup/__init__.py | 7 |
-rw-r--r-- | beautifulsoup/builder/__init__.py | 4 |
-rw-r--r-- | beautifulsoup/builder/_html5lib.py (renamed from beautifulsoup/builder/html5lib_builder.py) | 6 |
-rw-r--r-- | beautifulsoup/builder/_lxml.py (renamed from beautifulsoup/builder/lxml_builder.py) | 5 |
-rw-r--r-- | beautifulsoup/testing.py | 2 |
-rw-r--r-- | tests/test_html5lib.py | 13 |
-rw-r--r-- | tests/test_lxml.py | 11 |
-rw-r--r-- | tests/test_tree.py | 9 |
8 files changed, 43 insertions, 14 deletions
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py index ca32589..968be08 100644 --- a/beautifulsoup/__init__.py +++ b/beautifulsoup/__init__.py @@ -121,10 +121,10 @@ class BeautifulSoup(Tag): @classmethod def default_builder(self): try: - from builder.html5_builder import HTML5TreeBuilder + from builder import HTML5TreeBuilder return HTML5TreeBuilder() except ImportError: - from builder.lxml_builder import LXMLTreeBuilder + from builder import LXMLTreeBuilder return LXMLTreeBuilder() def __init__(self, markup="", builder=None, parseOnlyThese=None, @@ -258,12 +258,15 @@ class BeautifulSoup(Tag): tag = Tag(self, self.builder, name, attrs, self.currentTag, self.previous) + if tag is None: + return tag if self.previous: self.previous.next = tag self.previous = tag self.pushTag(tag) return tag + def handle_endtag(self, name): #print "End tag: " + name self.endData() diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py index 9ffa9ef..d6c750c 100644 --- a/beautifulsoup/builder/__init__.py +++ b/beautifulsoup/builder/__init__.py @@ -7,7 +7,6 @@ __all__ = [ 'TreeBuilder', ] - class TreeBuilder(Entities): """Turn a document into a Beautiful Soup object tree.""" @@ -163,3 +162,6 @@ class HTMLTreeBuilder(TreeBuilder): raise StopParsing pass return False + +from _lxml import * +from _html5lib import * diff --git a/beautifulsoup/builder/html5lib_builder.py b/beautifulsoup/builder/_html5lib.py index 0a24ce1..9cca0b0 100644 --- a/beautifulsoup/builder/html5lib_builder.py +++ b/beautifulsoup/builder/_html5lib.py @@ -1,3 +1,7 @@ +__all__ = [ + 'HTML5TreeBuilder', + ] + from beautifulsoup.builder import HTMLTreeBuilder, SAXTreeBuilder import html5lib from html5lib.constants import DataLossWarning @@ -217,6 +221,6 @@ class TextNode(Element): html5lib.treebuilders._base.Node.__init__(self, None) self.element = element self.soup = soup - + def cloneNode(self): raise NotImplementedError diff --git a/beautifulsoup/builder/lxml_builder.py 
b/beautifulsoup/builder/_lxml.py index 9f4c0bd..c2f368c 100644 --- a/beautifulsoup/builder/lxml_builder.py +++ b/beautifulsoup/builder/_lxml.py @@ -1,3 +1,8 @@ +__all__ = [ + 'LXMLTreeBuilderForXML', + 'LXMLTreeBuilder', + ] + from lxml import etree from beautifulsoup.element import Comment, Doctype from beautifulsoup.builder import TreeBuilder, HTMLTreeBuilder diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py index 9b1e858..8fd9abf 100644 --- a/beautifulsoup/testing.py +++ b/beautifulsoup/testing.py @@ -3,7 +3,7 @@ import unittest from beautifulsoup import BeautifulSoup from beautifulsoup.element import Comment, SoupStrainer -from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder +from beautifulsoup.builder import LXMLTreeBuilder class SoupTest(unittest.TestCase): diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 021c603..aa0bad2 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -1,5 +1,5 @@ -from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder -from beautifulsoup.element import Comment +from beautifulsoup.builder import HTML5TreeBuilder +from beautifulsoup.element import Comment, SoupStrainer from test_lxml import ( TestLXMLBuilder, TestLXMLBuilderInvalidMarkup, @@ -13,6 +13,15 @@ class TestHTML5Builder(TestLXMLBuilder): def default_builder(self): return HTML5TreeBuilder() + def test_soupstrainer(self): + # The html5lib tree builder does not support SoupStrainers. + strainer = SoupStrainer("b") + markup = "<p>A <b>bold</b> statement.</p>" + soup = self.soup(markup, + parseOnlyThese=strainer) + self.assertEquals( + soup.decode(), self.document_for(markup)) + def test_bare_string(self): # A bare string is turned into some kind of HTML document or # fragment recognizable as the original string. 
diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 88c866d..de2ce7b 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -3,8 +3,8 @@ import re from beautifulsoup import BeautifulSoup -from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder -from beautifulsoup.element import Comment, Doctype +from beautifulsoup.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML +from beautifulsoup.element import Comment, Doctype, SoupStrainer from beautifulsoup.testing import SoupTest @@ -320,6 +320,12 @@ class TestLXMLBuilder(SoupTest): self.assertFalse(soup.p.is_empty_element) self.assertEquals(str(soup.p), "<p></p>") + def test_soupstrainer(self): + strainer = SoupStrainer("b") + soup = self.soup("A <b>bold</b> <meta /> <i>statement</i>", + parseOnlyThese=strainer) + self.assertEquals(soup.decode(), "<b>bold</b>") + class TestLXMLBuilderInvalidMarkup(SoupTest): """Tests of invalid markup for the LXML tree builder. @@ -505,7 +511,6 @@ class TestLXMLBuilderEncodingConversion(SoupTest): self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8")) -from beautifulsoup.builder.lxml_builder import LXMLTreeBuilderForXML class TestLXMLXMLBuilder(SoupTest): """Test XML-specific parsing behavior. 
diff --git a/tests/test_tree.py b/tests/test_tree.py index 8cbd309..384d518 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -524,14 +524,15 @@ class TestTreeModification(SoupTest): def test_new_tag_creation(self): builder = BeautifulSoup.default_builder() - soup = self.soup("", builder=builder) + soup = self.soup("<body></body>", builder=builder) a = Tag(soup, builder, 'a') ol = Tag(soup, builder, 'ol') a['href'] = 'http://foo.com/' - soup.insert(0, a) - soup.insert(1, ol) + soup.body.insert(0, a) + soup.body.insert(1, ol) self.assertEqual( - soup.decode(), '<a href="http://foo.com/"></a><ol></ol>') + soup.body.encode(), + '<body><a href="http://foo.com/"></a><ol></ol></body>') def test_append_to_contents_moves_tag(self): doc = """<p id="1">Don't leave me <b>here</b>.</p> |