summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- beautifulsoup/__init__.py | 7
-rw-r--r-- beautifulsoup/builder/__init__.py | 4
-rw-r--r-- beautifulsoup/builder/_html5lib.py (renamed from beautifulsoup/builder/html5lib_builder.py) | 6
-rw-r--r-- beautifulsoup/builder/_lxml.py (renamed from beautifulsoup/builder/lxml_builder.py) | 5
-rw-r--r-- beautifulsoup/testing.py | 2
-rw-r--r-- tests/test_html5lib.py | 13
-rw-r--r-- tests/test_lxml.py | 11
-rw-r--r-- tests/test_tree.py | 9
8 files changed, 43 insertions, 14 deletions
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index ca32589..968be08 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -121,10 +121,10 @@ class BeautifulSoup(Tag):
@classmethod
def default_builder(self):
try:
- from builder.html5_builder import HTML5TreeBuilder
+ from builder import HTML5TreeBuilder
return HTML5TreeBuilder()
except ImportError:
- from builder.lxml_builder import LXMLTreeBuilder
+ from builder import LXMLTreeBuilder
return LXMLTreeBuilder()
def __init__(self, markup="", builder=None, parseOnlyThese=None,
@@ -258,12 +258,15 @@ class BeautifulSoup(Tag):
tag = Tag(self, self.builder, name, attrs, self.currentTag,
self.previous)
+ if tag is None:
+ return tag
if self.previous:
self.previous.next = tag
self.previous = tag
self.pushTag(tag)
return tag
+
def handle_endtag(self, name):
#print "End tag: " + name
self.endData()
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index 9ffa9ef..d6c750c 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -7,7 +7,6 @@ __all__ = [
'TreeBuilder',
]
-
class TreeBuilder(Entities):
"""Turn a document into a Beautiful Soup object tree."""
@@ -163,3 +162,6 @@ class HTMLTreeBuilder(TreeBuilder):
raise StopParsing
pass
return False
+
+from _lxml import *
+from _html5lib import *
diff --git a/beautifulsoup/builder/html5lib_builder.py b/beautifulsoup/builder/_html5lib.py
index 0a24ce1..9cca0b0 100644
--- a/beautifulsoup/builder/html5lib_builder.py
+++ b/beautifulsoup/builder/_html5lib.py
@@ -1,3 +1,7 @@
+__all__ = [
+ 'HTML5TreeBuilder',
+ ]
+
from beautifulsoup.builder import HTMLTreeBuilder, SAXTreeBuilder
import html5lib
from html5lib.constants import DataLossWarning
@@ -217,6 +221,6 @@ class TextNode(Element):
html5lib.treebuilders._base.Node.__init__(self, None)
self.element = element
self.soup = soup
-
+
def cloneNode(self):
raise NotImplementedError
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/_lxml.py
index 9f4c0bd..c2f368c 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/_lxml.py
@@ -1,3 +1,8 @@
+__all__ = [
+ 'LXMLTreeBuilderForXML',
+ 'LXMLTreeBuilder',
+ ]
+
from lxml import etree
from beautifulsoup.element import Comment, Doctype
from beautifulsoup.builder import TreeBuilder, HTMLTreeBuilder
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
index 9b1e858..8fd9abf 100644
--- a/beautifulsoup/testing.py
+++ b/beautifulsoup/testing.py
@@ -3,7 +3,7 @@
import unittest
from beautifulsoup import BeautifulSoup
from beautifulsoup.element import Comment, SoupStrainer
-from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
+from beautifulsoup.builder import LXMLTreeBuilder
class SoupTest(unittest.TestCase):
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 021c603..aa0bad2 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -1,5 +1,5 @@
-from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
-from beautifulsoup.element import Comment
+from beautifulsoup.builder import HTML5TreeBuilder
+from beautifulsoup.element import Comment, SoupStrainer
from test_lxml import (
TestLXMLBuilder,
TestLXMLBuilderInvalidMarkup,
@@ -13,6 +13,15 @@ class TestHTML5Builder(TestLXMLBuilder):
def default_builder(self):
return HTML5TreeBuilder()
+ def test_soupstrainer(self):
+ # The html5lib tree builder does not support SoupStrainers.
+ strainer = SoupStrainer("b")
+ markup = "<p>A <b>bold</b> statement.</p>"
+ soup = self.soup(markup,
+ parseOnlyThese=strainer)
+ self.assertEquals(
+ soup.decode(), self.document_for(markup))
+
def test_bare_string(self):
# A bare string is turned into some kind of HTML document or
# fragment recognizable as the original string.
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 88c866d..de2ce7b 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -3,8 +3,8 @@
import re
from beautifulsoup import BeautifulSoup
-from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
-from beautifulsoup.element import Comment, Doctype
+from beautifulsoup.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
+from beautifulsoup.element import Comment, Doctype, SoupStrainer
from beautifulsoup.testing import SoupTest
@@ -320,6 +320,12 @@ class TestLXMLBuilder(SoupTest):
self.assertFalse(soup.p.is_empty_element)
self.assertEquals(str(soup.p), "<p></p>")
+ def test_soupstrainer(self):
+ strainer = SoupStrainer("b")
+ soup = self.soup("A <b>bold</b> <meta /> <i>statement</i>",
+ parseOnlyThese=strainer)
+ self.assertEquals(soup.decode(), "<b>bold</b>")
+
class TestLXMLBuilderInvalidMarkup(SoupTest):
"""Tests of invalid markup for the LXML tree builder.
@@ -505,7 +511,6 @@ class TestLXMLBuilderEncodingConversion(SoupTest):
self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8"))
-from beautifulsoup.builder.lxml_builder import LXMLTreeBuilderForXML
class TestLXMLXMLBuilder(SoupTest):
"""Test XML-specific parsing behavior.
diff --git a/tests/test_tree.py b/tests/test_tree.py
index 8cbd309..384d518 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -524,14 +524,15 @@ class TestTreeModification(SoupTest):
def test_new_tag_creation(self):
builder = BeautifulSoup.default_builder()
- soup = self.soup("", builder=builder)
+ soup = self.soup("<body></body>", builder=builder)
a = Tag(soup, builder, 'a')
ol = Tag(soup, builder, 'ol')
a['href'] = 'http://foo.com/'
- soup.insert(0, a)
- soup.insert(1, ol)
+ soup.body.insert(0, a)
+ soup.body.insert(1, ol)
self.assertEqual(
- soup.decode(), '<a href="http://foo.com/"></a><ol></ol>')
+ soup.body.encode(),
+ '<body><a href="http://foo.com/"></a><ol></ol></body>')
def test_append_to_contents_moves_tag(self):
doc = """<p id="1">Don't leave me <b>here</b>.</p>