summary | refs | log | tree | commit | diff
path: root/bs4
diff options
context:
space:
mode:
Diffstat (limited to 'bs4')
-rw-r--r-- bs4/__init__.py 4
-rw-r--r-- bs4/builder/_htmlparser.py 3
-rw-r--r-- bs4/element.py 2
-rw-r--r-- bs4/testing.py 5
-rw-r--r-- bs4/tests/test_lxml.py 13
5 files changed, 26 insertions, 1 deletion
diff --git a/bs4/__init__.py b/bs4/__init__.py
index fe2656b..88177d6 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -335,6 +335,10 @@ class BeautifulSoup(Tag):
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
+# Alias to make it easier to type import: 'from bs4 import _soup'
+_s = BeautifulSoup
+_soup = BeautifulSoup
+
class BeautifulStoneSoup(BeautifulSoup):
"""Deprecated interface to an XML parser."""
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ede5cec..e34c9fa 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -85,6 +85,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
self.soup.endData()
if data.startswith("DOCTYPE "):
data = data[len("DOCTYPE "):]
+ elif data == 'DOCTYPE':
+ # i.e. "<!DOCTYPE>"
+ data = ''
self.soup.handle_data(data)
self.soup.endData(Doctype)
diff --git a/bs4/element.py b/bs4/element.py
index c081eba..5ccb019 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -779,7 +779,7 @@ class Doctype(PreformattedString):
@classmethod
def for_name_and_ids(cls, name, pub_id, system_id):
- value = name
+ value = name or ''
if pub_id is not None:
value += ' PUBLIC "%s"' % pub_id
if system_id is not None:
diff --git a/bs4/testing.py b/bs4/testing.py
index c9307d3..ed71d3b 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -81,6 +81,11 @@ class HTMLTreeBuilderSmokeTest(object):
self.assertDoctypeHandled(
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
+ def test_empty_doctype(self):
+ soup = self.soup("<!DOCTYPE>")
+ doctype = soup.contents[0]
+ self.assertEqual("", doctype.strip())
+
def test_public_doctype_with_url(self):
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
self.assertDoctypeHandled(doctype)
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 39e26bf..f32fc2b 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -6,6 +6,8 @@ import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
+ import lxml.etree
+ LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError, e:
LXML_PRESENT = False
@@ -41,6 +43,17 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
self.assertSoupEquals(
"<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
+ # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
+ # test if an old version of lxml is installed.
+
+ @skipIf(
+ LXML_VERSION < (2,3,5,0),
+ "Skipping doctype test for old version of lxml to avoid segfault.")
+ def test_empty_doctype(self):
+ soup = self.soup("<!DOCTYPE>")
+ doctype = soup.contents[0]
+ self.assertEqual("", doctype.strip())
+
def test_beautifulstonesoup_is_xml_parser(self):
# Make sure that the deprecated BSS class uses an xml builder
# if one is installed.