From 269157a8f40dfdac082f39befd69f170263d2ce1 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonardr@segfault.org>
Date: Tue, 7 May 2013 08:19:02 -0400
Subject: Now that lxml's segfault on invalid doctype has been fixed, fix a  
 corresponding problem on the Beautiful Soup end that was previously  
 invisible. [bug=984936]

---
 NEWS.txt                   |  4 ++++
 bs4/builder/_htmlparser.py |  3 +++
 bs4/element.py             |  2 +-
 bs4/testing.py             |  5 +++++
 bs4/tests/test_lxml.py     | 19 +++++++++++++++++++
 5 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/NEWS.txt b/NEWS.txt
index edbba28..c2739ca 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -17,6 +17,10 @@
 * Fix a bug by which keyword arguments to find_parent() were not
   being passed on. [bug=1126734]
 
+* Now that lxml's segfault on invalid doctype has been fixed, fix a
+  corresponding problem on the Beautiful Soup end that was previously
+  invisible. [bug=984936]
+
 = 4.1.3 (20120820) =
 
 * Skipped a test under Python 2.6 and Python 3.1 to avoid a spurious
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ede5cec..e34c9fa 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -85,6 +85,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
         self.soup.endData()
         if data.startswith("DOCTYPE "):
             data = data[len("DOCTYPE "):]
+        elif data == 'DOCTYPE':
+            # i.e. a bare "<!DOCTYPE>" with nothing after the keyword.
+            data = ''
         self.soup.handle_data(data)
         self.soup.endData(Doctype)
 
diff --git a/bs4/element.py b/bs4/element.py
index d58da92..f38d9b4 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -779,7 +779,7 @@ class Doctype(PreformattedString):
 
     @classmethod
     def for_name_and_ids(cls, name, pub_id, system_id):
-        value = name
+        value = name or ''
         if pub_id is not None:
             value += ' PUBLIC "%s"' % pub_id
             if system_id is not None:
diff --git a/bs4/testing.py b/bs4/testing.py
index c9307d3..ed71d3b 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -81,6 +81,11 @@ class HTMLTreeBuilderSmokeTest(object):
         self.assertDoctypeHandled(
             'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
 
+    def test_empty_doctype(self):
+        soup = self.soup("<!DOCTYPE>")
+        doctype = soup.contents[0]
+        self.assertEqual("", doctype.strip())
+
     def test_public_doctype_with_url(self):
         doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
         self.assertDoctypeHandled(doctype)
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 39e26bf..693ec25 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -6,6 +6,14 @@ import warnings
 try:
     from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
     LXML_PRESENT = True
+    import lxml.etree
+    LXML_VERSION = []
+    for i in lxml.etree.__version__.split('.'):
+        try:
+            part = int(i)
+        except TypeError:
+            part = 0
+        LXML_VERSION.append(part)
 except ImportError, e:
     LXML_PRESENT = False
 
@@ -41,6 +49,17 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         self.assertSoupEquals(
             "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
 
+    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
+    # test if an old version of lxml is installed.
+
+    @skipIf(
+        LXML_VERSION < [2,3,5],
+        "Skipping doctype test for old version of lxml to avoid segfault.")
+    def test_empty_doctype(self):
+        soup = self.soup("<!DOCTYPE>")
+        doctype = soup.contents[0]
+        self.assertEqual("", doctype.strip())
+
     def test_beautifulstonesoup_is_xml_parser(self):
         # Make sure that the deprecated BSS class uses an xml builder
         # if one is installed.
-- 
cgit v1.2.3


From e31151091c3dd44d0f39ba234df261f362199ae5 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonardr@segfault.org>
Date: Tue, 7 May 2013 08:36:07 -0400
Subject: Improved detection of lxml version number.

---
 bs4/tests/test_lxml.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 693ec25..f32fc2b 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -7,13 +7,7 @@ try:
     from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
     LXML_PRESENT = True
     import lxml.etree
-    LXML_VERSION = []
-    for i in lxml.etree.__version__.split('.'):
-        try:
-            part = int(i)
-        except TypeError:
-            part = 0
-        LXML_VERSION.append(part)
+    LXML_VERSION = lxml.etree.LXML_VERSION
 except ImportError, e:
     LXML_PRESENT = False
 
@@ -53,7 +47,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
     # test if an old version of lxml is installed.
 
     @skipIf(
-        LXML_VERSION < [2,3,5],
+        LXML_VERSION < (2,3,5,0),
         "Skipping doctype test for old version of lxml to avoid segfault.")
     def test_empty_doctype(self):
         soup = self.soup("<!DOCTYPE>")
-- 
cgit v1.2.3


From 5b3860ec348b66976de64b5be407704041102869 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonardr@segfault.org>
Date: Tue, 7 May 2013 08:40:35 -0400
Subject: Aliased the BeautifulSoup class to the easier-to-type "_s" and
 "_soup".

---
 NEWS.txt        | 7 +++++++
 bs4/__init__.py | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/NEWS.txt b/NEWS.txt
index c2739ca..70a1dc7 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -8,6 +8,13 @@
   you strings that are visible in the document--no comments or
   processing commands. [bug=1050164]
 
+* The BeautifulSoup class is now aliased to "_s" and "_soup", making
+  it quicker to type an import statement in an interactive session:
+
+  from bs4 import _s
+   or
+  from bs4 import _soup
+
 * Fix a bug in the html5lib treebuilder which sometimes created
   disconnected trees. [bug=1039527]
 
diff --git a/bs4/__init__.py b/bs4/__init__.py
index fe2656b..88177d6 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -335,6 +335,10 @@ class BeautifulSoup(Tag):
         return prefix + super(BeautifulSoup, self).decode(
             indent_level, eventual_encoding, formatter)
 
+# Aliases that make the import quicker to type: 'from bs4 import _soup'
+_s = BeautifulSoup
+_soup = BeautifulSoup
+
 class BeautifulStoneSoup(BeautifulSoup):
     """Deprecated interface to an XML parser."""
 
-- 
cgit v1.2.3