summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 7
-rw-r--r-- bs4/__init__.py 2
-rw-r--r-- bs4/builder/_lxml.py 4
-rw-r--r-- doc/source/index.rst 16
-rw-r--r-- setup.py 2
5 files changed, 17 insertions, 14 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 2682720..0443968 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,4 +1,4 @@
-= 4.1.0 (unreleased) =
+= 4.1.0 (20120529) =
* Added experimental support for fixing Windows-1252 characters
embedded in UTF-8 documents. (UnicodeDammit.detwingle())
@@ -11,14 +11,13 @@
* Fixed a bug with the lxml treebuilder that prevented the user from
adding attributes to a tag that didn't originally have
- any. [bug=1002378] Thanks to Oliver Beattie for the patch.
+ attributes. [bug=1002378] Thanks to Oliver Beattie for the patch.
* Fixed some edge-case bugs having to do with inserting an element
into a tag it's already inside, and replacing one of a tag's
children with another. [bug=997529]
-* Fixed the inability to search for non-ASCII attribute
- values. [bug=1003974]
+* Added the ability to search for attribute values specified in UTF-8. [bug=1003974]
This caused a major refactoring of the search code. All the tests
pass, but it's possible that some searches will behave differently.
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 463b153..af8c718 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.0.5"
+__version__ = "4.1.0"
__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
__license__ = "MIT"
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c78fdff..9ec5eb4 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -82,9 +82,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
data = markup.read(self.CHUNK_SIZE)
if data != '':
self.parser.feed(data)
- self.parser.close()
+ foo = self.parser.close()
+ import pdb; pdb.set_trace()
def close(self):
+ import pdb; pdb.set_trace()
self.nsmaps = None
def start(self, name, attrs, nsmap={}):
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 3a2069d..16c6020 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2465,8 +2465,9 @@ UTF-8. Here's a simple example::
quote = (u"\N{LEFT DOUBLE QUOTATION MARK}I like snowmen!\N{RIGHT DOUBLE QUOTATION MARK}")
doc = snowmen.encode("utf8") + quote.encode("windows_1252")
-This document is a mess. You can display the snowmen or the smart
-quotes, but not both::
+This document is a mess. The snowmen are in UTF-8 and the quotes are
+in Windows-1252. You can display the snowmen or the quotes, but not
+both::
print(doc)
# ☃☃☃�I like snowmen!�
@@ -2474,10 +2475,11 @@ quotes, but not both::
print(doc.decode("windows-1252"))
# â˜ƒâ˜ƒâ˜ƒ“I like snowmen!”
-Decoding the document as UTF-8 will raise a ``UnicodeDecodeError``,
-but ``UnicodeDammit.detwingle()`` will convert the document to pure
-UTF-8, allowing you to decode it and display the snowmen and
-quote marks simultaneously::
+Decoding the document as UTF-8 raises a ``UnicodeDecodeError``, and
+decoding it as Windows-1252 gives you gibberish. Fortunately,
+``UnicodeDammit.detwingle()`` will convert the string to pure UTF-8,
+allowing you to decode it to Unicode and display the snowmen and quote
+marks simultaneously::
new_doc = UnicodeDammit.detwingle(doc)
print(new_doc.decode("utf8"))
@@ -2493,7 +2495,7 @@ constructor. Beautiful Soup assumes that a document has a single
encoding, whatever it might be. If you pass it a document that
contains both UTF-8 and Windows-1252, it's likely to think the whole
document is Windows-1252, and the document will come out looking like
-``â˜ƒâ˜ƒâ˜ƒ“I like snowmen!”``.
+`â˜ƒâ˜ƒâ˜ƒ“I like snowmen!”`.
``UnicodeDammit.detwingle()`` is new in Beautiful Soup 4.1.0.
diff --git a/setup.py b/setup.py
index 2db8f47..59045b3 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ except ImportError:
from distutils.command.build_py import build_py
setup(name="beautifulsoup4",
- version = "4.0.5",
+ version = "4.1.0",
author="Leonard Richardson",
author_email='leonardr@segfault.org',
url="http://www.crummy.com/software/BeautifulSoup/bs4/",