diff options
author | Leonard Richardson <leonardr@segfault.org> | 2012-05-29 13:25:01 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2012-05-29 13:25:01 -0400 |
commit | 8ec2a7d9423a6269f74c47ec2475b6c5fd143437 (patch) | |
tree | c56ac2168b2d10da6057e9b84c129f081cf039e6 | |
parent | 49aa4dd243353f7d0f25d7c5ea51ba3344110a47 (diff) |
Prep for release.
-rw-r--r-- | NEWS.txt | 7 | ||||
-rw-r--r-- | bs4/__init__.py | 2 | ||||
-rw-r--r-- | bs4/builder/_lxml.py | 4 | ||||
-rw-r--r-- | doc/source/index.rst | 16 | ||||
-rw-r--r-- | setup.py | 2 |
5 files changed, 17 insertions, 14 deletions
@@ -1,4 +1,4 @@ -= 4.1.0 (unreleased) = += 4.1.0 (20120529) = * Added experimental support for fixing Windows-1252 characters embedded in UTF-8 documents. (UnicodeDammit.detwingle()) @@ -11,14 +11,13 @@ * Fixed a bug with the lxml treebuilder that prevented the user from adding attributes to a tag that didn't originally have - any. [bug=1002378] Thanks to Oliver Beattie for the patch. + attributes. [bug=1002378] Thanks to Oliver Beattie for the patch. * Fixed some edge-case bugs having to do with inserting an element into a tag it's already inside, and replacing one of a tag's children with another. [bug=997529] -* Fixed the inability to search for non-ASCII attribute - values. [bug=1003974] +* Added the ability to search for attribute values specified in UTF-8. [bug=1003974] This caused a major refactoring of the search code. All the tests pass, but it's possible that some searches will behave differently. diff --git a/bs4/__init__.py b/bs4/__init__.py index 463b153..af8c718 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.0.5" +__version__ = "4.1.0" __copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" __license__ = "MIT" diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py index c78fdff..9ec5eb4 100644 --- a/bs4/builder/_lxml.py +++ b/bs4/builder/_lxml.py @@ -82,9 +82,11 @@ class LXMLTreeBuilderForXML(TreeBuilder): data = markup.read(self.CHUNK_SIZE) if data != '': self.parser.feed(data) - self.parser.close() + foo = self.parser.close() + import pdb; pdb.set_trace() def close(self): + import pdb; pdb.set_trace() self.nsmaps = None def start(self, name, attrs, nsmap={}): diff --git a/doc/source/index.rst b/doc/source/index.rst index 3a2069d..16c6020 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -2465,8 +2465,9 @@ UTF-8. Here's a simple example:: quote = (u"\N{LEFT DOUBLE QUOTATION MARK}I like snowmen!\N{RIGHT DOUBLE QUOTATION MARK}") doc = snowmen.encode("utf8") + quote.encode("windows_1252") -This document is a mess. You can display the snowmen or the smart -quotes, but not both:: +This document is a mess. The snowmen are in UTF-8 and the quotes are +in Windows-1252. You can display the snowmen or the quotes, but not +both:: print(doc) # ☃☃☃�I like snowmen!� @@ -2474,10 +2475,11 @@ quotes, but not both:: print(doc.decode("windows-1252")) # ☃☃☃“I like snowmen!” -Decoding the document as UTF-8 will raise a ``UnicodeDecodeError``, -but ``UnicodeDammit.detwingle()`` will convert the document to pure -UTF-8, allowing you to decode it and display the snowmen and -quote marks simultaneously:: +Decoding the document as UTF-8 raises a ``UnicodeDecodeError``, and +decoding it as Windows-1252 gives you gibberish. Fortunately, +``UnicodeDammit.detwingle()`` will convert the string to pure UTF-8, +allowing you to decode it to Unicode and display the snowmen and quote +marks simultaneously:: new_doc = UnicodeDammit.detwingle(doc) print(new_doc.decode("utf8")) @@ -2493,7 +2495,7 @@ constructor. Beautiful Soup assumes that a document has a single encoding, whatever it might be. If you pass it a document that contains both UTF-8 and Windows-1252, it's likely to think the whole document is Windows-1252, and the document will come out looking like -`` ☃☃☃“I like snowmen!”``. +` ☃☃☃“I like snowmen!”`. ``UnicodeDammit.detwingle()`` is new in Beautiful Soup 4.1.0. @@ -7,7 +7,7 @@ except ImportError: from distutils.command.build_py import build_py setup(name="beautifulsoup4", - version = "4.0.5", + version = "4.1.0", author="Leonard Richardson", author_email='leonardr@segfault.org', url="http://www.crummy.com/software/BeautifulSoup/bs4/", |