From 23ec4e144b4d737e8fb8712e35532bb9f5e67cbf Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Wed, 8 Feb 2012 09:21:39 -0500
Subject: Moved around a bunch of metadata.

---
 AUTHORS     |  39 ----------
 AUTHORS.txt |  39 ++++++++++
 CHANGELOG   | 229 -----------------------------------------------------------
 COPYING     |  26 -------
 COPYING.txt |  26 +++++++
 NEWS.txt    | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.txt  |  25 ++++---
 TODO        |  42 -----------
 TODO.txt    |  42 +++++++++++
 setup.py    |  25 ++++---
 10 files changed, 365 insertions(+), 359 deletions(-)
 delete mode 100644 AUTHORS
 create mode 100644 AUTHORS.txt
 delete mode 100644 CHANGELOG
 delete mode 100644 COPYING
 create mode 100644 COPYING.txt
 create mode 100644 NEWS.txt
 delete mode 100644 TODO
 create mode 100644 TODO.txt

diff --git a/AUTHORS b/AUTHORS
deleted file mode 100644
index 9623a7c..0000000
--- a/AUTHORS
+++ /dev/null
@@ -1,39 +0,0 @@
-Behold, mortal, the origins of Beautiful Soup...
-================================================
-
-Leonard Richardson is the primary programmer.
-
-Aaron DeVore is awesome.
-
-Mark Pilgrim provided the encoding detection code that forms the base
-of UnicodeDammit.
-
-Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
-Soup 4 working under Python 3.
-
-Sam Ruby helped with a lot of edge cases.
-
-Jonathan Ellis was awarded the prestigous Beau Potage D'Or for his
-work in solving the nestable tags conundrum.
-
-The following people have contributed patches to Beautiful Soup:
-
- Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
- Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
- Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
- Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
- Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
- Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
- Webster, Paul Wright, Danny Yoo
-
-The following people made suggestions or found bugs or found ways to
-break Beautiful Soup:
-
- Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Matt Ernst,
- Michael Foord, Tom Harris, Bill de hOra, Donald Howes, Matt
- Patterson, Scott Roberts, Steve Strassmann, Mike Williams, warchild
- at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison, Joren Mc,
- Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed Summers,
- Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart Turner, Greg
- Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de Sousa Rocha,
- Yichun Wei, Per Vognsen
diff --git a/AUTHORS.txt b/AUTHORS.txt
new file mode 100644
index 0000000..e093cd6
--- /dev/null
+++ b/AUTHORS.txt
@@ -0,0 +1,39 @@
+Behold, mortal, the origins of Beautiful Soup...
+================================================
+
+Leonard Richardson is the primary programmer.
+
+Aaron DeVore is awesome.
+
+Mark Pilgrim provided the encoding detection code that forms the base
+of UnicodeDammit.
+
+Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
+Soup 4 working under Python 3.
+
+Sam Ruby helped with a lot of edge cases.
+
+Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
+work in solving the nestable tags conundrum.
+
+The following people have contributed patches to Beautiful Soup:
+
+ Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
+ Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
+ Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
+ Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
+ Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
+ Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
+ Webster, Paul Wright, Danny Yoo
+
+The following people made suggestions or found bugs or found ways to
+break Beautiful Soup:
+
+ Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
+ Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
+ Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
+ warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
+ Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
+ Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
+ Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
+ Sousa Rocha, Yichun Wei, Per Vognsen
diff --git a/CHANGELOG b/CHANGELOG
deleted file mode 100644
index b0ad7be..0000000
--- a/CHANGELOG
+++ /dev/null
@@ -1,229 +0,0 @@
-= 4.0 beta 4 =
-
-Added BeautifulSoup.new_string() to go along with BeautifulSoup.new_tag()
-
-BeautifulSoup.new_tag() will follow the rules of whatever tree-builder
-was used to create the original BeautifulSoup object. A new <p> tag
-will look like "<p />" if the soup object was created to parse XML,
-but it will look like "<p></p>" if the soup object was created to
-parse HTML.
-
-We pass in strict=False to html.parser on Python 3, greatly improving
-html.parser's ability to handle bad HTML.
-
-Monkeypatch a serious bug in html.parser that made strict=False
-disastrous on Python 3.2.2.
-
-Replaced the "substitute_html_entities" argument with the "formatter" argument.
-
-Bare ampersands and angle brackets are always converted to XML
-entities unless the user prevents it.
-
-Added PageElement.insert_before().
-
-Added PageElement.insert_after().
-
-Raise an exception when the user tries to do something nonsensical
-like insert a tag into itself.
-
-= 4.0.0b3 =
-
-Beautiful Soup 4 is a nearly-complete rewrite that removes Beautiful
-Soup's custom HTML parser in favor of a system that lets you write a
-little glue code and plug in any HTML or XML parser you want.
-
-Beautiful Soup 4.0 comes with glue code for four parsers:
-
- * Python's standard HTMLParser (html.parser in Python 3)
- * lxml's HTML and XML parsers
- * html5lib's HTML parser
-
-HTMLParser is the default, but I recommend you install lxml if you
-can.
-
-For complete documentation, see the Sphinx documentation in
-bs4/doc/source/. What follows is a summary of the changes from
-Beautiful Soup 3.
-
-=== The module name has changed ===
-
-Previously you imported the BeautifulSoup class from a module also
-called BeautifulSoup. To save keystrokes and make it clear which
-version of the API is in use, the module is now called 'bs4':
-
-    >>> from bs4 import BeautifulSoup
-
-=== It works with Python 3 ===
-
-Beautiful Soup 3.1.0 worked with Python 3, but the parser it used was
-so bad that it barely worked at all. Beautiful Soup 4 works with
-Python 3, and since its parser is pluggable, you don't sacrifice
-quality.
-
-Special thanks to Thomas Kluyver and Ezio Melotti for getting Python 3
-support to the finish line. Ezio Melotti is also to thank for greatly
-improving the HTML parser that comes with Python 3.2.
-
-=== CDATA sections are normal text, if they're understood at all. ===
-
-Currently, the lxml and html5lib HTML parsers ignore CDATA sections in
-markup:
-
- <p><![CDATA[foo]]></p> => <p></p>
-
-A future version of html5lib will turn CDATA sections into text nodes,
-but only within tags like <svg> and <math>:
-
- <svg><![CDATA[foo]]></svg> => <p>foo</p>
-
-The default XML parser (which uses lxml behind the scenes) turns CDATA
-sections into ordinary text elements:
-
- <p><![CDATA[foo]]></p> => <p>foo</p>
-
-In theory it's possible to preserve the CDATA sections when using the
-XML parser, but I don't see how to get it to work in practice.
-
-=== Miscellaneous other stuff ===
-
-If the BeautifulSoup instance has .is_xml set to True, an appropriate
-XML declaration will be emitted when the tree is transformed into a
-string:
-
-    <?xml version="1.0" encoding="utf-8">
-    <markup>
-     ...
-    </markup>
-
-The ['lxml', 'xml'] tree builder sets .is_xml to True; the other tree
-builders set it to False. If you want to parse XHTML with an HTML
-parser, you can set it manually.
-
-
-= 3.2.0 =
-
-The 3.1 series wasn't very useful, so I renamed the 3.0 series to 3.2
-to make it obvious which one you should use.
-
-= 3.1.0 =
-
-A hybrid version that supports 2.4 and can be automatically converted
-to run under Python 3.0. There are three backwards-incompatible
-changes you should be aware of, but no new features or deliberate
-behavior changes.
-
-1. str() may no longer do what you want. This is because the meaning
-of str() inverts between Python 2 and 3; in Python 2 it gives you a
-byte string, in Python 3 it gives you a Unicode string.
-
-The effect of this is that you can't pass an encoding to .__str__
-anymore. Use encode() to get a string and decode() to get Unicode, and
-you'll be ready (well, readier) for Python 3.
-
-2. Beautiful Soup is now based on HTMLParser rather than SGMLParser,
-which is gone in Python 3. There's some bad HTML that SGMLParser
-handled but HTMLParser doesn't, usually to do with attribute values
-that aren't closed or have brackets inside them:
-
-  <a href="foo</a>, </a><a href="bar">baz</a>
-  <a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>
-
-A later version of Beautiful Soup will allow you to plug in different
-parsers to make tradeoffs between speed and the ability to handle bad
-HTML.
-
-3. In Python 3 (but not Python 2), HTMLParser converts entities within
-attributes to the corresponding Unicode characters. In Python 2 it's
-possible to parse this string and leave the &eacute; intact.
-
- <a href="http://crummy.com?sacr&eacute;&bleu">
-
-In Python 3, the &eacute; is always converted to \xe9 during
-parsing.
-
-
-= 3.0.7a =
-
-Added an import that makes BS work in Python 2.3.
-
-
-= 3.0.7 =
-
-Fixed a UnicodeDecodeError when unpickling documents that contain
-non-ASCII characters.
-
-Fixed a TypeError that occured in some circumstances when a tag
-contained no text.
-
-Jump through hoops to avoid the use of chardet, which can be extremely
-slow in some circumstances. UTF-8 documents should never trigger the
-use of chardet.
-
-Whitespace is preserved inside <pre> and <textarea> tags that contain
-nothing but whitespace.
-
-Beautiful Soup can now parse a doctype that's scoped to an XML namespace.
-
-
-= 3.0.6 =
-
-Got rid of a very old debug line that prevented chardet from working.
-
-Added a Tag.decompose() method that completely disconnects a tree or a
-subset of a tree, breaking it up into bite-sized pieces that are
-easy for the garbage collecter to collect.
-
-Tag.extract() now returns the tag that was extracted.
-
-Tag.findNext() now does something with the keyword arguments you pass
-it instead of dropping them on the floor.
-
-Fixed a Unicode conversion bug.
-
-Fixed a bug that garbled some <meta> tags when rewriting them.
-
-
-= 3.0.5 =
-
-Soup objects can now be pickled, and copied with copy.deepcopy.
-
-Tag.append now works properly on existing BS objects. (It wasn't
-originally intended for outside use, but it can be now.) (Giles
-Radford)
-
-Passing in a nonexistent encoding will no longer crash the parser on
-Python 2.4 (John Nagle).
-
-Fixed an underlying bug in SGMLParser that thinks ASCII has 255
-characters instead of 127 (John Nagle).
-
-Entities are converted more consistently to Unicode characters.
-
-Entity references in attribute values are now converted to Unicode
-characters when appropriate. Numeric entities are always converted,
-because SGMLParser always converts them outside of attribute values.
-
-ALL_ENTITIES happens to just be the XHTML entities, so I renamed it to
-XHTML_ENTITIES.
-
-The regular expression for bare ampersands was too loose. In some
-cases ampersands were not being escaped. (Sam Ruby?)
-
-Non-breaking spaces and other special Unicode space characters are no
-longer folded to ASCII spaces. (Robert Leftwich)
-
-Information inside a TEXTAREA tag is now parsed literally, not as HTML
-tags. TEXTAREA now works exactly the same way as SCRIPT. (Zephyr Fang)
-
-
-= 3.0.4 =
-
-Fixed a bug that crashed Unicode conversion in some cases.
-
-Fixed a bug that prevented UnicodeDammit from being used as a
-general-purpose data scrubber.
-
-Fixed some unit test failures when running against Python 2.5.
-
-When considering whether to convert smart quotes, UnicodeDammit now
-looks at the original encoding in a case-insensitive way.
diff --git a/COPYING b/COPYING
deleted file mode 100644
index 2c9cd0c..0000000
--- a/COPYING
+++ /dev/null
@@ -1,26 +0,0 @@
-Beautiful Soup is made available under the MIT license:
-
- Copyright (c) 2004-2011 Leonard Richardson
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE, DAMMIT.
-
-Beautiful Soup incorporates code from the html5lib library, which is
-also made available under the MIT license.
diff --git a/COPYING.txt b/COPYING.txt
new file mode 100644
index 0000000..2c9cd0c
--- /dev/null
+++ b/COPYING.txt
@@ -0,0 +1,26 @@
+Beautiful Soup is made available under the MIT license:
+
+ Copyright (c) 2004-2011 Leonard Richardson
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE, DAMMIT.
+
+Beautiful Soup incorporates code from the html5lib library, which is
+also made available under the MIT license.
diff --git a/NEWS.txt b/NEWS.txt
new file mode 100644
index 0000000..aa08b6e
--- /dev/null
+++ b/NEWS.txt
@@ -0,0 +1,231 @@
+= 4.0.0b4 =
+
+* Added BeautifulSoup.new_string() to go along with BeautifulSoup.new_tag()
+
+* BeautifulSoup.new_tag() will follow the rules of whatever
+  tree-builder was used to create the original BeautifulSoup object. A
+  new <p> tag will look like "<p />" if the soup object was created to
+  parse XML, but it will look like "<p></p>" if the soup object was
+  created to parse HTML.
+
+* We pass in strict=False to html.parser on Python 3, greatly
+  improving html.parser's ability to handle bad HTML.
+
+* We also monkeypatch a serious bug in html.parser that made
+  strict=False disastrous on Python 3.2.2.
+
+* Replaced the "substitute_html_entities" argument with the
+  "formatter" argument.
+
+* Bare ampersands and angle brackets are always converted to XML
+  entities unless the user prevents it.
+
+* Added PageElement.insert_before().
+
+* Added PageElement.insert_after().
+
+* Raise an exception when the user tries to do something nonsensical
+  like insert a tag into itself.
+
+
+= 4.0.0b3 =
+
+Beautiful Soup 4 is a nearly-complete rewrite that removes Beautiful
+Soup's custom HTML parser in favor of a system that lets you write a
+little glue code and plug in any HTML or XML parser you want.
+
+Beautiful Soup 4.0 comes with glue code for four parsers:
+
+ * Python's standard HTMLParser (html.parser in Python 3)
+ * lxml's HTML and XML parsers
+ * html5lib's HTML parser
+
+HTMLParser is the default, but I recommend you install lxml if you
+can.
+
+For complete documentation, see the Sphinx documentation in
+bs4/doc/source/. What follows is a summary of the changes from
+Beautiful Soup 3.
+
+=== The module name has changed ===
+
+Previously you imported the BeautifulSoup class from a module also
+called BeautifulSoup. To save keystrokes and make it clear which
+version of the API is in use, the module is now called 'bs4':
+
+    >>> from bs4 import BeautifulSoup
+
+=== It works with Python 3 ===
+
+Beautiful Soup 3.1.0 worked with Python 3, but the parser it used was
+so bad that it barely worked at all. Beautiful Soup 4 works with
+Python 3, and since its parser is pluggable, you don't sacrifice
+quality.
+
+Special thanks to Thomas Kluyver and Ezio Melotti for getting Python 3
+support to the finish line. Ezio Melotti is also to thank for greatly
+improving the HTML parser that comes with Python 3.2.
+
+=== CDATA sections are normal text, if they're understood at all. ===
+
+Currently, the lxml and html5lib HTML parsers ignore CDATA sections in
+markup:
+
+ <p><![CDATA[foo]]></p> => <p></p>
+
+A future version of html5lib will turn CDATA sections into text nodes,
+but only within tags like <svg> and <math>:
+
+ <svg><![CDATA[foo]]></svg> => <svg>foo</svg>
+
+The default XML parser (which uses lxml behind the scenes) turns CDATA
+sections into ordinary text elements:
+
+ <p><![CDATA[foo]]></p> => <p>foo</p>
+
+In theory it's possible to preserve the CDATA sections when using the
+XML parser, but I don't see how to get it to work in practice.
+
+=== Miscellaneous other stuff ===
+
+If the BeautifulSoup instance has .is_xml set to True, an appropriate
+XML declaration will be emitted when the tree is transformed into a
+string:
+
+    <?xml version="1.0" encoding="utf-8"?>
+    <markup>
+     ...
+    </markup>
+
+The ['lxml', 'xml'] tree builder sets .is_xml to True; the other tree
+builders set it to False. If you want to parse XHTML with an HTML
+parser, you can set it manually.
+
+
+= 3.2.0 =
+
+The 3.1 series wasn't very useful, so I renamed the 3.0 series to 3.2
+to make it obvious which one you should use.
+
+= 3.1.0 =
+
+A hybrid version that supports 2.4 and can be automatically converted
+to run under Python 3.0. There are three backwards-incompatible
+changes you should be aware of, but no new features or deliberate
+behavior changes.
+
+1. str() may no longer do what you want. This is because the meaning
+of str() inverts between Python 2 and 3; in Python 2 it gives you a
+byte string, in Python 3 it gives you a Unicode string.
+
+The effect of this is that you can't pass an encoding to .__str__
+anymore. Use encode() to get a string and decode() to get Unicode, and
+you'll be ready (well, readier) for Python 3.
+
+2. Beautiful Soup is now based on HTMLParser rather than SGMLParser,
+which is gone in Python 3. There's some bad HTML that SGMLParser
+handled but HTMLParser doesn't, usually to do with attribute values
+that aren't closed or have brackets inside them:
+
+  <a href="foo</a>, </a><a href="bar">baz</a>
+  <a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>
+
+A later version of Beautiful Soup will allow you to plug in different
+parsers to make tradeoffs between speed and the ability to handle bad
+HTML.
+
+3. In Python 3 (but not Python 2), HTMLParser converts entities within
+attributes to the corresponding Unicode characters. In Python 2 it's
+possible to parse this string and leave the &eacute; intact.
+
+ <a href="http://crummy.com?sacr&eacute;&bleu">
+
+In Python 3, the &eacute; is always converted to \xe9 during
+parsing.
+
+
+= 3.0.7a =
+
+Added an import that makes BS work in Python 2.3.
+
+
+= 3.0.7 =
+
+Fixed a UnicodeDecodeError when unpickling documents that contain
+non-ASCII characters.
+
+Fixed a TypeError that occurred in some circumstances when a tag
+contained no text.
+
+Jump through hoops to avoid the use of chardet, which can be extremely
+slow in some circumstances. UTF-8 documents should never trigger the
+use of chardet.
+
+Whitespace is preserved inside <pre> and <textarea> tags that contain
+nothing but whitespace.
+
+Beautiful Soup can now parse a doctype that's scoped to an XML namespace.
+
+
+= 3.0.6 =
+
+Got rid of a very old debug line that prevented chardet from working.
+
+Added a Tag.decompose() method that completely disconnects a tree or a
+subset of a tree, breaking it up into bite-sized pieces that are
+easy for the garbage collector to collect.
+
+Tag.extract() now returns the tag that was extracted.
+
+Tag.findNext() now does something with the keyword arguments you pass
+it instead of dropping them on the floor.
+
+Fixed a Unicode conversion bug.
+
+Fixed a bug that garbled some <meta> tags when rewriting them.
+
+
+= 3.0.5 =
+
+Soup objects can now be pickled, and copied with copy.deepcopy.
+
+Tag.append now works properly on existing BS objects. (It wasn't
+originally intended for outside use, but it can be now.) (Giles
+Radford)
+
+Passing in a nonexistent encoding will no longer crash the parser on
+Python 2.4 (John Nagle).
+
+Fixed an underlying bug in SGMLParser that thinks ASCII has 255
+characters instead of 127 (John Nagle).
+
+Entities are converted more consistently to Unicode characters.
+
+Entity references in attribute values are now converted to Unicode
+characters when appropriate. Numeric entities are always converted,
+because SGMLParser always converts them outside of attribute values.
+
+ALL_ENTITIES happens to just be the XHTML entities, so I renamed it to
+XHTML_ENTITIES.
+
+The regular expression for bare ampersands was too loose. In some
+cases ampersands were not being escaped. (Sam Ruby?)
+
+Non-breaking spaces and other special Unicode space characters are no
+longer folded to ASCII spaces. (Robert Leftwich)
+
+Information inside a TEXTAREA tag is now parsed literally, not as HTML
+tags. TEXTAREA now works exactly the same way as SCRIPT. (Zephyr Fang)
+
+
+= 3.0.4 =
+
+Fixed a bug that crashed Unicode conversion in some cases.
+
+Fixed a bug that prevented UnicodeDammit from being used as a
+general-purpose data scrubber.
+
+Fixed some unit test failures when running against Python 2.5.
+
+When considering whether to convert smart quotes, UnicodeDammit now
+looks at the original encoding in a case-insensitive way.
diff --git a/README.txt b/README.txt
index 769da98..e5c78c2 100644
--- a/README.txt
+++ b/README.txt
@@ -34,21 +34,20 @@
    </tag3>
   </tag1>
 
-The bs4/doc directory contains full documentation in Sphinx
-format. Run "make html" to create HTML documentation.
+= Full documentation =
 
-= Running the unit tests =
-
-Here's how to run the tests on Python 2.7:
+The bs4/doc/ directory contains full documentation in Sphinx
+format. Run "make html" in that directory to create HTML
+documentation.
 
- $ cd bs4
- $ python2.7 -m unittest discover -s bs4
+= Running the unit tests =
 
-Here's how to do it with Python 3.2:
+Beautiful Soup supports unit test discovery. You can run the tests
+from the project root directory with this command:
 
- $ ./convert-py3k
- $ cd py3k/bs4
- $ python3 -m unittest discover -s bs4
+ $ python -m unittest discover -s bs4
 
-The script test-all-versions will run the tests twice, once on Python
-2.7 and once on Python 3.
+If you checked out the source tree, you should see a script in the
+project root directory called test-all-versions. This script will run
+the unit tests under Python 2.7, then create a temporary Python 3
+conversion of the source and run the unit tests again under Python 3.
diff --git a/TODO b/TODO
deleted file mode 100644
index 2f03dd2..0000000
--- a/TODO
+++ /dev/null
@@ -1,42 +0,0 @@
-Bugs
-----
-
-* I think whitespace may not be processed correctly.
-
-* html5lib doesn't support SoupStrainers, which is OK, but there
-  should be a warning about it.
-
-Big features
-------------
-
-* Add namespace support.
-
-Optimizations
--------------
-
-markup_attr_map can be optimized since it's always a map now.
-
-BS3 features not yet ported
----------------------------
-
-* In BS3, "soup.aTag" is the same as 'soup.find("a")'. This lets you
-locate a tag called (let's say) "find" with attribute
-access. "soup.find" won't do what you want, but "soup.findTag" will.
-
-This still works In BS4 but it's deprecated. I could make
-"soup.find_tag" work the same way as "soup.find('find')", but I don't
-think it's worth it.
-
-CDATA
------
-
-The elementtree XMLParser has a strip_cdata argument that, when set to
-False, should allow Beautiful Soup to preserve CDATA sections instead
-of treating them as text. Except it doesn't. (This argument is also
-present for HTMLParser, and also does nothing there.)
-
-Currently, htm5lib converts CDATA sections into comments. An
-as-yet-unreleased version of html5lib changes the parser's handling of
-CDATA sections to allow CDATA sections in tags like <svg> and
-<math>. The HTML5TreeBuilder will need to be updated to create CData
-objects instead of Comment objects in this situation.
diff --git a/TODO.txt b/TODO.txt
new file mode 100644
index 0000000..2f03dd2
--- /dev/null
+++ b/TODO.txt
@@ -0,0 +1,42 @@
+Bugs
+----
+
+* I think whitespace may not be processed correctly.
+
+* html5lib doesn't support SoupStrainers, which is OK, but there
+  should be a warning about it.
+
+Big features
+------------
+
+* Add namespace support.
+
+Optimizations
+-------------
+
+markup_attr_map can be optimized since it's always a map now.
+
+BS3 features not yet ported
+---------------------------
+
+* In BS3, "soup.aTag" is the same as 'soup.find("a")'. This lets you
+locate a tag called (let's say) "find" with attribute
+access. "soup.find" won't do what you want, but "soup.findTag" will.
+
+This still works in BS4 but it's deprecated. I could make
+"soup.find_tag" work the same way as "soup.find('find')", but I don't
+think it's worth it.
+
+CDATA
+-----
+
+The elementtree XMLParser has a strip_cdata argument that, when set to
+False, should allow Beautiful Soup to preserve CDATA sections instead
+of treating them as text. Except it doesn't. (This argument is also
+present for HTMLParser, and also does nothing there.)
+
+Currently, html5lib converts CDATA sections into comments. An
+as-yet-unreleased version of html5lib changes the parser's handling of
+CDATA sections to allow CDATA sections in tags like <svg> and
+<math>. The HTML5TreeBuilder will need to be updated to create CData
+objects instead of Comment objects in this situation.
diff --git a/setup.py b/setup.py
index 7fce6b0..4eebcb8 100644
--- a/setup.py
+++ b/setup.py
@@ -7,16 +7,21 @@ except ImportError:
     from distutils.command.build_py import build_py
 
 setup(name="beautifulsoup4",
-    version = "4.0.0b4",
-    author="Leonard Richardson",
-    author_email='leonardr@segfault.org',
-    url="http://www.crummy.com/software/BeautifulSoup/bs4/",
-    download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/",
-    long_description="""Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.""",
-    license="MIT",
-    packages=['bs4', 'bs4.builder', 'bs4.tests'],
-    package_data={"bs4": ["doc/source/*.jpg", "doc/source/*.rst", "doc/source/*.py", "doc/Makefile"]},
-    cmdclass = {'build_py':build_py},
+      version = "4.0.0b4",
+      author="Leonard Richardson",
+      author_email='leonardr@segfault.org',
+      url="http://www.crummy.com/software/BeautifulSoup/bs4/",
+      download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/",
+      long_description="""Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.""",
+      license="MIT",
+      packages=['bs4', 'bs4.builder', 'bs4.tests'],
+      package_data={
+        "bs4": ["doc/source/*.jpg", "doc/source/*.rst",
+                "doc/source/*.py", "doc/Makefile"]
+        },
+      data_files=[
+        ('', ['COPYING.txt', 'NEWS.txt', 'AUTHORS.txt', 'README.txt'])],
+      cmdclass = {'build_py':build_py},
       classifiers=["Development Status :: 4 - Beta",
                    "Intended Audience :: Developers",
                    "License :: OSI Approved :: MIT License",
-- 
cgit v1.2.3