-rw-r--r--  NEWS.txt                   | 25
-rw-r--r--  bs4/__init__.py            |  2
-rw-r--r--  bs4/builder/_htmlparser.py |  2
-rw-r--r--  bs4/element.py             |  7
-rw-r--r--  bs4/testing.py             |  2
-rw-r--r--  bs4/tests/test_tree.py     |  7
-rw-r--r--  doc/source/index.rst       | 18
-rw-r--r--  setup.py                   |  2

8 files changed, 32 insertions, 33 deletions
diff --git a/NEWS.txt b/NEWS.txt
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,9 +1,22 @@
 = 4.3.0 (Unreleased) =
 
-* A NavigableString object now has an immutable '.name' property whose
-  value is always None. This makes it easier to iterate over a mixed
-  list of tags and strings without having to check whether each
-  element is a tag or a string.
+* Instead of converting incoming data to Unicode and feeding it to the
+  lxml tree builder, Beautiful Soup now makes successive guesses at
+  the encoding of the incoming data, and tells lxml to parse the data
+  as that encoding. This improves performance and avoids an issue in
+  which lxml was refusing to parse strings because they were Unicode
+  strings.
+
+  This required a major overhaul of the tree builder architecture. If
+  you wrote your own tree builder and didn't tell me, you'll need to
+  modify your prepare_markup() method.
+
+* The UnicodeDammit code that makes guesses at encodings has been
+  split into its own class, EncodingDetector. A lot of apparently
+  redundant code has been removed from Unicode, Dammit, and some
+  undocumented features have also been removed.
+
+= 4.2.1 (20130531) =
 
 * The default XML formatter will now replace ampersands even if they
   appear to be part of entities. That is, "&lt;" will become
@@ -29,6 +42,10 @@
 * html5lib now supports Python 3. Fixed some Python 2-specific code
   in the html5lib test suite. [bug=1181624]
 
+* The html.parser treebuilder can now handle numeric attributes in
+  text when the hexidecimal name of the attribute starts with a
+  capital X. Patch by Tim Shirley. [bug=1186242]
+
 = 4.2.0 (20130514) =
 
 * The Tag.select() method now supports a much wider variety of CSS
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 956f26e..7b5964a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.3.0"
+__version__ = "4.2.1"
 __copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
 __license__ = "MIT"
 
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 2b98969..4b80f79 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -58,6 +58,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
         # it's fixed.
         if name.startswith('x'):
             real_name = int(name.lstrip('x'), 16)
+        elif name.startswith('X'):
+            real_name = int(name.lstrip('X'), 16)
         else:
             real_name = int(name)
 
diff --git a/bs4/element.py b/bs4/element.py
index 538f6b6..f6864f2 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -672,13 +672,6 @@ class NavigableString(unicode, PageElement):
         output = self.format_string(self, formatter)
         return self.PREFIX + output + self.SUFFIX
 
-    @property
-    def name(self):
-        return None
-
-    @name.setter
-    def name(self, name):
-        raise AttributeError("A NavigableString cannot be given a name.")
 
 class PreformattedString(NavigableString):
     """A NavigableString not subject to the normal formatting rules.
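
For readers skimming the _htmlparser.py hunk above, here is a standalone sketch of
the character-reference logic that the patch extends: hexadecimal references may now
begin with a capital "X" as well as a lowercase "x". The resolve_charref helper is
illustrative only and is not part of Beautiful Soup; the real change lives in
BeautifulSoupHTMLParser.handle_charref, and this sketch assumes Python 3 (chr rather
than unichr)::

    # Illustrative sketch (not bs4 code) of the patched charref handling.
    def resolve_charref(name):
        # "xf1" and "Xf1" are hexadecimal forms of 241; "241" is decimal.
        if name.startswith('x'):
            real_name = int(name.lstrip('x'), 16)
        elif name.startswith('X'):
            real_name = int(name.lstrip('X'), 16)
        else:
            real_name = int(name)
        return chr(real_name)

    # All three spellings of the numeric reference resolve to the same character.
    assert (resolve_charref('241') == resolve_charref('xf1')
            == resolve_charref('Xf1')
            == '\N{LATIN SMALL LETTER N WITH TILDE}')
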
diff --git a/bs4/testing.py b/bs4/testing.py
index c363a89..fd4495a 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -228,12 +228,14 @@ class HTMLTreeBuilderSmokeTest(object):
         expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
         self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
+        self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
 
     def test_entities_in_text_converted_to_unicode(self):
         expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
         self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
         self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
+        self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
         self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
 
     def test_quot_entity_converted_to_quotation_mark(self):
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index fc0e2c6..2d09f96 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1187,13 +1187,6 @@ class TestElementObjects(SoupTest):
         soup = self.soup("foo<!--IGNORE-->bar")
         self.assertEqual(['foo', 'bar'], list(soup.strings))
 
-    def test_string_has_immutable_name_property(self):
-        string = self.soup("s").string
-        self.assertEqual(None, string.name)
-        def t():
-            string.name = 'foo'
-        self.assertRaises(AttributeError, t)
-
 
 class TestCDAtaListAttributes(SoupTest):
     """Testing cdata-list attributes like 'class'.
diff --git a/doc/source/index.rst b/doc/source/index.rst
index a91854c..1b38df7 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2478,9 +2478,11 @@ become Unicode::
     dammit.original_encoding
     # 'utf-8'
 
-The more data you give Unicode, Dammit, the more accurately it will
-guess. If you have your own suspicions as to what the encoding might
-be, you can pass them in as a list::
+Unicode, Dammit's guesses will get a lot more accurate if you install
+the ``chardet`` or ``cchardet`` Python libraries. The more data you
+give Unicode, Dammit, the more accurately it will guess. If you have
+your own suspicions as to what the encoding might be, you can pass
+them in as a list::
 
     dammit = UnicodeDammit("Sacr\xe9 bleu!", ["latin-1", "iso-8859-1"])
     print(dammit.unicode_markup)
@@ -2823,16 +2825,6 @@ significantly faster using lxml than using html.parser or html5lib.
 You can speed up encoding detection significantly by installing the
 `cchardet <http://pypi.python.org/pypi/cchardet/>`_ library.
 
-Sometimes `Unicode, Dammit`_ can only detect the encoding of a file by
-doing a byte-by-byte examination of the file. This slows Beautiful
-Soup to a crawl. My tests indicate that this only happened on 2.x
-versions of Python, and that it happened most often with documents
-using Russian or Chinese encodings. If this is happening to you, you
-can fix it by installing cchardet, or by using Python 3 for your
-script. If you happen to know a document's encoding, you can pass
-it into the ``BeautifulSoup`` constructor as ``from_encoding``, and
-bypass encoding detection altogether.
-
 `Parsing only part of a document`_ won't save you much time parsing
 the document, but it can save a lot of memory, and it'll make
 `searching` the document much faster.
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ except ImportError:
     from distutils.command.build_py import build_py
 
 setup(name="beautifulsoup4",
-      version = "4.2.0",
+      version = "4.2.1",
       author="Leonard Richardson",
       author_email='leonardr@segfault.org',
       url="http://www.crummy.com/software/BeautifulSoup/bs4/",
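
The doc/source/index.rst hunks above describe how Unicode, Dammit accepts a list of
candidate encodings and guesses better when chardet or cchardet is installed. A short
usage sketch of that documented behaviour, assuming bs4 is installed and running under
Python 3 (hence the bytes literal); the expected output follows the documentation
excerpt shown in the diff::

    from bs4 import UnicodeDammit

    # Suggest candidate encodings; installing chardet or cchardet further
    # improves detection for markup that does not declare its encoding.
    dammit = UnicodeDammit(b"Sacr\xe9 bleu!", ["latin-1", "iso-8859-1"])
    print(dammit.unicode_markup)     # Sacré bleu!
    print(dammit.original_encoding)  # latin-1
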