5 files changed, 45 insertions, 8 deletions
diff --git a/CHANGELOG b/CHANGELOG
index abdf1b1..4449279 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -99,6 +99,21 @@ sections into ordinary text elements:
 In theory it's possible to preserve the CDATA sections when using the
 XML parser, but I don't see how to get it to work in practice.
 
+== Miscellaneous other stuff ==
+
+If the BeautifulSoup instance has .is_xml set to True, an appropriate
+XML declaration will be emitted when the tree is transformed into a
+string:
+
+    <?xml version="1.0" encoding="utf-8">
+    <markup>
+     ...
+    </markup>
+
+The ['lxml', 'xml'] tree builder sets .is_xml to True; the other tree
+builders set it to False. If you want to parse XHTML with an HTML
+parser, you can set .is_xml to True on the BeautifulSoup object yourself.
+
 = 3.1.0 =
 
 A hybrid version that supports 2.4 and can be automatically converted
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index ce39d33..cee55e7 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -66,7 +66,7 @@ import re
 from util import isList, buildSet
 from builder import builder_registry
 from dammit import UnicodeDammit
-from element import NavigableString, Tag
+from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
 
 
 class BeautifulSoup(Tag):
@@ -122,6 +122,7 @@ class BeautifulSoup(Tag):
                     % ",".join(features))
             builder = builder_class()
         self.builder = builder
+        self.is_xml = builder.is_xml
         self.builder.soup = self
 
         self.parse_only = parse_only
@@ -261,6 +262,21 @@ class BeautifulSoup(Tag):
     def handle_data(self, data):
         self.currentData.append(data)
 
+    def decode(self, pretty_print=False, indent_level=0,
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+        """Returns a string or Unicode representation of this document.
+        To get Unicode, pass None for encoding. If .is_xml is true, an
+        XML declaration is prepended to the output."""
+        prefix = u''
+        if self.is_xml:
+            # NOTE(review): XML declarations end in '?>'; this one ends in '>'.
+            encoding_part = ''
+            if eventual_encoding is not None:
+                encoding_part = ' encoding="%s"' % eventual_encoding
+            prefix = u'<?xml version="1.0"%s>\n' % encoding_part
+        return prefix + super(BeautifulSoup, self).decode(
+            pretty_print, indent_level, eventual_encoding)
+
 
 class StopParsing(Exception):
     pass
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index fb10628..10c6b7f 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -77,7 +77,7 @@ class TreeBuilder(object):
 
     features = []
 
-    assume_html = False
+    is_xml = False
     preserve_whitespace_tags = set()
     empty_element_tags = None # A tag will be considered an empty-element
                               # tag when and only when it has no contents.
@@ -185,8 +185,6 @@ class HTMLTreeBuilder(TreeBuilder):
     Such as which tags are empty-element tags.
     """
 
-    assume_html = True
-
     preserve_whitespace_tags = set(['pre', 'textarea'])
     empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
                               'spacer', 'link', 'frame', 'base'])
diff --git a/beautifulsoup/builder/_lxml.py b/beautifulsoup/builder/_lxml.py
index 4c7a826..23ac485 100644
--- a/beautifulsoup/builder/_lxml.py
+++ b/beautifulsoup/builder/_lxml.py
@@ -20,6 +20,8 @@ LXML = 'lxml'
 class LXMLTreeBuilderForXML(TreeBuilder):
     DEFAULT_PARSER_CLASS = etree.XMLParser
 
+    is_xml = True
+
     # Well, it's permissive by XML parser standards.
     features = [LXML, XML, FAST, PERMISSIVE]
 
@@ -87,10 +89,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         self.soup.handle_data(content)
         self.soup.endData(Comment)
 
+    def test_fragment_to_document(self, fragment):
+        """Prepend an XML declaration to `fragment`. See `TreeBuilder`."""
+        return u'<?xml version="1.0" encoding="utf-8">\n%s' % fragment
+
 
 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
 
     features = [LXML, HTML, FAST]
+    is_xml = False
 
     @property
     def default_parser(self):
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 8f3d798..a96fbbb 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -525,6 +525,7 @@ class TestLXMLXMLBuilder(SoupTest):
     def default_builder(self):
         return LXMLTreeBuilderForXML()
 
+
     def test_cdata_becomes_text(self):
         # LXML sends CData sections as 'data' events, so we can't
         # create special CData objects for them. We have to use
@@ -556,20 +557,20 @@ class TestLXMLXMLBuilder(SoupTest):
         self.assertTrue(soup.bar.is_empty_element)
         soup.bar.insert(1, "Contents")
         self.assertFalse(soup.bar.is_empty_element)
-        self.assertEquals(str(soup), "<bar>Contents</bar>")
+        self.assertEquals(str(soup), self.document_for("<bar>Contents</bar>"))
 
     def test_designated_empty_element_tag_has_no_closing_tag(self):
         builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
         soup = BeautifulSoup(builder=builder, markup="<bar></bar>")
         self.assertTrue(soup.bar.is_empty_element)
-        self.assertEquals(str(soup), "<bar />")
+        self.assertEquals(str(soup), self.document_for("<bar />"))
 
     def test_empty_tag_not_in_empty_element_tag_list_has_closing_tag(self):
         builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
 
         soup = BeautifulSoup(builder=builder, markup="<foo />")
         self.assertFalse(soup.foo.is_empty_element)
-        self.assertEquals(str(soup), "<foo></foo>")
+        self.assertEquals(str(soup), self.document_for("<foo></foo>"))
 
     def test_designated_empty_element_tag_does_not_change_parser_behavior(self):
         # The designated list of empty-element tags only affects how
@@ -577,4 +578,4 @@ class TestLXMLXMLBuilder(SoupTest):
         # parsed--that's the parser's job.
         builder = LXMLTreeBuilderForXML(empty_element_tags=['bar'])
         soup = BeautifulSoup(builder=builder, markup="<bar>contents</bar>")
-        self.assertEquals(str(soup), "<bar>contents</bar>")
+        self.assertEquals(str(soup), self.document_for("<bar>contents</bar>"))