4 files changed, 27 insertions, 5 deletions
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index ce39d33..cee55e7 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -66,7 +66,7 @@ import re
 from util import isList, buildSet
 from builder import builder_registry
 from dammit import UnicodeDammit
-from element import NavigableString, Tag
+from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
 
 
 class BeautifulSoup(Tag):
@@ -122,6 +122,7 @@ class BeautifulSoup(Tag):
                     % ",".join(features))
             builder = builder_class()
         self.builder = builder
+        self.is_xml = builder.is_xml
         self.builder.soup = self
 
         self.parse_only = parse_only
@@ -261,6 +262,21 @@ class BeautifulSoup(Tag):
     def handle_data(self, data):
         self.currentData.append(data)
 
+    def decode(self, pretty_print=False, indent_level=0,
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING):
+        """Returns a string or Unicode representation of this document.
+        An XML document is prefixed with an XML declaration, which names
+        `eventual_encoding` unless that argument is None."""
+        prefix = u''
+        if self.is_xml:
+            encoding_part = ''
+            if eventual_encoding is not None:
+                encoding_part = ' encoding="%s"' % eventual_encoding
+            # The declaration must be closed with "?>", not a bare ">".
+            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+        return prefix + super(BeautifulSoup, self).decode(
+            pretty_print, indent_level, eventual_encoding)
+
 
 class StopParsing(Exception):
     pass
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index fb10628..10c6b7f 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -77,7 +77,7 @@ class TreeBuilder(object):
 
     features = []
 
-    assume_html = False
+    is_xml = False
     preserve_whitespace_tags = set()
     empty_element_tags = None # A tag will be considered an empty-element
                               # tag when and only when it has no contents.
@@ -185,8 +185,6 @@ class HTMLTreeBuilder(TreeBuilder):
     Such as which tags are empty-element tags.
     """
 
-    assume_html = True
-
     preserve_whitespace_tags = set(['pre', 'textarea'])
     empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
                               'spacer', 'link', 'frame', 'base'])
diff --git a/beautifulsoup/builder/_lxml.py b/beautifulsoup/builder/_lxml.py
index 4c7a826..23ac485 100644
--- a/beautifulsoup/builder/_lxml.py
+++ b/beautifulsoup/builder/_lxml.py
@@ -20,6 +20,8 @@ LXML = 'lxml'
 class LXMLTreeBuilderForXML(TreeBuilder):
     DEFAULT_PARSER_CLASS = etree.XMLParser
 
+    is_xml = True
+
     # Well, it's permissive by XML parser standards.
     features = [LXML, XML, FAST, PERMISSIVE]
 
@@ -87,10 +89,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         self.soup.handle_data(content)
         self.soup.endData(Comment)
 
+    def test_fragment_to_document(self, fragment):
+        """Prepend an XML declaration to `fragment`. See `TreeBuilder`."""
+        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+
 
 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
 
     features = [LXML, HTML, FAST]
+    is_xml = False
 
     @property
     def default_parser(self):
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py
index 788f72d..9833bd4 100644
--- a/beautifulsoup/dammit.py
+++ b/beautifulsoup/dammit.py
@@ -37,7 +37,8 @@ class EntitySubstitution(object):
         for codepoint, name in codepoint2name.items():
             if codepoint == 34:
                 # There's no point in turning the quotation mark into
-                # &quot--even in attribute values we quote the 
+                # &quot;, unless it happens in an attribute value, which
+                # is done elsewhere.
                 continue;
             character = unichr(codepoint)
             characters.append(character)