Inlined some commonly called code to save a function call.

author: Leonard Richardson <leonard.richardson@canonical.com> 2013-06-03 09:32:03 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2013-06-03 09:32:03 -0400
commit: 88699026b6f94d6d7a831e849b01b8f3582d57a4 (patch)
tree: 3aa59a7928bf5477d74b15e165c14a3d4f755e8f
parent: d284ed9a3b16d7259303171934ade247186eb24f (diff)
3 files changed, 26 insertions, 22 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 61de574..50bc67c 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -244,30 +244,34 @@ class BeautifulSoup(Tag):
         if tag.name in self.builder.preserve_whitespace_tags:
             self.preserve_whitespace_tag_stack.append(tag)
 
-    def _contains_only_ascii_spaces(self, s):
-        """Returns true if the given string contains nothing other than ASCII spaces.
-        The empty string meets this criteria.
-        """
-        for i in s:
-            if i not in self.ASCII_SPACES:
-                return False
-        return True
-
     def endData(self, containerClass=NavigableString):
         if self.current_data:
             current_data = u''.join(self.current_data)
-            if (self._contains_only_ascii_spaces(current_data) and
-                not self.preserve_whitespace_tag_stack):
-                # Time to strip the whitespace.
-                if '\n' in current_data:
-                    current_data = '\n'
-                else:
-                    current_data = ' '
+
+            # If whitespace is not preserved, and this string contains
+            # nothing but ASCII spaces, replace it with a single space
+            # or newline.
+            if not self.preserve_whitespace_tag_stack:
+                strippable = True
+                for i in current_data:
+                    if i not in self.ASCII_SPACES:
+                        strippable = False
+                        break
+                if strippable:
+                    if '\n' in current_data:
+                        current_data = '\n'
+                    else:
+                        current_data = ' '
+
+            # Reset the data collector.
             self.current_data = []
+
+            # Should we add this string to the tree at all?
             if self.parse_only and len(self.tagStack) <= 1 and \
                    (not self.parse_only.text or \
                     not self.parse_only.search(current_data)):
                 return
+
             o = containerClass(current_data)
             self.object_was_parsed(o)
 
diff --git a/bs4/dammit.py b/bs4/dammit.py
index a6b8663..c859066 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -295,15 +295,15 @@ class EncodingDetector:
         beginning of the document.
         """
         if search_entire_document:
-            xml_endpos = html_endpos = -1
+            xml_endpos = html_endpos = len(markup)
         else:
-            xml_endpos = 1025
+            xml_endpos = 1024
             html_endpos = max(2048, int(len(markup) * 0.05))
             
         declared_encoding = None
-        declared_encoding_match = xml_encoding_re.search(markup, xml_endpos)
+        declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
         if not declared_encoding_match and is_html:
-            declared_encoding_match = html_meta_re.search(markup, html_endpos)
+            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
         if declared_encoding_match is not None:
             declared_encoding = declared_encoding_match.groups()[0].decode(
                 'ascii')
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index c5a0c06..a2b405b 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -192,8 +192,8 @@ def profile(num_elements=100000, parser="lxml"):
     stats.strip_dirs()
     cumulative = stats.sort_stats("cumulative")
     total = stats.sort_stats("time")
-    cumulative.print_stats(50)
+    total.print_stats(50)
 
 if __name__ == '__main__':
     #diagnose(sys.stdin.read())
-    profile()
+    profile(parser="lxml")
author	Leonard Richardson <leonard.richardson@canonical.com>	2013-06-03 09:32:03 -0400
committer	Leonard Richardson <leonard.richardson@canonical.com>	2013-06-03 09:32:03 -0400
commit	88699026b6f94d6d7a831e849b01b8f3582d57a4 (patch)
tree	3aa59a7928bf5477d74b15e165c14a3d4f755e8f
parent	d284ed9a3b16d7259303171934ade247186eb24f (diff)