diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 12:21:18 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 12:21:18 -0500 |
commit | 845dbe03bee981bcc5d24ef06ca868042968aa4c (patch) | |
tree | 83672d245b08a7a9456f3c29fa62885b13361bfa | |
parent | 945b719a28c229178e710b749d2af4d00a81bdba (diff) |
Made Unicode, Dammit more PEP-8 compliant.
-rw-r--r-- | beautifulsoup/__init__.py | 2 | ||||
-rw-r--r-- | beautifulsoup/builder/__init__.py | 6 | ||||
-rw-r--r-- | beautifulsoup/builder/lxml_builder.py | 2 | ||||
-rw-r--r-- | beautifulsoup/dammit.py | 16 | ||||
-rw-r--r-- | tests/test_soup.py | 2 |
5 files changed, 14 insertions, 14 deletions
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py index 32ea73f..9ae87a6 100644 --- a/beautifulsoup/__init__.py +++ b/beautifulsoup/__init__.py @@ -149,7 +149,7 @@ class BeautifulStoneSoup(Tag): if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() - self.markup, self.originalEncoding, self.declaredHTMLEncoding = ( + self.markup, self.originalEncoding, self.declared_html_encoding = ( self.builder.prepare_markup(markup, fromEncoding)) try: diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py index 5bf5929..a5e1b06 100644 --- a/beautifulsoup/builder/__init__.py +++ b/beautifulsoup/builder/__init__.py @@ -120,7 +120,7 @@ class HTMLTreeBuilder(TreeBuilder): # This is an interesting meta tag. match = self.CHARSET_RE.search(content) if match: - if (self.soup.declaredHTMLEncoding is not None or + if (self.soup.declared_html_encoding is not None or self.soup.originalEncoding == self.soup.fromEncoding): # An HTML encoding was sniffed while converting # the document to Unicode, or an HTML encoding was @@ -137,8 +137,8 @@ class HTMLTreeBuilder(TreeBuilder): new_charset = match.group(3) if (new_charset is not None and new_charset != self.soup.originalEncoding): - self.soup.declaredHTMLEncoding = new_charset - self.soup._feed(self.soup.declaredHTMLEncoding) + self.soup.declared_html_encoding = new_charset + self.soup._feed(self.soup.declared_html_encoding) raise StopParsing pass return False diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py index a1f8c1e..360e37d 100644 --- a/beautifulsoup/builder/lxml_builder.py +++ b/beautifulsoup/builder/lxml_builder.py @@ -23,7 +23,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder): try_encodings = [user_specified_encoding, document_declared_encoding] dammit = UnicodeDammit(markup, try_encodings, isHTML=True) - return dammit.markup, dammit.originalEncoding, dammit.declaredHTMLEncoding + return dammit.markup, dammit.originalEncoding, 
dammit.declared_html_encoding def feed(self, markup): diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py index 954ca54..f810d15 100644 --- a/beautifulsoup/dammit.py +++ b/beautifulsoup/dammit.py @@ -45,12 +45,12 @@ class UnicodeDammit: CHARSET_ALIASES = { "macintosh" : "mac-roman", "x-sjis" : "shift-jis" } - def __init__(self, markup, overrideEncodings=[], - smartQuotesTo='xml', isHTML=False): - self.declaredHTMLEncoding = None + def __init__(self, markup, override_encodings=[], + smart_quotes_to='xml', isHTML=False): + self.declared_html_encoding = None self.markup, documentEncoding, sniffedEncoding = \ self._detectEncoding(markup, isHTML) - self.smartQuotesTo = smartQuotesTo + self.smart_quotes_to = smart_quotes_to self.triedEncodings = [] if markup == '' or isinstance(markup, unicode): self.originalEncoding = None @@ -59,7 +59,7 @@ class UnicodeDammit: u = None for proposedEncoding in ( - overrideEncodings + [documentEncoding, sniffedEncoding]): + override_encodings + [documentEncoding, sniffedEncoding]): if proposedEncoding is not None: u = self._convertFrom(proposedEncoding) if u: @@ -84,7 +84,7 @@ class UnicodeDammit: orig = match.group(1) sub = self.MS_CHARS.get(orig) if type(sub) == types.TupleType: - if self.smartQuotesTo == 'xml': + if self.smart_quotes_to == 'xml': sub = '&#x'.encode() + sub[1].encode() + ';'.encode() else: sub = '&'.encode() + sub[0].encode() + ';'.encode() @@ -101,7 +101,7 @@ class UnicodeDammit: # Convert smart quotes to HTML if coming from an encoding # that might have them. 
- if self.smartQuotesTo and proposed.lower() in("windows-1252", + if self.smart_quotes_to and proposed.lower() in("windows-1252", "iso-8859-1", "iso-8859-2"): smart_quotes_re = "([\x80-\x9f])" @@ -205,7 +205,7 @@ class UnicodeDammit: xml_encoding = xml_encoding_match.groups()[0].decode( 'ascii').lower() if isHTML: - self.declaredHTMLEncoding = xml_encoding + self.declared_html_encoding = xml_encoding if sniffed_xml_encoding and \ (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', diff --git a/tests/test_soup.py b/tests/test_soup.py index 4fb2142..7414403 100644 --- a/tests/test_soup.py +++ b/tests/test_soup.py @@ -27,7 +27,7 @@ class TestUnicodeDammit(unittest.TestCase): def test_smart_quotes_to_html_entities(self): markup = "<foo>\x91\x92\x93\x94</foo>" - dammit = UnicodeDammit(markup, smartQuotesTo="html") + dammit = UnicodeDammit(markup, smart_quotes_to="html") self.assertEquals( dammit.unicode, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>") |