Minor refactoring

2026-01-21 05:39:16 +00:00 · 2017-11-09 12:24:58 +01:00
parent 9404b63a42
commit d148694a4b
3 changed files with 16 additions and 6 deletions
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@@ -254,12 +254,22 @@ def decodePage(page, contentEncoding, contentType):
    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

-    if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ("gzip", "x-gzip", "deflate"):
+    if isinstance(contentEncoding, basestring) and contentEncoding:
+        contentEncoding = contentEncoding.lower()
+    else:
+        contentEncoding = ""
+
+    if isinstance(contentType, basestring) and contentType:
+        contentType = contentType.lower()
+    else:
+        contentType = ""
+
+    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
-            if contentEncoding.lower() == "deflate":
+            if contentEncoding == "deflate":
                data = StringIO.StringIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page))
@@ -284,7 +294,7 @@ def decodePage(page, contentEncoding, contentType):
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
-        if contentType and (contentType.find("charset=") != -1):
+        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))
@@ -300,7 +310,7 @@ def decodePage(page, contentEncoding, contentType):
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
-    if contentType and not isinstance(page, unicode) and "text/" in contentType.lower():
+    if not isinstance(page, unicode) and "text/" in contentType:
        if kb.heuristicMode:
            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
            page = getUnicode(page, kb.pageEncoding)