Speed optimization(s)

This commit is contained in:
Miroslav Stampar
2016-09-09 11:06:38 +02:00
parent 8581d9e2ca
commit 9930f1b55b
8 changed files with 51 additions and 26 deletions

View File

@@ -26,6 +26,7 @@ from lib.core.common import singleTimeWarnMessage
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.decorators import cachedmethod
from lib.core.enums import DBMS
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
@@ -136,6 +137,7 @@ def parseResponse(page, headers):
if page:
htmlParser(page)
@cachedmethod
def checkCharEncoding(encoding, warn=True):
"""
Checks encoding name, repairs common misspellings and adjusts to
@@ -230,7 +232,10 @@ def getHeuristicCharEncoding(page):
Returns page encoding charset detected by usage of heuristics
Reference: http://chardet.feedparser.org/docs/
"""
retVal = detect(page)["encoding"]
key = hash(page)
retVal = kb.cache.encoding.get(key) or detect(page)["encoding"]
kb.cache.encoding[key] = retVal
if retVal:
infoMsg = "heuristics detected web page charset '%s'" % retVal

View File

@@ -403,7 +403,7 @@ class Connect(object):
responseHeaders = _(ws.getheaders())
responseHeaders.headers = ["%s: %s\r\n" % (_[0].capitalize(), _[1]) for _ in responseHeaders.items()]
requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items())
requestHeaders += "\n".join(["%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items()])
requestMsg += "\n%s" % requestHeaders
if post is not None:
@@ -422,7 +422,7 @@ class Connect(object):
else:
req = urllib2.Request(url, post, headers)
requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in req.header_items())
requestHeaders += "\n".join(["%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in req.header_items()])
if not getRequestHeader(req, HTTP_HEADER.COOKIE) and conf.cj:
conf.cj._policy._now = conf.cj._now = int(time.time())
@@ -556,7 +556,7 @@ class Connect(object):
responseMsg += "[#%d] (%d %s):\n" % (threadData.lastRequestUID, code, status)
if responseHeaders:
logHeaders = "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items())
logHeaders = "\n".join(["%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items()])
logHTTPTraffic(requestMsg, "%s%s\n\n%s" % (responseMsg, logHeaders, (page or "")[:MAX_CONNECTION_CHUNK_SIZE]))
@@ -691,7 +691,7 @@ class Connect(object):
responseMsg += "[#%d] (%d %s):\n" % (threadData.lastRequestUID, code, status)
if responseHeaders:
logHeaders = "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items())
logHeaders = "\n".join(["%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items()])
if not skipLogTraffic:
logHTTPTraffic(requestMsg, "%s%s\n\n%s" % (responseMsg, logHeaders, (page or "")[:MAX_CONNECTION_CHUNK_SIZE]))