Minor patch for crawling

Miroslav Stampar
2019-11-12 22:51:11 +01:00
parent e58d68c203
commit f382443ddd
4 changed files with 8 additions and 7 deletions


@@ -550,7 +550,7 @@ class Connect(object):
code = None
responseHeaders = {}
- page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
+ page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE), percentDecode=not crawling)
status = getUnicode(conn.msg) if conn and getattr(conn, "msg", None) else None
kb.connErrorCounter = 0
@@ -628,7 +628,7 @@ class Connect(object):
responseHeaders = ex.info()
responseHeaders[URI_HTTP_HEADER] = ex.geturl()
patchHeaders(responseHeaders)
- page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
+ page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE), percentDecode=not crawling)
except socket.timeout:
warnMsg = "connection timed out while trying "
warnMsg += "to get error page information (%d)" % ex.code