Speed up caching of the heuristic char encoding: reuse a cached result even when it is empty instead of re-running detection (non-Unicode chars in page)

This commit is contained in:
Miroslav Stampar
2021-03-03 23:08:00 +01:00
parent b3e454d0b1
commit eeacab0f19
4 changed files with 8 additions and 2 deletions

View File

@@ -259,7 +259,7 @@ def getHeuristicCharEncoding(page):
"""
key = hash(page)
retVal = kb.cache.encoding.get(key) or detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
retVal = kb.cache.encoding[key] if key in kb.cache.encoding else detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
kb.cache.encoding[key] = retVal
if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):