Fixing thread-safety in getHeuristicCharEncoding

This commit is contained in:
Miroslav Stampar
2026-01-08 20:59:40 +01:00
parent 603295e68d
commit 2e00154e38
4 changed files with 10 additions and 7 deletions

View File

@@ -259,8 +259,11 @@ def getHeuristicCharEncoding(page):
     """
 
     key = (len(page), hash(page))
-    retVal = kb.cache.encoding[key] if key in kb.cache.encoding else detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
-    kb.cache.encoding[key] = retVal
+    retVal = kb.cache.encoding.get(key)
+
+    if retVal is None:
+        retVal = detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
+        kb.cache.encoding[key] = retVal
 
     if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
         infoMsg = "heuristics detected web page charset '%s'" % retVal