15% speedup in some cases (avoiding heuristic charset detection)

This commit is contained in:
Miroslav Stampar
2021-06-08 21:48:43 +02:00
parent 295cd15dff
commit 63073a1873
2 changed files with 7 additions and 2 deletions

View File

@@ -48,6 +48,7 @@ from lib.core.settings import IDENTYWAF_PARSE_LIMIT
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import PRINTABLE_BYTES
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
@@ -324,7 +325,7 @@ def decodePage(page, contentEncoding, contentType, percentDecode=True):
metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))
if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
kb.pageEncoding = httpCharset or metaCharset # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
debugMsg = "declared web page charset '%s'" % kb.pageEncoding
singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)