Some more DREI stuff

This commit is contained in:
Miroslav Stampar
2019-04-19 11:24:34 +02:00
parent da15701a55
commit bb7bd51d94
15 changed files with 94 additions and 71 deletions

View File

@@ -17,6 +17,7 @@ from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import filterNone
from lib.core.common import getBytes
from lib.core.common import getPublicTypeMembers
from lib.core.common import getSafeExString
from lib.core.common import getUnicode
@@ -42,11 +43,11 @@ from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from lib.utils.htmlentities import htmlEntities
from thirdparty import six
from thirdparty.chardet import detect
from thirdparty.odict import OrderedDict
@@ -219,13 +220,13 @@ def checkCharEncoding(encoding, warn=True):
# Reference: http://www.iana.org/assignments/character-sets
# Reference: http://docs.python.org/library/codecs.html
try:
codecs.lookup(encoding.encode(UNICODE_ENCODING) if isinstance(encoding, unicode) else encoding)
except (LookupError, ValueError):
codecs.lookup(encoding)
except:
encoding = None
if encoding:
try:
unicode(randomStr(), encoding)
six.text_type(getBytes(randomStr()), encoding)
except:
if warn:
warnMsg = "invalid web page charset '%s'" % encoding
@@ -313,7 +314,7 @@ def decodePage(page, contentEncoding, contentType):
kb.pageEncoding = conf.encoding
# can't do for all responses because we need to support binary files too
if not isinstance(page, unicode) and "text/" in contentType:
if isinstance(page, six.binary_type) and "text/" in contentType:
# e.g. 	Ãëàâà
if "&#" in page:
page = re.sub(r"&#x([0-9a-f]{1,2});", lambda _: (_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)).decode("hex"), page)

View File

@@ -8,6 +8,7 @@ See the file 'LICENSE' for copying permission
import re
from lib.core.common import extractRegexResult
from lib.core.common import getBytes
from lib.core.common import getFilteredPageContent
from lib.core.common import listToStrValue
from lib.core.common import removeDynamicContent
@@ -28,6 +29,7 @@ from lib.core.settings import LOWER_RATIO_BOUND
from lib.core.settings import UPPER_RATIO_BOUND
from lib.core.settings import URI_HTTP_HEADER
from lib.core.threads import getCurrentThreadData
from thirdparty import six
def comparison(page, headers, code=None, getRatioValue=False, pageLength=None):
_ = _adjust(_comparison(page, headers, code, getRatioValue, pageLength), getRatioValue)
@@ -105,10 +107,10 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
else:
# Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
# (e.g. if one page is PDF and the other is HTML)
if isinstance(seqMatcher.a, str) and isinstance(page, unicode):
page = page.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
elif isinstance(seqMatcher.a, unicode) and isinstance(page, str):
seqMatcher.a = seqMatcher.a.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type):
page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type):
seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
if any(_ is None for _ in (page, seqMatcher.a)):
return None

View File

@@ -486,7 +486,7 @@ def getValue(expression, blind=True, union=True, error=True, time=True, fromUser
singleTimeWarnMessage(warnMsg)
# Dirty patch (safe-encoded unicode characters)
if isinstance(value, unicode) and "\\x" in value:
if isinstance(value, six.text_type) and "\\x" in value:
try:
candidate = eval(repr(value).replace("\\\\x", "\\x").replace("u'", "'", 1)).decode(conf.encoding or UNICODE_ENCODING)
if "\\x" not in candidate: