Some more DREI stuff

2026-01-23 06:39:02 +00:00 · 2019-04-19 11:24:34 +02:00
parent da15701a55
commit bb7bd51d94
15 changed files with 94 additions and 71 deletions
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@@ -17,6 +17,7 @@ from lib.core.common import Backend
 from lib.core.common import extractErrorMessage
 from lib.core.common import extractRegexResult
 from lib.core.common import filterNone
+from lib.core.common import getBytes
 from lib.core.common import getPublicTypeMembers
 from lib.core.common import getSafeExString
 from lib.core.common import getUnicode
@@ -42,11 +43,11 @@ from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
 from lib.core.settings import META_CHARSET_REGEX
 from lib.core.settings import PARSE_HEADERS_LIMIT
 from lib.core.settings import SELECT_FROM_TABLE_REGEX
-from lib.core.settings import UNICODE_ENCODING
 from lib.core.settings import VIEWSTATE_REGEX
 from lib.parse.headers import headersParser
 from lib.parse.html import htmlParser
 from lib.utils.htmlentities import htmlEntities
+from thirdparty import six
 from thirdparty.chardet import detect
 from thirdparty.odict import OrderedDict

@@ -219,13 +220,13 @@ def checkCharEncoding(encoding, warn=True):
    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
-        codecs.lookup(encoding.encode(UNICODE_ENCODING) if isinstance(encoding, unicode) else encoding)
-    except (LookupError, ValueError):
+        codecs.lookup(encoding)
+    except:
        encoding = None

    if encoding:
        try:
-            unicode(randomStr(), encoding)
+            six.text_type(getBytes(randomStr()), encoding)
        except:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
@@ -313,7 +314,7 @@ def decodePage(page, contentEncoding, contentType):
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
-    if not isinstance(page, unicode) and "text/" in contentType:
+    if isinstance(page, six.binary_type) and "text/" in contentType:
        # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
        if "&#" in page:
            page = re.sub(r"&#x([0-9a-f]{1,2});", lambda _: (_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)).decode("hex"), page)
--- a/lib/request/comparison.py
+++ b/lib/request/comparison.py
@@ -8,6 +8,7 @@ See the file 'LICENSE' for copying permission
 import re

 from lib.core.common import extractRegexResult
+from lib.core.common import getBytes
 from lib.core.common import getFilteredPageContent
 from lib.core.common import listToStrValue
 from lib.core.common import removeDynamicContent
@@ -28,6 +29,7 @@ from lib.core.settings import LOWER_RATIO_BOUND
 from lib.core.settings import UPPER_RATIO_BOUND
 from lib.core.settings import URI_HTTP_HEADER
 from lib.core.threads import getCurrentThreadData
+from thirdparty import six

 def comparison(page, headers, code=None, getRatioValue=False, pageLength=None):
    _ = _adjust(_comparison(page, headers, code, getRatioValue, pageLength), getRatioValue)
@@ -105,10 +107,10 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
    else:
        # Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
        # (e.g. if one page is PDF and the other is HTML)
-        if isinstance(seqMatcher.a, str) and isinstance(page, unicode):
-            page = page.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
-        elif isinstance(seqMatcher.a, unicode) and isinstance(page, str):
-            seqMatcher.a = seqMatcher.a.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
+        if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type):
+            page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
+        elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type):
+            seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")

        if any(_ is None for _ in (page, seqMatcher.a)):
            return None
--- a/lib/request/inject.py
+++ b/lib/request/inject.py
@@ -486,7 +486,7 @@ def getValue(expression, blind=True, union=True, error=True, time=True, fromUser
        singleTimeWarnMessage(warnMsg)

    # Dirty patch (safe-encoded unicode characters)
-    if isinstance(value, unicode) and "\\x" in value:
+    if isinstance(value, six.text_type) and "\\x" in value:
        try:
            candidate = eval(repr(value).replace("\\\\x", "\\x").replace("u'", "'", 1)).decode(conf.encoding or UNICODE_ENCODING)
            if "\\x" not in candidate: