This commit is contained in:
Miroslav Stampar
2019-05-06 00:54:21 +02:00
parent 9bb4930413
commit 33b42a17d7
37 changed files with 127 additions and 97 deletions

View File

@@ -56,6 +56,7 @@ from lib.core.convert import decodeBase64
from lib.core.convert import decodeHex
from lib.core.convert import getBytes
from lib.core.convert import getText
from lib.core.convert import getUnicode
from lib.core.convert import htmlunescape
from lib.core.convert import stdoutencode
from lib.core.data import conf
@@ -2418,50 +2419,6 @@ def getPartRun(alias=True):
else:
return retVal
def getUnicode(value, encoding=None, noneToNull=False):
"""
Return the unicode representation of the supplied value:
>>> getUnicode('test') == u'test'
True
>>> getUnicode(1) == u'1'
True
"""
if noneToNull and value is None:
return NULL
if isinstance(value, six.text_type):
return value
elif isinstance(value, six.binary_type):
# Heuristics (if encoding not explicitly specified)
candidates = filterNone((encoding, kb.get("pageEncoding") if kb.get("originalPage") else None, conf.get("encoding"), UNICODE_ENCODING, sys.getfilesystemencoding()))
if all(_ in value for _ in (b'<', b'>')):
pass
elif any(_ in value for _ in (b":\\", b'/', b'.')) and b'\n' not in value:
candidates = filterNone((encoding, sys.getfilesystemencoding(), kb.get("pageEncoding") if kb.get("originalPage") else None, UNICODE_ENCODING, conf.get("encoding")))
elif conf.get("encoding") and b'\n' not in value:
candidates = filterNone((encoding, conf.get("encoding"), kb.get("pageEncoding") if kb.get("originalPage") else None, sys.getfilesystemencoding(), UNICODE_ENCODING))
for candidate in candidates:
try:
return six.text_type(value, candidate)
except UnicodeDecodeError:
pass
try:
return six.text_type(value, encoding or (kb.get("pageEncoding") if kb.get("originalPage") else None) or UNICODE_ENCODING)
except UnicodeDecodeError:
return six.text_type(value, UNICODE_ENCODING, errors="reversible")
elif isListLike(value):
value = list(getUnicode(_, encoding, noneToNull) for _ in value)
return value
else:
try:
return six.text_type(value)
except UnicodeDecodeError:
return six.text_type(str(value), errors="ignore") # encoding ignored for non-basestring instances
def longestCommonPrefix(*sequences):
"""
Returns longest common prefix occuring in given sequences