Minor adjustments

This commit is contained in:
Miroslav Stampar
2019-03-13 17:20:14 +01:00
parent da1982c4af
commit 8ed5e88be6
3 changed files with 5 additions and 6 deletions

View File

@@ -2379,14 +2379,13 @@ def getUnicode(value, encoding=None, noneToNull=False):
return value
elif isinstance(value, basestring):
# Heuristics (if encoding not explicitly specified)
candidates = filter(None, (encoding, kb.get("pageEncoding") if kb.get("originalPage") else None, conf.get("encoding"), UNICODE_ENCODING, sys.getfilesystemencoding()))
if all(_ in value for _ in ('<', '>')):
candidates = filter(None, (encoding, kb.get("pageEncoding") if kb.get("originalPage") else None, conf.get("encoding"), sys.getfilesystemencoding(), UNICODE_ENCODING))
pass
elif any(_ in value for _ in (":\\", '/', '.')) and '\n' not in value:
candidates = filter(None, (encoding, sys.getfilesystemencoding(), kb.get("pageEncoding") if kb.get("originalPage") else None, UNICODE_ENCODING, conf.get("encoding")))
elif conf.get("encoding") and '\n' not in value:
candidates = filter(None, (encoding, conf.get("encoding"), kb.get("pageEncoding") if kb.get("originalPage") else None, sys.getfilesystemencoding(), UNICODE_ENCODING))
else:
candidates = filter(None, (encoding, kb.get("pageEncoding") if kb.get("originalPage") else None, UNICODE_ENCODING, conf.get("encoding"), sys.getfilesystemencoding()))
for candidate in candidates:
try: