Implementing Issue #111

This commit is contained in:
Miroslav Stampar
2012-07-23 15:14:52 +02:00
parent 6809449e31
commit ab9cb80602
2 changed files with 3 additions and 2 deletions

View File

@@ -209,8 +209,10 @@ def decodePage(page, contentEncoding, contentType):
else:
kb.pageEncoding = conf.charset
# can't do for all responses because we need to support binary files too
if contentType and not isinstance(page, unicode) and any(map(lambda x: x in contentType.lower(), ("text/txt", "text/raw", "text/html", "text/xml"))):
# can't do for all responses because we need to support binary files too
if "&#" in page:
page = re.sub('&#(\d+);', lambda _: chr(int(_.group(1))), page)
kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
page = getUnicode(page, kb.pageEncoding)