From ab9cb80602ca05d1e8a80bd2b6c5c8e55ac55963 Mon Sep 17 00:00:00 2001
From: Miroslav Stampar <miroslav.stampar@gmail.com>
Date: Mon, 23 Jul 2012 15:14:52 +0200
Subject: [PATCH] Implementing Issue #111

---
 lib/core/convert.py  | 1 -
 lib/request/basic.py | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/core/convert.py b/lib/core/convert.py
index fa8b6ebd9..5c8c71e2d 100644
--- a/lib/core/convert.py
+++ b/lib/core/convert.py
@@ -144,5 +144,4 @@ def htmlunescape(value):
     if value and isinstance(value, basestring):
         codes = (('&lt;', '<'), ('&gt;', '>'), ('&quot;', '"'), ('&nbsp;', ' '), ('&amp;', '&'))
         retVal = reduce(lambda x, y: x.replace(y[0], y[1]), codes, retVal)
-        retVal = re.sub('&#(\d+);', lambda x: getUnicode(chr(x.group(1))), retVal)
     return retVal
diff --git a/lib/request/basic.py b/lib/request/basic.py
index 07b718ced..506b60e47 100644
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@@ -209,8 +209,10 @@ def decodePage(page, contentEncoding, contentType):
     else:
         kb.pageEncoding = conf.charset
 
+    # can't convert every response to unicode because we need to support binary files too
     if contentType and not isinstance(page, unicode) and any(map(lambda x: x in contentType.lower(), ("text/txt", "text/raw", "text/html", "text/xml"))):
-        # can't do for all responses because we need to support binary files too
+        if "&#" in page:
+            page = re.sub('&#(\d+);', lambda _: chr(int(_.group(1))), page)
         kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
         page = getUnicode(page, kb.pageEncoding)