Adding new version of chardet

This commit is contained in:
Miroslav Stampar
2015-10-09 13:35:48 +02:00
parent d424d4cdc7
commit 439d003753
39 changed files with 1499 additions and 1148 deletions

View File

@@ -25,8 +25,10 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import constants, sys
from charsetprober import CharSetProber
from . import constants
import sys
from .charsetprober import CharSetProber
class CharSetGroupProber(CharSetProber):
def __init__(self):
@@ -41,28 +43,32 @@ class CharSetGroupProber(CharSetProber):
for prober in self._mProbers:
if prober:
prober.reset()
prober.active = constants.True
prober.active = True
self._mActiveNum += 1
self._mBestGuessProber = None
def get_charset_name(self):
if not self._mBestGuessProber:
self.get_confidence()
if not self._mBestGuessProber: return None
if not self._mBestGuessProber:
return None
# self._mBestGuessProber = self._mProbers[0]
return self._mBestGuessProber.get_charset_name()
def feed(self, aBuf):
for prober in self._mProbers:
if not prober: continue
if not prober.active: continue
if not prober:
continue
if not prober.active:
continue
st = prober.feed(aBuf)
if not st: continue
if not st:
continue
if st == constants.eFoundIt:
self._mBestGuessProber = prober
return self.get_state()
elif st == constants.eNotMe:
prober.active = constants.False
prober.active = False
self._mActiveNum -= 1
if self._mActiveNum <= 0:
self._mState = constants.eNotMe
@@ -78,18 +84,22 @@ class CharSetGroupProber(CharSetProber):
bestConf = 0.0
self._mBestGuessProber = None
for prober in self._mProbers:
if not prober: continue
if not prober:
continue
if not prober.active:
if constants._debug:
sys.stderr.write(prober.get_charset_name() + ' not active\n')
sys.stderr.write(prober.get_charset_name()
+ ' not active\n')
continue
cf = prober.get_confidence()
if constants._debug:
sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
sys.stderr.write('%s confidence = %s\n' %
(prober.get_charset_name(), cf))
if bestConf < cf:
bestConf = cf
self._mBestGuessProber = prober
if not self._mBestGuessProber: return 0.0
if not self._mBestGuessProber:
return 0.0
return bestConf
# else:
# self._mBestGuessProber = self._mProbers[0]