mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-12-09 14:11:29 +00:00
Update of 3rd party library chardet
This commit is contained in:
163
thirdparty/chardet/hebrewprober.py
vendored
163
thirdparty/chardet/hebrewprober.py
vendored
@@ -26,8 +26,7 @@
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .constants import eNotMe, eDetecting
|
||||
from .compat import wrap_ord
|
||||
from .enums import ProbingState
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
@@ -126,56 +125,59 @@ from .compat import wrap_ord
|
||||
# model probers scores. The answer is returned in the form of the name of the
|
||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
def __init__(self):
|
||||
CharSetProber.__init__(self)
|
||||
self._mLogicalProber = None
|
||||
self._mVisualProber = None
|
||||
super(HebrewProber, self).__init__()
|
||||
self._final_char_logical_score = None
|
||||
self._final_char_visual_score = None
|
||||
self._prev = None
|
||||
self._before_prev = None
|
||||
self._logical_prober = None
|
||||
self._visual_prober = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._mFinalCharLogicalScore = 0
|
||||
self._mFinalCharVisualScore = 0
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._mPrev = ' '
|
||||
self._mBeforePrev = ' '
|
||||
self._prev = ' '
|
||||
self._before_prev = ' '
|
||||
# These probers are owned by the group prober.
|
||||
|
||||
def set_model_probers(self, logicalProber, visualProber):
|
||||
self._mLogicalProber = logicalProber
|
||||
self._mVisualProber = visualProber
|
||||
self._logical_prober = logicalProber
|
||||
self._visual_prober = visualProber
|
||||
|
||||
def is_final(self, c):
|
||||
return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
|
||||
FINAL_TSADI]
|
||||
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
|
||||
self.FINAL_PE, self.FINAL_TSADI]
|
||||
|
||||
def is_non_final(self, c):
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
@@ -188,9 +190,10 @@ class HebrewProber(CharSetProber):
|
||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||
# benefit of these letters as Non-Final letters outweighs the damage
|
||||
# since these words are quite rare.
|
||||
return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
|
||||
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
|
||||
self.NORMAL_NUN, self.NORMAL_PE]
|
||||
|
||||
def feed(self, aBuf):
|
||||
def feed(self, byte_str):
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
@@ -217,67 +220,73 @@ class HebrewProber(CharSetProber):
|
||||
# We automatically filter out all 7-bit characters (replace them with
|
||||
# spaces) so the word boundary detection works properly. [MAP]
|
||||
|
||||
if self.get_state() == eNotMe:
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
# Both model probers say it's not them. No reason to continue.
|
||||
return eNotMe
|
||||
return ProbingState.NOT_ME
|
||||
|
||||
aBuf = self.filter_high_bit_only(aBuf)
|
||||
byte_str = self.filter_high_byte_only(byte_str)
|
||||
|
||||
for cur in aBuf:
|
||||
for cur in byte_str:
|
||||
if cur == ' ':
|
||||
# We stand on a space - a word just ended
|
||||
if self._mBeforePrev != ' ':
|
||||
# next-to-last char was not a space so self._mPrev is not a
|
||||
if self._before_prev != ' ':
|
||||
# next-to-last char was not a space so self._prev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._mPrev):
|
||||
if self.is_final(self._prev):
|
||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||
self._mFinalCharLogicalScore += 1
|
||||
elif self.is_non_final(self._mPrev):
|
||||
self._final_char_logical_score += 1
|
||||
elif self.is_non_final(self._prev):
|
||||
# case (2) [-2:not space][-1:Non-Final letter][
|
||||
# cur:space]
|
||||
self._mFinalCharVisualScore += 1
|
||||
self._final_char_visual_score += 1
|
||||
else:
|
||||
# Not standing on a space
|
||||
if ((self._mBeforePrev == ' ') and
|
||||
(self.is_final(self._mPrev)) and (cur != ' ')):
|
||||
if ((self._before_prev == ' ') and
|
||||
(self.is_final(self._prev)) and (cur != ' ')):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._mFinalCharVisualScore += 1
|
||||
self._mBeforePrev = self._mPrev
|
||||
self._mPrev = cur
|
||||
self._final_char_visual_score += 1
|
||||
self._before_prev = self._prev
|
||||
self._prev = cur
|
||||
|
||||
# Forever detecting, till the end or until both model probers return
|
||||
# eNotMe (handled above)
|
||||
return eDetecting
|
||||
# ProbingState.NOT_ME (handled above)
|
||||
return ProbingState.DETECTING
|
||||
|
||||
def get_charset_name(self):
|
||||
@property
|
||||
def charset_name(self):
|
||||
# Make the decision: is it Logical or Visual?
|
||||
# If the final letter score distance is dominant enough, rely on it.
|
||||
finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
|
||||
if finalsub >= MIN_FINAL_CHAR_DISTANCE:
|
||||
return LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
|
||||
return VISUAL_HEBREW_NAME
|
||||
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# It's not dominant enough, try to rely on the model scores instead.
|
||||
modelsub = (self._mLogicalProber.get_confidence()
|
||||
- self._mVisualProber.get_confidence())
|
||||
if modelsub > MIN_MODEL_DISTANCE:
|
||||
return LOGICAL_HEBREW_NAME
|
||||
if modelsub < -MIN_MODEL_DISTANCE:
|
||||
return VISUAL_HEBREW_NAME
|
||||
modelsub = (self._logical_prober.get_confidence()
|
||||
- self._visual_prober.get_confidence())
|
||||
if modelsub > self.MIN_MODEL_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if modelsub < -self.MIN_MODEL_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# Still no good, back to final letter distance, maybe it'll save the
|
||||
# day.
|
||||
if finalsub < 0.0:
|
||||
return VISUAL_HEBREW_NAME
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||
# Logical.
|
||||
return LOGICAL_HEBREW_NAME
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
|
||||
def get_state(self):
|
||||
@property
|
||||
def language(self):
|
||||
return 'Hebrew'
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._mLogicalProber.get_state() == eNotMe) and \
|
||||
(self._mVisualProber.get_state() == eNotMe):
|
||||
return eNotMe
|
||||
return eDetecting
|
||||
if (self._logical_prober.state == ProbingState.NOT_ME) and \
|
||||
(self._visual_prober.state == ProbingState.NOT_ME):
|
||||
return ProbingState.NOT_ME
|
||||
return ProbingState.DETECTING
|
||||
|
||||
Reference in New Issue
Block a user