Improve "boolean detection" by automatically recognizing a convenient --string candidate

This commit is contained in:
Miroslav Stampar
2012-04-10 21:48:34 +00:00
parent 698b7a15d9
commit 119eec3598
5 changed files with 32 additions and 3 deletions

View File

@@ -124,6 +124,7 @@ from lib.core.settings import TIME_STDEV_COEFF
from lib.core.settings import DYNAMICITY_MARK_LENGTH
from lib.core.settings import REFLECTIVE_MISS_THRESHOLD
from lib.core.settings import SENSITIVE_DATA_REGEX
from lib.core.settings import TEXT_TAG_REGEX
from lib.core.settings import UNION_UNIQUE_FIFO_LENGTH
from lib.core.settings import URI_INJECTION_MARK_CHAR
from lib.core.settings import URI_QUESTION_MARKER
@@ -2155,6 +2156,13 @@ def extractRegexResult(regex, content, flags=0):
return retVal
def extractTextTagContent(page):
    """
    Collects the text found right after each opening "textual" tag

    Every match of TEXT_TAG_REGEX against the page contributes the value of
    its named group 'result'; a false-ish page (e.g. None) is treated as an
    empty string, which gives an empty list
    """

    retVal = []

    for match in re.finditer(TEXT_TAG_REGEX, page or ""):
        retVal.append(match.group('result'))

    return retVal
def trimAlphaNum(value):
"""
Trims alpha numeric characters from start and ending of a given value

View File

@@ -62,10 +62,13 @@ URI_QUESTION_MARKER = "__QUESTION_MARK__"
PAYLOAD_DELIMITER = "\x00"
CHAR_INFERENCE_MARK = "%c"

# regular expression matching a single "printable" character, i.e. one that is
# outside of the control range 0x00-0x1f and outside of the range 0x7f-0xff
# (the excluded range starts at DEL, 0x7f, because 0x7e '~' is printable ASCII)
PRINTABLE_CHAR_REGEX = r"[^\x00-\x1f\x7f-\xff]"

# regular expression used for extracting results from google search
GOOGLE_REGEX = r"url\?q=(http[^>]+)&sa=U&amp"

# regular expression used for extracting content from "textual" tags
# (named group 'result' holds the text between the opening tag and the next '<')
TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h\d|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)"

# dumping characters used in GROUP_CONCAT MySQL technique
CONCAT_ROW_DELIMITER = ','

View File

@@ -43,6 +43,7 @@ class _ThreadData(threading.local):
self.disableStdOut = False
self.hashDBCursor = None
self.inTransaction = False
self.lastComparisonPage = None
self.lastErrorPage = None
self.lastHTTPError = None
self.lastRedirectMsg = None