adding --titles switch (killer switch for highly dynamic and/or international pages)

This commit is contained in:
Miroslav Stampar
2011-06-10 23:18:43 +00:00
parent 15d72ec566
commit f8dde2c23b
5 changed files with 22 additions and 3 deletions

View File

@@ -9,6 +9,7 @@ See the file 'doc/COPYING' for copying permission
import re
from lib.core.common import extractRegexResult
from lib.core.common import getFilteredPageContent
from lib.core.common import removeDynamicContent
from lib.core.common import wasLastRequestDBMSError
@@ -19,6 +20,7 @@ from lib.core.data import logger
from lib.core.exception import sqlmapNoneDataException
from lib.core.settings import DEFAULT_PAGE_ENCODING
from lib.core.settings import DIFF_TOLERANCE
from lib.core.settings import HTML_TITLE_REGEX
from lib.core.settings import MIN_RATIO
from lib.core.settings import MAX_RATIO
from lib.core.settings import LOWER_RATIO_BOUND
@@ -80,8 +82,13 @@ def comparison(page, getRatioValue=False, pageLength=None):
elif isinstance(seqMatcher.a, unicode) and isinstance(page, str):
seqMatcher.a = seqMatcher.a.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, 'ignore')
seqMatcher.set_seq1(getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a)
seqMatcher.set_seq2(getFilteredPageContent(page, True) if conf.textOnly else page)
if conf.titles:
seqMatcher.set_seq1(extractRegexResult(HTML_TITLE_REGEX, seqMatcher.a))
seqMatcher.set_seq2(extractRegexResult(HTML_TITLE_REGEX, page))
else:
seqMatcher.set_seq1(getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a)
seqMatcher.set_seq2(getFilteredPageContent(page, True) if conf.textOnly else page)
if seqMatcher.a is None or seqMatcher.b is None:
ratio = None
else: