Minor update

This commit is contained in:
Miroslav Stampar
2019-11-06 14:45:48 +01:00
parent adbc4bae5d
commit 1b1c37e12c
4 changed files with 24 additions and 21 deletions

View File

@@ -195,27 +195,28 @@ def crawl(target):
for url in threadData.shared.value:
kb.targets.add((urldecode(url, kb.pageEncoding), None, None, None, None))
if kb.normalizeCrawlingChoice is None:
message = "do you want to normalize "
message += "crawling results [Y/n] "
if kb.targets:
if kb.normalizeCrawlingChoice is None:
message = "do you want to normalize "
message += "crawling results [Y/n] "
kb.normalizeCrawlingChoice = readInput(message, default='Y', boolean=True)
kb.normalizeCrawlingChoice = readInput(message, default='Y', boolean=True)
if kb.normalizeCrawlingChoice:
seen = set()
results = OrderedSet()
if kb.normalizeCrawlingChoice:
seen = set()
results = OrderedSet()
for target in kb.targets:
match = re.search(r"/[^/?]*\?.*\Z", target[0])
if match:
key = re.sub(r"=[^=&]*", "=", match.group(0))
if key not in seen:
results.add(target)
seen.add(key)
for target in kb.targets:
match = re.search(r"/[^/?]*\?.*\Z", target[0])
if match:
key = re.sub(r"=[^=&]*", "=", match.group(0))
if key not in seen:
results.add(target)
seen.add(key)
kb.targets = results
kb.targets = results
storeResultsToFile(kb.targets)
storeResultsToFile(kb.targets)
def storeResultsToFile(results):
if not results: