Minor patch

This commit is contained in:
Miroslav Stampar
2019-11-08 15:19:50 +01:00
parent a90324d592
commit 7eb7bddb25
3 changed files with 4 additions and 4 deletions

View File

@@ -117,7 +117,7 @@ def crawl(target):
if (extractRegexResult(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower() not in CRAWL_EXCLUDE_EXTENSIONS:
with kb.locks.value:
threadData.shared.deeper.add(url)
-if re.search(r"(.*?)\?(.+)", url) and not re.search(r"\?\d+\Z", url):
+if re.search(r"(.*?)\?(.+)", url) and not re.search(r"\?(v=)?\d+\Z", url):
threadData.shared.value.add(url)
except UnicodeEncodeError: # for non-HTML files
pass