Fix for crawler in the redirection case (resolve crawled links against the redirect URL)

This commit is contained in:
stamparm
2013-04-30 18:08:26 +02:00
parent 09e7f4f697
commit ebe8ee3500
3 changed files with 5 additions and 1 deletion

View File

@@ -72,6 +72,8 @@ def crawl(target):
href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
if href:
if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID:
current = threadData.lastRedirectURL[1]
url = urlparse.urljoin(current, href)
# flag to know if we are dealing with the same target host