adding BeautifulSoup (BSD) into extras; adding --crawl to options

Miroslav Stampar
2011-06-20 11:32:30 +00:00
parent 8c04aa871a
commit 07e2c72943
9 changed files with 2168 additions and 3 deletions

View File

@@ -589,7 +589,7 @@ def start():
if kb.dataOutputFlag and not conf.multipleTargets:
logger.info("Fetched data logged to text files under '%s'" % conf.outputPath)
-if conf.multipleTargets:
+if conf.multipleTargets and conf.resultsFilename:
infoMsg = "you can find results of scanning in multiple targets "
infoMsg += "mode inside the CSV file '%s'" % conf.resultsFilename
logger.info(infoMsg)

View File

@@ -114,6 +114,7 @@ from lib.request.certhandler import HTTPSCertAuthHandler
from lib.request.rangehandler import HTTPRangeHandler
from lib.request.redirecthandler import SmartRedirectHandler
from lib.request.templates import getPageTemplate
+from lib.utils.crawler import Crawler
from lib.utils.deps import checkDependencies
from lib.utils.google import Google
@@ -388,6 +389,13 @@ def __setRequestFromFile():
__feedTargetsDict(conf.requestFile, addedTargetUrls)
+def __setCrawler():
+    if not conf.crawl:
+        return
+
+    crawler = Crawler()
+    crawler.getTargetUrls()
def __setGoogleDorking():
"""
This function checks if the way to request testable hosts is through
@@ -1278,7 +1286,7 @@ def __cleanupOptions():
if conf.tmpPath:
conf.tmpPath = ntToPosixSlashes(normalizePath(conf.tmpPath))
-if conf.googleDork or conf.logFile or conf.bulkFile or conf.forms:
+if conf.googleDork or conf.logFile or conf.bulkFile or conf.forms or conf.crawl:
conf.multipleTargets = True
if conf.optimize:
@@ -1800,6 +1808,7 @@ def init(inputOptions=advancedDict(), overrideOptions=False):
__setDNSCache()
__setSafeUrl()
__setGoogleDorking()
+__setCrawler()
__setBulkMultipleTargets()
__urllib2Opener()
__findPageForms()

View File

@@ -167,6 +167,8 @@ optDict = {
"beep": "boolean",
"checkPayload": "boolean",
"cleanup": "boolean",
"crawl": "boolean",
"forms": "boolean",
"googlePage": "integer",
"mobile": "boolean",
"pageRank": "boolean",

View File

@@ -511,6 +511,10 @@ def cmdLineParser():
help="Clean up the DBMS by sqlmap specific "
"UDF and tables")
+miscellaneous.add_option("--crawl", dest="crawl",
+action="store_true",
+help="Crawl the website starting from the target url")
miscellaneous.add_option("--forms", dest="forms",
action="store_true",
help="Parse and test forms on target url")
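
At this point --crawl is a plain boolean switch: there is no crawl-depth or URL-filter companion option yet, and Crawler.getTargetUrls() is invoked with its default depth of 1. An illustrative invocation (hypothetical host) would be:

    python sqlmap.py -u "http://www.example.com/" --crawl

Every same-host link with a query string that the crawler finds is queued as an additional target, which is why __cleanupOptions() above now also enables conf.multipleTargets when --crawl is used.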

lib/utils/crawler.py (new file, 95 lines)
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python
"""
$Id$
Copyright (c) 2006-2011 sqlmap developers (http://sqlmap.sourceforge.net/)
See the file 'doc/COPYING' for copying permission
"""
import re
import threading
import urlparse
from lib.core.common import dataToStdout
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.core.threads import getCurrentThreadData
from lib.core.threads import runThreads
from lib.request.connect import Connect as Request
from extra.beautifulsoup.beautifulsoup import BeautifulSoup
from extra.oset.pyoset import oset
class Crawler:
    """
    This class defines methods used to perform crawling (command
    line option '--crawl')
    """

    def getTargetUrls(self, depth=1):
        try:
            threadData = getCurrentThreadData()
            threadData.shared.outputs = oset()

            # locks guarding the shared work list and the collected outputs
            lockNames = ('limits', 'outputs')
            for lock in lockNames:
                kb.locks[lock] = threading.Lock()

            def crawlThread():
                threadData = getCurrentThreadData()

                while kb.threadContinue:
                    kb.locks.limits.acquire()
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        kb.locks.limits.release()
                    else:
                        kb.locks.limits.release()
                        break

                    content = Request.getPage(url=current)[0]

                    if not kb.threadContinue:
                        break

                    soup = BeautifulSoup(content)

                    for tag in soup('a'):
                        if tag.get("href"):
                            url = urlparse.urljoin(conf.url, tag.get("href"))

                            # flag to know if we are dealing with the same target host
                            target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url]))

                            if target:
                                kb.locks.outputs.acquire()
                                threadData.shared.deeper.add(url)
                                if re.search(r"(.*?)\?(.+)", url):
                                    threadData.shared.outputs.add(url)
                                kb.locks.outputs.release()

            threadData.shared.deeper = set()
            threadData.shared.unprocessed = set([conf.url])

            logger.info("starting crawling")

            # breadth-first: links found at the current depth become the
            # work list for the next pass
            for i in xrange(depth):
                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
                logger.debug("processing depth: %d" % i)
                runThreads(numThreads, crawlThread)
                threadData.shared.unprocessed = threadData.shared.deeper

        except KeyboardInterrupt:
            warnMsg = "user aborted during crawling. sqlmap "
            warnMsg += "will use partial list"
            logger.warn(warnMsg)

        except sqlmapConnectionException, e:
            errMsg = "connection exception detected. sqlmap "
            errMsg += "will use partial list"
            errMsg += " ('%s')" % e
            logger.critical(errMsg)

        finally:
            for url in threadData.shared.outputs:
                kb.targetUrls.add(( url, None, None, None ))

            kb.suppressResumeInfo = False
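
Stripped of sqlmap's threading and lock plumbing, getTargetUrls() performs a breadth-first pass over same-host links and keeps only URLs carrying a query string. The following single-threaded sketch is illustrative only: it swaps sqlmap's Request.getPage() wrapper for plain urllib2 and assumes the BeautifulSoup 3 module bundled by this commit is importable (outside the sqlmap tree the equivalent import would be "from BeautifulSoup import BeautifulSoup").

    import re
    import urllib2
    import urlparse

    # bundled copy added by this commit; standalone installs use the
    # top-level BeautifulSoup (3.x) package instead
    from extra.beautifulsoup.beautifulsoup import BeautifulSoup

    def crawl(start_url, depth=1):
        unprocessed = set([start_url])
        outputs = set()

        for _ in xrange(depth):
            deeper = set()

            for current in unprocessed:
                try:
                    content = urllib2.urlopen(current).read()
                except urllib2.URLError:
                    continue

                soup = BeautifulSoup(content)

                for tag in soup('a'):
                    href = tag.get("href")
                    if not href:
                        continue

                    url = urlparse.urljoin(start_url, href)

                    # keep only links pointing back at the starting host
                    same_host = urlparse.urlparse(url).netloc.split(':')[0] == \
                                urlparse.urlparse(start_url).netloc.split(':')[0]

                    if same_host:
                        deeper.add(url)

                        # only URLs with a GET query string become scan targets
                        if re.search(r"(.*?)\?(.+)", url):
                            outputs.add(url)

            # links found at this depth feed the next pass
            unprocessed = deeper

        return outputs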

View File

@@ -60,7 +60,7 @@ class Google:
"""
for match in self.__matches:
-if re.search(r"(.*?)\?(.+)", match, re.I):
+if re.search(r"(.*?)\?(.+)", match):
kb.targetUrls.add(( htmlunescape(htmlunescape(match)), None, None, None ))
elif re.search(URI_INJECTABLE_REGEX, match, re.I):
kb.targetUrls.add(( htmlunescape(htmlunescape("%s" % match)), None, None, None ))
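
The pattern r"(.*?)\?(.+)" is the same one the new crawler uses: it only checks that a URL carries a non-empty query string, and since nothing in it is case-dependent the dropped re.I flag never had any effect. For example (interpreter session, hypothetical URLs):

    >>> import re
    >>> bool(re.search(r"(.*?)\?(.+)", "http://www.example.com/index.php?id=1"))
    True
    >>> bool(re.search(r"(.*?)\?(.+)", "http://www.example.com/index.php"))
    False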