diff --git a/CHANGELOG b/CHANGELOG
index f290d5e92..ab1217a1a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,8 @@
 # Nmap Changelog ($Id$); -*-text-*-
 
+o [NSE] Added the script http-grep that attempts to match web pages and URLs
+  against a given pattern. [Patrik]
+
 o [NSE] Added stop function to crawler so that scripts can properly shutdown
   the crawler in case they want to end early. [Patrik]
 
diff --git a/nselib/httpspider.lua b/nselib/httpspider.lua
index c69f2a6b3..192f5e0c5 100644
--- a/nselib/httpspider.lua
+++ b/nselib/httpspider.lua
@@ -17,8 +17,43 @@
 -- * Crawler
 -- ** This class is responsible for the actual crawling.
 --
+-- The following sample code shows how the spider could be used:
+--
+-- local crawler = httpspider.Crawler:new( host, port, '/', { scriptname = SCRIPT_NAME } )
+-- crawler:set_timeout(10000)
+--
+-- local result
+-- while(true) do
+--   local status, r = crawler:crawl()
+--   if ( not(status) ) then
+--     break
+--   end
+--   if ( r.response.body:match(str_match) ) then
+--     crawler:stop()
+--     result = r.url
+--     break
+--   end
+-- end
+--
+-- return result
+--
+--
 -- @author Patrik Karlsson
 --
+-- @args httpspider.maxdepth the maximum number of directories beneath
+--       the initial URL to spider. A negative value disables the limit.
+--       (default: 3)
+-- @args httpspider.maxpagecount the maximum number of pages to visit.
+--       A negative value disables the limit. (default: 20)
+-- @args httpspider.url the URL to start spidering. This is a URL
+--       relative to the scanned host, e.g. /default.html (default: /)
+-- @args httpspider.withinhost only spider URLs within the same host.
+--       (default: true)
+-- @args httpspider.withindomain only spider URLs within the same
+--       domain. This widens the scope from withinhost and cannot be
+--       used in combination with it. (default: false)
+-- @args httpspider.noblacklist if set, doesn't load the default blacklist
+
 module(... or "httpspider", package.seeall)
@@ -679,7 +714,6 @@ Crawler = {
 
   -- does the crawling
   crawl = function(self)
-    self.response_queue = self.response_queue or {}
     local condvar = nmap.condvar(self.response_queue)
 
     if ( not(self.thread) ) then
@@ -704,7 +738,4 @@ Crawler = {
       condvar "signal"
       condvar "wait"
     end
-
-
-
 }
diff --git a/scripts/http-grep.nse b/scripts/http-grep.nse
new file mode 100644
index 000000000..4d5fc98fc
--- /dev/null
+++ b/scripts/http-grep.nse
@@ -0,0 +1,110 @@
+description = [[
+Spiders a website and attempts to match all pages and URLs against a given
+string. Matches are counted and grouped per URL under which they were
+discovered.
+]]
+
+---
+-- @usage
+-- nmap -p 80 www.example.com --script http-grep --script-args='http-grep.match="[A-Za-z0-9%.%%%+%-]+@[A-Za-z0-9%.%%%+%-]+%.%w%w%w?%w?",http-grep.breakonmatch'
+--
+-- @output
+-- PORT   STATE SERVICE REASON
+-- 80/tcp open  http    syn-ack
+-- | http-grep:
+-- |   (4) http://example.com/name/
+-- |     + name@example.com
+-- |     + name@example.com
+-- |     + name@example.com
+-- |     + name@example.com
+-- |   (4) http://example.com/sales.html
+-- |     + sales@example.com
+-- |     + sales@example.com
+-- |     + sales@example.com
+-- |_    + sales@example.com
+--
+-- @args http-grep.match the string (Lua pattern) to match in URLs and page contents
+-- @args http-grep.maxdepth the maximum number of directories beneath
+--       the initial URL to spider. A negative value disables the limit.
+--       (default: 3)
+-- @args http-grep.maxpagecount the maximum number of pages to visit.
+--       A negative value disables the limit. (default: 20)
+-- @args http-grep.url the URL to start spidering. This is a URL
+--       relative to the scanned host, e.g. /default.html (default: /)
+-- @args http-grep.withinhost only spider URLs within the same host.
+--       (default: true)
+-- @args http-grep.withindomain only spider URLs within the same
+--       domain. This widens the scope from withinhost and cannot be
+--       used in combination with it. (default: false)
+
+author = "Patrik Karlsson"
+license = "Same as Nmap--See http://nmap.org/book/man-legal.html"
+categories = {"discovery", "safe"}
+
+require 'httpspider'
+require 'shortport'
+require 'stdnse'
+require 'url'
+
+portrule = shortport.http
+
+-- Shortens a matching string if it exceeds 60 characters.
+-- All characters after the 60th are replaced with " ...".
+local function shortenMatch(match)
+  if ( #match > 60 ) then
+    return match:sub(1, 60) .. " ..."
+  else
+    return match
+  end
+end
+
+action = function(host, port)
+
+  -- read script-specific arguments
+  local match = stdnse.get_script_args("http-grep.match")
+  local break_on_match = stdnse.get_script_args("http-grep.breakonmatch")
+
+  if ( not(match) ) then
+    return stdnse.format_output(true, "ERROR: Argument http-grep.match was not set")
+  end
+
+  local crawler = httpspider.Crawler:new(host, port, '/', { scriptname = SCRIPT_NAME } )
+  local results = {}
+
+  -- set timeout to 10 seconds
+  crawler:set_timeout(10000)
+
+  while(true) do
+    local status, r = crawler:crawl()
+    -- if the crawler fails it can be due to a number of different reasons,
+    -- most of which are "legitimate" and should not be a reason to abort
+    if ( not(status) ) then
+      if ( r.err ) then
+        return stdnse.format_output(true, ("ERROR: %s"):format(r.reason))
+      else
+        break
+      end
+    end
+
+    local matches = {}
+    local body = r.response.body
+    -- try to match the url and body
+    if ( body:match( match ) or tostring(r.url):match(match) ) then
+      local count = select(2, body:gsub(match, match))
+      for m in body:gmatch(match) do
+        table.insert(matches, "+ " .. shortenMatch(m))
+      end
+
+      matches.name = ("(%d) %s"):format(count, tostring(r.url))
+      table.insert(results, matches)
+
+      -- should we continue to search for matches?
+      if ( break_on_match ) then
+        crawler:stop()
+        break
+      end
+    end
+  end
+  table.sort(results, function(a,b) return a.name > b.name end)
+  return stdnse.format_output(true, results)
+end
\ No newline at end of file
diff --git a/scripts/script.db b/scripts/script.db
index bdaa44bdb..35fcc6535 100644
--- a/scripts/script.db
+++ b/scripts/script.db
@@ -101,6 +101,7 @@ Entry { filename = "http-enum.nse", categories = { "discovery", "intrusive", "vu
 Entry { filename = "http-favicon.nse", categories = { "default", "discovery", "safe", } }
 Entry { filename = "http-form-brute.nse", categories = { "brute", "intrusive", } }
 Entry { filename = "http-google-malware.nse", categories = { "discovery", "external", "malware", "safe", } }
+Entry { filename = "http-grep.nse", categories = { "discovery", "safe", } }
 Entry { filename = "http-headers.nse", categories = { "discovery", "safe", } }
 Entry { filename = "http-iis-webdav-vuln.nse", categories = { "intrusive", "vuln", } }
 Entry { filename = "http-joomla-brute.nse", categories = { "brute", "intrusive", } }
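A note on the match-counting idiom in the action function above: select(2, body:gsub(match, match)) relies on gsub returning the number of substitutions as its second result. The standalone sketch below is not part of the patch; it is plain Lua with no Nmap libraries, and the sample text and e-mail pattern are made up for illustration. It uses "%0" (the whole match) as the replacement rather than the pattern itself, which is slightly more robust because %-sequences have their own meaning inside a replacement string.

-- Standalone sketch: counting Lua pattern matches with gsub, the same idiom
-- http-grep uses. The sample text and pattern below are illustrative only.
local body = "contact sales@example.com or support@example.com for help"
local pattern = "[%w%.%%%+%-]+@[%w%.%%%+%-]+%.%w%w%w?%w?"

-- gsub returns the rewritten string plus the number of substitutions made;
-- replacing every match with itself ("%0") leaves the text unchanged, so the
-- second return value is simply the match count.
local count = select(2, body:gsub(pattern, "%0"))
print(count) --> 2

-- gmatch then yields the individual matches, much like the script's inner loop.
for m in body:gmatch(pattern) do
  print("+ " .. m)
end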