mirror of
https://github.com/nmap/nmap.git
synced 2025-12-09 22:21:29 +00:00
o [NSE] Added a new httpspider library and the script http-email-harvest that
collects e-mail addresses by spidering a website. [Patrik]
This commit is contained in:
110
scripts/http-email-harvest.nse
Normal file
110
scripts/http-email-harvest.nse
Normal file
@@ -0,0 +1,110 @@
|
||||
description = [[
|
||||
Spiders a web site and collects e-mail addresses
|
||||
]]
|
||||
|
||||
---
|
||||
-- @usage
|
||||
-- nmap --script=http-email-harvest <target>
|
||||
--
|
||||
-- @output
|
||||
-- PORT STATE SERVICE REASON
|
||||
-- 80/tcp open http syn-ack
|
||||
-- | http-email-harvest:
|
||||
-- | Spidering limited to: maxdepth=3; maxpagecount=20
|
||||
-- | root@example.com
|
||||
-- |_ postmaster@example.com
|
||||
--
|
||||
-- @args http-email-harvest.maxdepth the maximum amount of directories beneath
|
||||
-- the initial url to spider. A negative value disables the limit.
|
||||
-- (default: 3)
|
||||
-- @args http-email-harvest.maxpagecount the maximum amount of pages to visit.
|
||||
-- A negative value disables the limit (default: 20)
|
||||
-- @args http-email-harvest.url the url to start spidering. This is a URL
|
||||
-- relative to the scanned host eg. /default.html (default: /)
|
||||
-- @args http-email-harvest.withinhost only spider URLs within the same host.
|
||||
-- (default: true)
|
||||
-- @args http-email-harvest.withindomain only spider URLs within the same
|
||||
-- domain. This widens the scope from <code>withinhost</code> and can
|
||||
-- not be used in combination. (default: false)
|
||||
--
|
||||
|
||||
author = "Patrik Karlsson"
|
||||
categories = {"discovery", "safe"}
|
||||
|
||||
require "httpspider"
|
||||
require "shortport"
|
||||
|
||||
portrule = shortport.http
|
||||
|
||||
--- Reads a numeric spidering limit from the script arguments.
-- @param name full script-argument name (e.g. "http-email-harvest.maxdepth")
-- @param default value used when the argument is absent or not a number
-- @return the limit as a number, or nil when a negative value was supplied
--         (a negative value means "no limit")
local function get_limit(name, default)
  -- extra parentheses truncate get_script_args to a single value so that
  -- nothing can leak into tonumber's optional base parameter
  local value = tonumber((stdnse.get_script_args(name))) or default
  if value < 0 then
    return nil
  end
  return value
end

--- Builds the line that tells the user which spidering limits were applied.
-- @param maxdepth depth limit, or nil when disabled
-- @param maxpagecount page-count limit, or nil when disabled
-- @return the description string, or nil when no positive limit is active
local function describe_limits(maxdepth, maxpagecount)
  local parts = {}
  -- the nil checks matter: a disabled limit is nil and must not be compared
  -- against a number
  if maxdepth and maxdepth > 0 then
    parts[#parts + 1] = ("maxdepth=%d"):format(maxdepth)
  end
  if maxpagecount and maxpagecount > 0 then
    parts[#parts + 1] = ("maxpagecount=%d"):format(maxpagecount)
  end
  if #parts == 0 then
    return nil
  end
  return "Spidering limited to: " .. table.concat(parts, "; ")
end

--- Script entry point: spiders the target and reports the unique e-mail
-- addresses found in the bodies of the crawled pages.
-- @param host host table passed in by NSE
-- @param port port table passed in by NSE
-- @return formatted script output, or nil when no address was collected
function action(host, port)
  -- Lua pattern (not a regular expression) used to extract addresses
  local EMAIL_PATTERN = "[A-Za-z0-9%.%%%+%-]+@[A-Za-z0-9%.%%%+%-]+%.%w%w%w?%w?"

  -- by default, we cap the script at a maximum depth of 3
  local maxdepth = get_limit("http-email-harvest.maxdepth", 3)
  -- by default, we cap the script at a maximum pagecount of 20
  local maxpagecount = get_limit("http-email-harvest.maxpagecount", 20)

  local url = stdnse.get_script_args("http-email-harvest.url") or "/"
  local withinhost = stdnse.get_script_args("http-email-harvest.withinhost")
  local withindomain = stdnse.get_script_args("http-email-harvest.withindomain")

  stdnse.print_debug(2, "%s: Running crawler maxdepth: %s; maxpagecount: %s",
    SCRIPT_NAME, maxdepth or "[none]", maxpagecount or "[none]")

  local crawler = httpspider.Crawler:new(host, port, url, {
    maxdepth = maxdepth,
    maxpagecount = maxpagecount,
    withinhost = withinhost,
    withindomain = withindomain,
  })

  crawler:set_timeout(10000)

  -- used as a set: the address is the key, so duplicates collapse
  local emails = {}
  while true do
    local status, r = crawler:crawl()
    -- if the crawler fails it can be due to a number of different reasons
    -- most of them are "legitimate" and should not be reason to abort
    if not status then
      if r.err then
        -- format the message first: format_output's third parameter is an
        -- indent, not a format argument
        return stdnse.format_output(true, ("ERROR: %s"):format(tostring(r.reason)))
      end
      break
    end

    -- Collect each e-mail address and build a unique index of them;
    -- skip results that carry no response body
    local body = r.response and r.response.body
    if body then
      for email in body:gmatch(EMAIL_PATTERN) do
        emails[email] = true
      end
    end
  end

  -- if no email addresses were collected abort; next() is required because
  -- an empty table is still truthy
  if next(emails) == nil then
    return
  end

  local results = {}
  for email in pairs(emails) do
    results[#results + 1] = email
  end
  -- pairs() order is unspecified; sort for stable, reproducible output
  table.sort(results)

  -- Inform the user of the limitations that were used
  results.name = describe_limits(maxdepth, maxpagecount)

  return stdnse.format_output(true, results)
end
|
||||
@@ -94,6 +94,7 @@ Entry { filename = "http-cors.nse", categories = { "default", "discovery", "safe
|
||||
Entry { filename = "http-date.nse", categories = { "discovery", "safe", } }
|
||||
Entry { filename = "http-default-accounts.nse", categories = { "auth", "discovery", "safe", } }
|
||||
Entry { filename = "http-domino-enum-passwords.nse", categories = { "auth", "intrusive", } }
|
||||
Entry { filename = "http-email-harvest.nse", categories = { "discovery", "safe", } }
|
||||
Entry { filename = "http-enum.nse", categories = { "discovery", "intrusive", "vuln", } }
|
||||
Entry { filename = "http-favicon.nse", categories = { "default", "discovery", "safe", } }
|
||||
Entry { filename = "http-form-brute.nse", categories = { "brute", "intrusive", } }
|
||||
|
||||
Reference in New Issue
Block a user