mirror of https://github.com/nmap/nmap.git
synced 2025-12-21 15:09:02 +00:00

Fixed a number of bugs and prepared the library to handle the new redirect
code being added to the http library. [Patrik]

@@ -73,19 +73,10 @@ Options = {
 
     -- set a few default values
     o.timeout = options.timeout or 10000
-    o.withindomain = o.withindomain or false
-
-    -- we default to withinhost, unless withindomain is set
-    if ( o.withindomain ) then
-      o.withinhost = o.withinhost or false
-    else
-      o.withinhost = o.withinhost or true
-    end
-
     o.whitelist = o.whitelist or {}
     o.blacklist = o.blacklist or {}
 
-    if ( o.withinhost or o.withindomain ) then
+    if ( o.withinhost == true or o.withindomain == true ) then
      local host_match, domain_match
      if ( ( o.base_url:getProto() == 'https' and o.base_url:getPort() == 443 ) or
        ( o.base_url:getProto() == 'http' and o.base_url:getPort() == 80 ) ) then

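A note on the new comparison: until the defaults are normalized in
loadDefaultArguments (below), withinhost and withindomain may hold raw
--script-args strings rather than booleans, and in Lua every value except
nil and false is truthy. A minimal sketch of the pitfall the explicit
"== true" check avoids:

  local withinhost = "false"                          -- raw script-arg value: a string
  print(withinhost and "taken" or "skipped")          --> taken (a string is truthy!)
  print(withinhost == true and "taken" or "skipped")  --> skipped (only a real boolean passes)
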
@@ -101,7 +92,6 @@ Options = {
        domain_match = ("%s://.*%s/"):format(o.base_url:getProto(), o.base_url:getDomain() )
      end
    end
-
    -- set up the appropriate matching functions
    if ( o.withinhost ) then
      o.withinhost = function(url) return string.match(tostring(url), host_match) end

@@ -410,7 +400,14 @@ URL = {
 
   -- Gets the domain component of the URL
   -- @return domain string containing the hosts domain
-  getDomain = function(self) return self.domain end,
+  getDomain = function(self)
+    if ( self.domain ) then
+      return self.domain
+    -- fallback to the host, if we can't find a domain
+    else
+      return self.host
+    end
+  end,
 
   -- Converts the URL to a string
   -- @return url string containing the string representation of the url

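With the fallback in place, getDomain() no longer returns nil when no
domain could be derived. A hypothetical illustration, assuming URL:new
parses a full URL string (as it does for redirects later in this diff) and
that a bare IP yields no domain component:

  local u = URL:new("http://192.168.0.1/admin/")
  -- previously u:getDomain() could be nil; it now degrades to u:getHost(),
  -- so callers formatting "withindomain=%s" can no longer hit a nil argument
  print(u:getDomain())
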
@@ -589,7 +586,7 @@ Crawler = {
   crawl_thread = function(self, response_queue)
     local condvar = nmap.condvar(response_queue)
 
-    if ( self.options.withinhost and self.options.withindomain ) then
+    if ( false ~= self.options.withinhost and false ~= self.options.withindomain ) then
      table.insert(response_queue, { false, { err = true, reason = "Invalid options: withinhost and withindomain can't both be true" } })
      condvar "signal"
      return

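The guard now reads "not explicitly disabled" rather than "truthy":
false ~= nil evaluates to true, so an option that is still unset counts as
enabled when detecting the conflicting combination. Plain Lua, for the record:

  print(false ~= true)    --> true  (explicitly enabled)
  print(false ~= nil)     --> true  (unset: still treated as enabled)
  print(false ~= false)   --> false (only an explicit false disables)
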
@@ -635,12 +632,25 @@ Crawler = {
     local response = http.get(url:getHost(), url:getPort(), url:getFile(), { timeout = self.options.timeout } )
     self.processed[tostring(url)] = true
 
+    if ( response ) then
+      -- were we redirected?
+      if ( response.location ) then
+        -- was the link absolute?
+        if ( response.location:match("^http") ) then
+          url = URL:new(response.location)
+        -- guess not
+        else
+          url.path = response.location
+        end
+      end
      -- if we have a response, proceed scraping it
      if ( response.body ) then
        local links = LinkExtractor:new(url, response.body, self.options):getLinks()
        self.urlqueue:add(links)
      end
+    else
+      response = { body = "", headers = {} }
+    end
     table.insert(response_queue, { true, { url = url, response = response } } )
     while ( PREFETCH_SIZE < #response_queue ) do
       stdnse.print_debug(2, "%s: Response queue full, waiting ...", LIBRARY_NAME)

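The redirect branch distinguishes an absolute Location header from a
relative one. The same logic, distilled into a standalone sketch
(follow_redirect is a hypothetical name; URL is this library's class):

  local function follow_redirect(url, location)
    if ( location:match("^http") ) then
      return URL:new(location)   -- absolute: build a fresh URL object
    end
    url.path = location          -- relative: keep host/port, replace the path
    return url
  end
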
@@ -659,28 +669,78 @@ Crawler = {
       return
     end
 
-    self.options.maxdepth = self.options.maxdepth or tonumber(stdnse.get_script_args(sn .. ".maxdepth"))
-    self.options.maxpagecount = self.options.maxpagecount or tonumber(stdnse.get_script_args(sn .. ".maxpagecount"))
-    self.url = self.url or stdnse.get_script_args(sn .. ".url")
-    self.options.withinhost = self.options.withinhost or stdnse.get_script_args(sn .. ".withinhost")
-    self.options.withindomain = self.options.withindomain or stdnse.get_script_args(sn .. ".withindomain")
-    self.options.noblacklist = self.options.noblacklist or stdnse.get_script_args(sn .. ".noblacklist")
+    if ( nil == self.options.maxdepth ) then
+      self.options.maxdepth = tonumber(stdnse.get_script_args(sn .. ".maxdepth"))
+    end
+    if ( nil == self.options.maxpagecount ) then
+      self.options.maxpagecount = tonumber(stdnse.get_script_args(sn .. ".maxpagecount"))
+    end
+    if ( nil == self.url ) then
+      self.url = stdnse.get_script_args(sn .. ".url")
+    end
+    if ( nil == self.options.withinhost ) then
+      self.options.withinhost = stdnse.get_script_args(sn .. ".withinhost")
+    end
+    if ( nil == self.options.withindomain ) then
+      self.options.withindomain = stdnse.get_script_args(sn .. ".withindomain")
+    end
+    if ( nil == self.options.noblacklist ) then
+      self.options.noblacklist = stdnse.get_script_args(sn .. ".noblacklist")
+    end
   end,
 
   -- Loads the argument on a library level
   loadLibraryArguments = function(self)
     local ln = LIBRARY_NAME
 
-    self.options.maxdepth = self.options.maxdepth or tonumber(stdnse.get_script_args(ln .. ".maxdepth"))
-    self.options.maxpagecount = self.options.maxpagecount or tonumber(stdnse.get_script_args(ln .. ".maxpagecount"))
-    self.url = self.url or stdnse.get_script_args(ln .. ".url")
-    self.options.withinhost = self.options.withinhost or stdnse.get_script_args(ln .. ".withinhost")
-    self.options.withindomain = self.options.withindomain or stdnse.get_script_args(ln .. ".withindomain")
-    self.options.noblacklist = self.options.noblacklist or stdnse.get_script_args(ln .. ".noblacklist")
+    if ( nil == self.options.maxdepth ) then
+      self.options.maxdepth = tonumber(stdnse.get_script_args(ln .. ".maxdepth"))
+    end
+    if ( nil == self.options.maxpagecount ) then
+      self.options.maxpagecount = tonumber(stdnse.get_script_args(ln .. ".maxpagecount"))
+    end
+    if ( nil == self.url ) then
+      self.url = stdnse.get_script_args(ln .. ".url")
+    end
+    if ( nil == self.options.withinhost ) then
+      self.options.withinhost = stdnse.get_script_args(ln .. ".withinhost")
+    end
+    if ( nil == self.options.withindomain ) then
+      self.options.withindomain = stdnse.get_script_args(ln .. ".withindomain")
+    end
+    if ( nil == self.options.noblacklist ) then
+      self.options.noblacklist = stdnse.get_script_args(ln .. ".noblacklist")
+    end
   end,
 
   -- Loads any defaults for arguments that were not set
   loadDefaultArguments = function(self)
+    local function tobool(b)
+      if ( nil == b ) then
+        return
+      end
+      assert("string" == type(b) or "boolean" == type(b), "httpspider: tobool failed, unsupported type")
+      if ( "string" == type(b) ) then
+        if ( "true" == b ) then
+          return true
+        else
+          return false
+        end
+      end
+      return b
+    end
+
+    -- fixup some booleans to make sure they're actually booleans
+    self.options.withinhost = tobool(self.options.withinhost)
+    self.options.withindomain = tobool(self.options.withindomain)
+    self.options.noblacklist = tobool(self.options.noblacklist)
+
+    if ( self.options.withinhost == nil ) then
+      self.options.withinhost = true
+    end
+    if ( self.options.withindomain == nil ) then
+      self.options.withindomain = false
+    end
     self.options.maxdepth = self.options.maxdepth or 3
     self.options.maxpagecount = self.options.maxpagecount or 20
     self.url = self.url or '/'

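The retreat from the "x = x or value" idiom is the core bugfix here: or
cannot preserve an explicit false, so a user passing withinhost=false was
silently flipped back to true. The nil-guarded form keeps it, and tobool
then normalizes the "true"/"false" strings that get_script_args hands back:

  local withinhost = false
  withinhost = withinhost or true    -- old idiom: the false is lost --> true

  withinhost = false
  if ( nil == withinhost ) then      -- new idiom: only nil gets the default
    withinhost = true
  end
  print(withinhost)                  --> false
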
@@ -690,7 +750,6 @@ Crawler = {
   getLimitations = function(self)
     local o = self.options
     local limits = {}
-
     if ( o.maxdepth > 0 or o.maxpagecount > 0 or
       o.withinhost or o.wihtindomain ) then
      if ( o.maxdepth > 0 ) then

@@ -700,7 +759,7 @@ Crawler = {
        table.insert(limits, ("maxpagecount=%d"):format(o.maxpagecount))
      end
      if ( o.withindomain ) then
-        table.insert(limits, ("withindomain=%s"):format(o.base_url:getDomain()))
+        table.insert(limits, ("withindomain=%s"):format(o.base_url:getDomain() or o.base_url:getHost()))
      end
      if ( o.withinhost ) then
        table.insert(limits, ("withinhost=%s"):format(o.base_url:getHost()))

@@ -58,6 +58,10 @@ action = function(host, port)
   -- create a new crawler instance
   local crawler = httpspider.Crawler:new( host, port, nil, { scriptname = SCRIPT_NAME } )
 
+  if ( not(crawler) ) then
+    return
+  end
+
   -- create a table entry in the registry
   nmap.registry.auth_urls = nmap.registry.auth_urls or {}
   crawler:set_timeout(10000)

@@ -70,7 +74,7 @@ action = function(host, port)
     -- most of them are "legitimate" and should not be reason to abort
     if ( not(status) ) then
       if ( r.err ) then
-        return stdnse.format_output(true, "ERROR: %s", r.reason)
+        return stdnse.format_output(true, ("ERROR: %s"):format(r.reason))
       else
         break
       end

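The old call handed r.reason to stdnse.format_output as an extra argument,
but format_output expects finished output data (a string or table), not
printf-style varargs, so the message has to be interpolated up front.
("fmt"):format(...) is just method-call sugar for string.format:

  local reason = "connection timed out"         -- illustrative value
  print(("ERROR: %s"):format(reason))           --> ERROR: connection timed out
  print(string.format("ERROR: %s", reason))     --> identical
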
@@ -101,6 +105,8 @@ action = function(host, port)
     end
   end
   if ( #auth_urls > 1 ) then
-    return stdnse.format_output(true, tab.dump(auth_urls))
+    local result = { tab.dump(auth_urls) }
+    result.name = crawler:getLimitations()
+    return stdnse.format_output(true, result)
   end
 end

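stdnse.format_output renders a table's name field as a heading over its
entries, so the discovered URLs are now grouped under the crawler's
self-reported limits. Roughly, with illustrative values:

  local result = { "http://example.com/login/" }
  result.name = crawler:getLimitations()  -- e.g. "Spidering limited to: maxdepth=3; maxpagecount=20; ..."
  return stdnse.format_output(true, result)
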
@@ -45,6 +45,9 @@ function action(host, port)
     }
   )
 
+  if ( not(crawler) ) then
+    return
+  end
   crawler:set_timeout(10000)
 
   local emails = {}

@@ -54,7 +57,7 @@ function action(host, port)
     -- most of them are "legitimate" and should not be reason to abort
     if ( not(status) ) then
       if ( r.err ) then
-        return stdnse.format_output(true, "ERROR: %s", r.reason)
+        return stdnse.format_output(true, ("ERROR: %s"):format(r.reason))
       else
         break
       end