Applied a patch from Daniel Miller to fix two bugs in the httpspider library:
* First bug: the LinkExtractor portion of httpspider did not check for a negative
maxdepth (indicating no limit) and rejected all links.
* Second bug: the withinhost and withindomain matching functions would throw an error
when presented with a URL without a host portion.
In addition, the validate_link code was moved out into a separate function in the
LinkExtractor class. [Daniel Miller]
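Why the second bug crashed the spider, as a minimal sketch (not part of the patch; it assumes Nmap's url.parse() and an arbitrary example host "scanme.nmap.org"): a host-relative link parses with a nil host field, so the old comparison indexed nil with :lower(), while the patched check short-circuits on the nil test. The first bug is covered by the new "self.options.maxdepth >= 0" guard, so a maxdepth of -1 now means unlimited instead of rejecting every link.

	local url = require "url"                           -- Nmap NSE url library

	local parsed_u = url.parse("/relative/page.html")   -- link without a host portion
	-- parsed_u.host is nil here.
	-- Old check: parsed_u.host:lower() raised "attempt to index a nil value".
	-- Patched check: the nil test short-circuits the comparison.
	if ( parsed_u.host == nil or parsed_u.host:lower() ~= ("scanme.nmap.org"):lower() ) then
		-- the link is simply treated as not within the host
	end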
@@ -89,7 +89,7 @@ Options = {
 				end
 			elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
 				return false
-			elseif ( parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
+			elseif ( parsed_u.host == nil or parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
 				return false
 			end
 			return true
@@ -103,7 +103,7 @@ Options = {
 				end
 			elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
 				return false
-			elseif ( parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
+			elseif ( parsed_u.host == nil or parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
 				return false
 			end
 			return true
@@ -216,6 +216,65 @@ LinkExtractor = {
 		end
 	end,
 
+	validate_link = function(self, url)
+		local valid = true
+
+		-- if our url is nil, abort, this could be due to a number of
+		-- reasons such as unsupported protocols: javascript, mail ... or
+		-- that the URL failed to parse for some reason
+		if ( url == nil or tostring(url) == nil ) then
+			return false
+		end
+
+		-- linkdepth trumps whitelisting
+		if ( self.options.maxdepth and self.options.maxdepth >= 0 ) then
+			local depth = self:getDepth( url )
+			if ( -1 == depth or depth > self.options.maxdepth ) then
+				stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
+				return false
+			end
+		end
+
+		-- withindomain trumps any whitelisting
+		if ( self.options.withindomain ) then
+			if ( not(self.options.withindomain(url)) ) then
+				stdnse.print_debug(2, "%s: Link is not within domain: %s", LIBRARY_NAME, tostring(url))
+				return false
+			end
+		end
+
+		-- withinhost trumps any whitelisting
+		if ( self.options.withinhost ) then
+			if ( not(self.options.withinhost(url)) ) then
+				stdnse.print_debug(2, "%s: Link is not within host: %s", LIBRARY_NAME, tostring(url))
+				return false
+			end
+		end
+
+		-- run through all blacklists
+		if ( #self.options.blacklist > 0 ) then
+			for _, func in ipairs(self.options.blacklist) do
+				if ( func(url) ) then
+					stdnse.print_debug(2, "%s: Blacklist match: %s", LIBRARY_NAME, tostring(url))
+					valid = false
+					break
+				end
+			end
+		end
+
+		-- check the url against our whitelist
+		if ( #self.options.whitelist > 0 ) then
+			for _, func in ipairs(self.options.whitelist) do
+				if ( func(url) ) then
+					stdnse.print_debug(2, "%s: Whitelist match: %s", LIBRARY_NAME, tostring(url))
+					valid = true
+					break
+				end
+			end
+		end
+		return valid
+	end,
+
 	-- Parses a HTML response and extracts all links it can find
 	-- The function currently supports href, src and action links
 	-- Also all behaviour options, such as depth, white- and black-list are
@@ -252,66 +311,7 @@ LinkExtractor = {
 
 			local url = URL:new(link)
 
-			local function validate_link()
-				local valid = true
-
-				-- if our url is nil, abort, this could be due to a number of
-				-- reasons such as unsupported protocols: javascript, mail ... or
-				-- that the URL failed to parse for some reason
-				if ( url == nil or tostring(url) == nil ) then
-					return false
-				end
-
-				-- linkdepth trumps whitelisting
-				if ( self.options.maxdepth ) then
-					local depth = self:getDepth( url )
-					if ( -1 == depth or depth > self.options.maxdepth ) then
-						stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
-						return false
-					end
-				end
-
-				-- withindomain trumps any whitelisting
-				if ( self.options.withindomain ) then
-					if ( not(self.options.withindomain(url)) ) then
-						stdnse.print_debug(2, "%s: Link is not within domain: %s", LIBRARY_NAME, tostring(url))
-						return false
-					end
-				end
-
-				-- withinhost trumps any whitelisting
-				if ( self.options.withinhost ) then
-					if ( not(self.options.withinhost(url)) ) then
-						stdnse.print_debug(2, "%s: Link is not within host: %s", LIBRARY_NAME, tostring(url))
-						return false
-					end
-				end
-
-				-- run through all blacklists
-				if ( #self.options.blacklist > 0 ) then
-					for _, func in ipairs(self.options.blacklist) do
-						if ( func(url) ) then
-							stdnse.print_debug(2, "%s: Blacklist match: %s", LIBRARY_NAME, tostring(url))
-							valid = false
-							break
-						end
-					end
-				end
-
-				-- check the url against our whitelist
-				if ( #self.options.whitelist > 0 ) then
-					for _, func in ipairs(self.options.whitelist) do
-						if ( func(url) ) then
-							stdnse.print_debug(2, "%s: Whitelist match: %s", LIBRARY_NAME, tostring(url))
-							valid = true
-							break
-						end
-					end
-				end
-				return valid
-			end
-
-			local valid = validate_link()
+			local valid = self:validate_link(url)
 
 			if ( valid ) then
 				stdnse.print_debug(3, "%s: Adding link: %s", LIBRARY_NAME, tostring(url))
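For context, a minimal usage sketch of how an NSE script drives the crawler so these code paths are exercised. The Crawler:new signature and the option names (scriptname, maxdepth, withinhost) are as commonly used by httpspider-based scripts and should be verified against the library documentation; host and port are the tables the script's action receives, and SCRIPT_NAME is the usual NSE global.

	local httpspider = require "httpspider"
	local stdnse     = require "stdnse"

	action = function(host, port)
		local crawler = httpspider.Crawler:new(host, port, nil, {
			scriptname = SCRIPT_NAME,
			maxdepth   = -1,    -- "no limit"; previously caused every link to be rejected
			withinhost = true,  -- relative (host-less) links no longer raise an error
		})
		while true do
			-- crawl() returns the next fetched page until the queue is exhausted
			local status, r = crawler:crawl()
			if ( not(status) ) then break end
			stdnse.print_debug(1, "crawled %s", tostring(r.url))
		end
	end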