1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-10 09:49:05 +00:00

Applied patch from Daniel Miller to fix two bugs in the httpspider library:

* First bug: the LinkExtractor portion of httpspider doesn't check for a negative
    maxdepth (which indicates no limit), and therefore rejects all links.
* Second bug: the withinhost and withindomain matching functions would throw an error
    when presented with a URL without a host portion.

In addition, validate_link was moved out of the parse function into a separate method of
the LinkExtractor class. [Daniel Miller]
This commit is contained in:
patrik
2012-05-22 17:26:12 +00:00
parent 22c7faa94b
commit 84c3de36fc

View File

@@ -89,7 +89,7 @@ Options = {
end
elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
return false
elseif ( parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
elseif ( parsed_u.host == nil or parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
return false
end
return true
@@ -103,7 +103,7 @@ Options = {
end
elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
return false
elseif ( parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
elseif ( parsed_u.host == nil or parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
return false
end
return true
@@ -216,43 +216,7 @@ LinkExtractor = {
end
end,
-- Parses a HTML response and extracts all links it can find
-- The function currently supports href, src and action links
-- Also all behaviour options, such as depth, white- and black-list are
-- processed in here.
parse = function(self)
local links = {}
local patterns = {
'[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
'[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
'[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
}
local base_hrefs = {
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
}
local base_href
for _, pattern in ipairs(base_hrefs) do
base_href = self.html:match(pattern)
if ( base_href ) then
break
end
end
for _, pattern in ipairs(patterns) do
for l in self.html:gfind(pattern) do
local link = l
if ( not(LinkExtractor.isAbsolute(l)) ) then
link = LinkExtractor.createAbsolute(self.url, l, base_href)
end
local url = URL:new(link)
local function validate_link()
validate_link = function(self, url)
local valid = true
-- if our url is nil, abort, this could be due to a number of
@@ -263,7 +227,7 @@ LinkExtractor = {
end
-- linkdepth trumps whitelisting
if ( self.options.maxdepth ) then
if ( self.options.maxdepth and self.options.maxdepth >= 0 ) then
local depth = self:getDepth( url )
if ( -1 == depth or depth > self.options.maxdepth ) then
stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
@@ -309,9 +273,45 @@ LinkExtractor = {
end
end
return valid
end,
-- Parses a HTML response and extracts all links it can find
-- The function currently supports href, src and action links
-- Also all behaviour options, such as depth, white- and black-list are
-- processed in here.
parse = function(self)
local links = {}
local patterns = {
'[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
'[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
'[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
}
local base_hrefs = {
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
}
local base_href
for _, pattern in ipairs(base_hrefs) do
base_href = self.html:match(pattern)
if ( base_href ) then
break
end
end
local valid = validate_link()
for _, pattern in ipairs(patterns) do
for l in self.html:gfind(pattern) do
local link = l
if ( not(LinkExtractor.isAbsolute(l)) ) then
link = LinkExtractor.createAbsolute(self.url, l, base_href)
end
local url = URL:new(link)
local valid = self:validate_link(url)
if ( valid ) then
stdnse.print_debug(3, "%s: Adding link: %s", LIBRARY_NAME, tostring(url))