mirror of
https://github.com/nmap/nmap.git
synced 2025-12-11 10:19:03 +00:00
Applied patch from Daniel Miller to fix two bugs in the httpspider library:
* First bug: the LinkExtractor portion of httpspider doesn't check for a negative
maxdepth (which indicates no limit), and therefore rejects all links.
* Second bug: the withinhost and withindomain matching functions would throw an error
when presented with a URL without a host portion.
In addition, the validate_link logic was moved out into a separate function of the
LinkExtractor class. [Daniel Miller]
This commit is contained in:
@@ -89,7 +89,7 @@ Options = {
|
|||||||
end
|
end
|
||||||
elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
|
elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
|
||||||
return false
|
return false
|
||||||
elseif ( parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
|
elseif ( parsed_u.host == nil or parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
@@ -103,7 +103,7 @@ Options = {
|
|||||||
end
|
end
|
||||||
elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
|
elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
|
||||||
return false
|
return false
|
||||||
elseif ( parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
|
elseif ( parsed_u.host == nil or parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
@@ -216,43 +216,7 @@ LinkExtractor = {
|
|||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
|
|
||||||
-- Parses a HTML response and extracts all links it can find
|
validate_link = function(self, url)
|
||||||
-- The function currently supports href, src and action links
|
|
||||||
-- Also all behaviour options, such as depth, white- and black-list are
|
|
||||||
-- processed in here.
|
|
||||||
parse = function(self)
|
|
||||||
local links = {}
|
|
||||||
local patterns = {
|
|
||||||
'[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
|
||||||
'[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
|
|
||||||
'[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
|
||||||
'[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
|
|
||||||
'[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
|
|
||||||
}
|
|
||||||
|
|
||||||
local base_hrefs = {
|
|
||||||
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
|
|
||||||
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
|
|
||||||
}
|
|
||||||
|
|
||||||
local base_href
|
|
||||||
for _, pattern in ipairs(base_hrefs) do
|
|
||||||
base_href = self.html:match(pattern)
|
|
||||||
if ( base_href ) then
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
for _, pattern in ipairs(patterns) do
|
|
||||||
for l in self.html:gfind(pattern) do
|
|
||||||
local link = l
|
|
||||||
if ( not(LinkExtractor.isAbsolute(l)) ) then
|
|
||||||
link = LinkExtractor.createAbsolute(self.url, l, base_href)
|
|
||||||
end
|
|
||||||
|
|
||||||
local url = URL:new(link)
|
|
||||||
|
|
||||||
local function validate_link()
|
|
||||||
local valid = true
|
local valid = true
|
||||||
|
|
||||||
-- if our url is nil, abort, this could be due to a number of
|
-- if our url is nil, abort, this could be due to a number of
|
||||||
@@ -263,7 +227,7 @@ LinkExtractor = {
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- linkdepth trumps whitelisting
|
-- linkdepth trumps whitelisting
|
||||||
if ( self.options.maxdepth ) then
|
if ( self.options.maxdepth and self.options.maxdepth >= 0 ) then
|
||||||
local depth = self:getDepth( url )
|
local depth = self:getDepth( url )
|
||||||
if ( -1 == depth or depth > self.options.maxdepth ) then
|
if ( -1 == depth or depth > self.options.maxdepth ) then
|
||||||
stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
|
stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
|
||||||
@@ -309,9 +273,45 @@ LinkExtractor = {
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
return valid
|
return valid
|
||||||
|
end,
|
||||||
|
|
||||||
|
-- Parses a HTML response and extracts all links it can find
|
||||||
|
-- The function currently supports href, src and action links
|
||||||
|
-- Also all behaviour options, such as depth, white- and black-list are
|
||||||
|
-- processed in here.
|
||||||
|
parse = function(self)
|
||||||
|
local links = {}
|
||||||
|
local patterns = {
|
||||||
|
'[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
||||||
|
'[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
|
||||||
|
'[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
||||||
|
'[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
|
||||||
|
'[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
|
||||||
|
}
|
||||||
|
|
||||||
|
local base_hrefs = {
|
||||||
|
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
|
||||||
|
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
|
||||||
|
}
|
||||||
|
|
||||||
|
local base_href
|
||||||
|
for _, pattern in ipairs(base_hrefs) do
|
||||||
|
base_href = self.html:match(pattern)
|
||||||
|
if ( base_href ) then
|
||||||
|
break
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
local valid = validate_link()
|
for _, pattern in ipairs(patterns) do
|
||||||
|
for l in self.html:gfind(pattern) do
|
||||||
|
local link = l
|
||||||
|
if ( not(LinkExtractor.isAbsolute(l)) ) then
|
||||||
|
link = LinkExtractor.createAbsolute(self.url, l, base_href)
|
||||||
|
end
|
||||||
|
|
||||||
|
local url = URL:new(link)
|
||||||
|
|
||||||
|
local valid = self:validate_link(url)
|
||||||
|
|
||||||
if ( valid ) then
|
if ( valid ) then
|
||||||
stdnse.print_debug(3, "%s: Adding link: %s", LIBRARY_NAME, tostring(url))
|
stdnse.print_debug(3, "%s: Adding link: %s", LIBRARY_NAME, tostring(url))
|
||||||
|
|||||||
Reference in New Issue
Block a user