From 84c3de36fcd969d70c72729cb90d1d1f953375dc Mon Sep 17 00:00:00 2001
From: patrik
Date: Tue, 22 May 2012 17:26:12 +0000
Subject: [PATCH] Applied patch from Daniel Miller to fix two bugs in the
 httpspider library:

* First bug: the LinkExtractor portion of httpspider doesn't check for a
  negative maxdepth (indicating no limit), and so rejects all links.
* Second bug: the withinhost and withindomain matching functions would throw
  an error when presented with a URL without a host portion.

In addition, the validate_link function was moved out into a separate
function in the LinkExtractor class. [Daniel Miller]
---
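
Notes (illustrative, not part of the commit): a minimal sketch of the two
failure modes. It assumes the behaviour of nselib/url.lua's url.parse, which
returns a table with no host field for host-less schemes such as mailto:.

  local url = require "url"        -- nselib/url.lua

  -- Bug 1: maxdepth = -1 means "no limit", but the old test
  -- (depth > self.options.maxdepth) holds for every depth >= 0, so every
  -- link was rejected. The fix applies the depth check only when
  -- maxdepth >= 0.
  local maxdepth, depth = -1, 0
  assert(depth > maxdepth)         -- old behaviour: link dropped
  assert(not (maxdepth >= 0))      -- fixed behaviour: depth check skipped

  -- Bug 2: host-less links parse without a host, so the old
  -- parsed_u.host:lower() call crashed trying to index a nil value.
  local parsed_u = url.parse("mailto:user@example.com")
  assert(parsed_u.host == nil)     -- the added nil guard rejects these cleanly

With the guards in place, host-less links are treated as outside the host and
domain, and a negative maxdepth once again means an unbounded crawl.
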
 nselib/httpspider.lua | 124 +++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/nselib/httpspider.lua b/nselib/httpspider.lua
index b0880609a..c52565b46 100644
--- a/nselib/httpspider.lua
+++ b/nselib/httpspider.lua
@@ -89,7 +89,7 @@ Options = {
       end
     elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
       return false
-    elseif ( parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
+    elseif ( parsed_u.host == nil or parsed_u.host:lower() ~= o.base_url:getHost():lower() ) then
       return false
     end
     return true
@@ -103,7 +103,7 @@ Options = {
       end
     elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
       return false
-    elseif ( parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
+    elseif ( parsed_u.host == nil or parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
       return false
     end
     return true
@@ -216,6 +216,65 @@ LinkExtractor = {
     end
   end,
 
+  validate_link = function(self, url)
+    local valid = true
+
+    -- if our url is nil, abort, this could be due to a number of
+    -- reasons such as unsupported protocols: javascript, mail ... or
+    -- that the URL failed to parse for some reason
+    if ( url == nil or tostring(url) == nil ) then
+      return false
+    end
+
+    -- linkdepth trumps whitelisting
+    if ( self.options.maxdepth and self.options.maxdepth >= 0 ) then
+      local depth = self:getDepth( url )
+      if ( -1 == depth or depth > self.options.maxdepth ) then
+        stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
+        return false
+      end
+    end
+
+    -- withindomain trumps any whitelisting
+    if ( self.options.withindomain ) then
+      if ( not(self.options.withindomain(url)) ) then
+        stdnse.print_debug(2, "%s: Link is not within domain: %s", LIBRARY_NAME, tostring(url))
+        return false
+      end
+    end
+
+    -- withinhost trumps any whitelisting
+    if ( self.options.withinhost ) then
+      if ( not(self.options.withinhost(url)) ) then
+        stdnse.print_debug(2, "%s: Link is not within host: %s", LIBRARY_NAME, tostring(url))
+        return false
+      end
+    end
+
+    -- run through all blacklists
+    if ( #self.options.blacklist > 0 ) then
+      for _, func in ipairs(self.options.blacklist) do
+        if ( func(url) ) then
+          stdnse.print_debug(2, "%s: Blacklist match: %s", LIBRARY_NAME, tostring(url))
+          valid = false
+          break
+        end
+      end
+    end
+
+    -- check the url against our whitelist
+    if ( #self.options.whitelist > 0 ) then
+      for _, func in ipairs(self.options.whitelist) do
+        if ( func(url) ) then
+          stdnse.print_debug(2, "%s: Whitelist match: %s", LIBRARY_NAME, tostring(url))
+          valid = true
+          break
+        end
+      end
+    end
+    return valid
+  end,
+
   -- Parses a HTML response and extracts all links it can find
   -- The function currently supports href, src and action links
   -- Also all behaviour options, such as depth, white- and black-list are
@@ -252,66 +311,7 @@ LinkExtractor = {
 
       local url = URL:new(link)
 
-      local function validate_link()
-        local valid = true
-
-        -- if our url is nil, abort, this could be due to a number of
-        -- reasons such as unsupported protocols: javascript, mail ... or
-        -- that the URL failed to parse for some reason
-        if ( url == nil or tostring(url) == nil ) then
-          return false
-        end
-
-        -- linkdepth trumps whitelisting
-        if ( self.options.maxdepth ) then
-          local depth = self:getDepth( url )
-          if ( -1 == depth or depth > self.options.maxdepth ) then
-            stdnse.print_debug(3, "%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
-            return false
-          end
-        end
-
-        -- withindomain trumps any whitelisting
-        if ( self.options.withindomain ) then
-          if ( not(self.options.withindomain(url)) ) then
-            stdnse.print_debug(2, "%s: Link is not within domain: %s", LIBRARY_NAME, tostring(url))
-            return false
-          end
-        end
-
-        -- withinhost trumps any whitelisting
-        if ( self.options.withinhost ) then
-          if ( not(self.options.withinhost(url)) ) then
-            stdnse.print_debug(2, "%s: Link is not within host: %s", LIBRARY_NAME, tostring(url))
-            return false
-          end
-        end
-
-        -- run through all blacklists
-        if ( #self.options.blacklist > 0 ) then
-          for _, func in ipairs(self.options.blacklist) do
-            if ( func(url) ) then
-              stdnse.print_debug(2, "%s: Blacklist match: %s", LIBRARY_NAME, tostring(url))
-              valid = false
-              break
-            end
-          end
-        end
-
-        -- check the url against our whitelist
-        if ( #self.options.whitelist > 0 ) then
-          for _, func in ipairs(self.options.whitelist) do
-            if ( func(url) ) then
-              stdnse.print_debug(2, "%s: Whitelist match: %s", LIBRARY_NAME, tostring(url))
-              valid = true
-              break
-            end
-          end
-        end
-        return valid
-      end
-
-      local valid = validate_link()
+      local valid = self:validate_link(url)
 
       if ( valid ) then
         stdnse.print_debug(3, "%s: Adding link: %s", LIBRARY_NAME, tostring(url))
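
A closing note (illustrative, not part of the patch): both matchers reduce to
string comparisons on parsed_u.host, which is exactly the field that is nil
for host-less links. A standalone sketch of the patched withindomain branch,
using a hypothetical helper name:

  -- within_domain is a hypothetical standalone mirror of the patched branch
  local function within_domain(host, domain)
    if host == nil then return false end  -- the guard this patch adds
    -- case-insensitive suffix match, as in the library
    return host:sub(-#domain):lower() == domain:lower()
  end

  print(within_domain("www.example.com", "example.com"))  --> true
  print(within_domain("example.org", "example.com"))      --> false
  print(within_domain(nil, "example.com"))                --> false (used to error)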