From c2a9a5bbe313a5a3deee6490e44c373423e9db7a Mon Sep 17 00:00:00 2001 From: rewanth Date: Mon, 14 Aug 2017 20:30:57 +0000 Subject: [PATCH] Removes parse_url from http.lua and makes url.parse more functional. Closes #952. --- nselib/http.lua | 120 ++++++------------------- nselib/url.lua | 80 +++++++++++++++++ scripts/http-mobileversion-checker.nse | 3 +- scripts/http-useragent-tester.nse | 3 +- 4 files changed, 109 insertions(+), 97 deletions(-) diff --git a/nselib/http.lua b/nselib/http.lua index b1486f5a3..b9f4348f9 100644 --- a/nselib/http.lua +++ b/nselib/http.lua @@ -2716,99 +2716,6 @@ function response_contains(response, pattern, case_sensitive) return false end ----Take a URI or URL in any form and convert it to its component parts. --- --- The URL can optionally have a protocol definition ('http://'), a server --- ('scanme.insecure.org'), a port (':80'), a URI ('/test/file.php'), and a --- query string ('?username=ron&password=turtle'). At the minimum, a path or --- protocol and url are required. --- ---@param url The incoming URL to parse ---@return A table containing the result, which can have the following fields: --- * protocol --- * hostname --- * port --- * uri --- * querystring --- All fields are strings except querystring, which is a table --- containing name=value pairs. -function parse_url(url) - local result = {} - - -- Save the original URL - result['original'] = url - - -- Split the protocol off, if it exists - local colonslashslash = string.find(url, '://') - if(colonslashslash) then - result['protocol'] = string.sub(url, 1, colonslashslash - 1) - url = string.sub(url, colonslashslash + 3) - end - - -- Split the host:port from the path - local slash, host_port - slash = string.find(url, '/') - if(slash) then - host_port = string.sub(url, 1, slash - 1) - result['path_query'] = string.sub(url, slash) - else - -- If there's no slash, then it's just a URL (if it has a http://) or a path (if it doesn't) - if(result['protocol']) then - result['host_port'] = url - else - result['path_query'] = url - end - end - if(host_port == '') then - host_port = nil - end - - -- Split the host and port apart, if possible - if(host_port) then - local colon = string.find(host_port, ':') - if(colon) then - result['host'] = string.sub(host_port, 1, colon - 1) - result['port'] = tonumber(string.sub(host_port, colon + 1)) - else - result['host'] = host_port - end - end - - -- Split the path and querystring apart - if(result['path_query']) then - local question = string.find(result['path_query'], '?') - if(question) then - result['path'] = string.sub(result['path_query'], 1, question - 1) - result['raw_querystring'] = string.sub(result['path_query'], question + 1) - else - result['path'] = result['path_query'] - end - - -- Split up the query, if necessary - if(result['raw_querystring']) then - result['querystring'] = {} - local values = stdnse.strsplit('&', result['raw_querystring']) - for i, v in ipairs(values) do - local name, value = table.unpack(stdnse.strsplit('=', v)) - result['querystring'][name] = value - end - end - - -- Get the extension of the file, if any, or set that it's a folder - if(string.match(result['path'], "/$")) then - result['is_folder'] = true - else - result['is_folder'] = false - local split_str = stdnse.strsplit('%.', result['path']) - if(split_str and #split_str > 1) then - result['extension'] = split_str[#split_str] - end - end - end - - return result -end - ---This function should be called whenever a valid path (a path that doesn't -- contain a known 404 page) is discovered. -- @@ -2831,11 +2738,34 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype) -- Make sure we have a proper hostname and port host = stdnse.get_hostname(host) if(type(port) == 'table') then - port = port.number + port = port['number'] end -- Parse the path - local parsed = parse_url(path) + local parsed = url.parse(path) + + -- contains both query and fragment + parsed['raw_querystring'] = parsed['query'] + + if parsed['fragment'] then + parsed['raw_querystring'] = ( parsed['raw_querystring'] or "" ) .. '#' .. parsed['fragment'] + end + + if parsed['raw_querystring'] then + parsed['path_query'] = parsed['path'] .. '?' .. parsed['raw_querystring'] + else + parsed['path_query'] = parsed['path'] + end + + -- Split up the query, if necessary + if(parsed['raw_querystring']) then + parsed['querystring'] = {} + local values = stdnse.strsplit('&', parsed['raw_querystring']) + for i, v in ipairs(values) do + local name, value = table.unpack(stdnse.strsplit('=', v)) + parsed['querystring'][name] = value + end + end -- Add to the 'all_pages' key stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages'}, parsed['path']) diff --git a/nselib/url.lua b/nselib/url.lua index 40ff20403..7d182ec77 100644 --- a/nselib/url.lua +++ b/nselib/url.lua @@ -36,6 +36,7 @@ local _G = require "_G" local stdnse = require "stdnse" local string = require "string" local table = require "table" +local unittest = require "unittest" local base = _G @@ -151,6 +152,7 @@ end function parse(url, default) -- initialize default parameters local parsed = {} + for i,v in base.pairs(default or parsed) do parsed[i] = v end -- remove whitespace -- url = string.gsub(url, "%s", "") @@ -177,8 +179,19 @@ function parse(url, default) parsed.params = p return "" end) + -- path is whatever was left parsed.path = url + + -- Checks for folder route and extension + if parsed.path:sub(-1) == "/" then + parsed.is_folder = true + else + parsed.is_folder = false + parsed.extension = parsed.path:match("%.([^/.;]+)%f[;\0][^/]*$") + end + + -- Represents host:port, port = nil if not used. local authority = parsed.authority if not authority then return parsed end authority = string.gsub(authority,"^([^@]*)@", @@ -385,4 +398,71 @@ function get_default_port (scheme) return ports[(scheme or ""):lower()] end +if not unittest.testing() then + return _ENV +end + +test_suite = unittest.TestSuite:new() + +local result = parse("https://dummy:pass@example.com:9999/example.ext?k1=v1&k2=v2#fragment=/") +local expected = { + scheme = "https", + authority = "dummy:pass@example.com:9999", + userinfo = "dummy:pass", + user = "dummy", + password = "pass", + host = "example.com", + port = 9999, + path = "/example.ext", + query = "k1=v1&k2=v2", + fragment = "fragment=/", + is_folder = false, + extension = "ext", +} + +test_suite:add_test(unittest.is_nil(result.params), "params") +for k, v in pairs(expected) do + test_suite:add_test(unittest.equal(result[k], v), k) +end + +local result = parse("http://dummy@example.com:1234/example.ext/another.php;k1=v1?k2=v2#k3=v3") +local expected = { + scheme = "http", + authority = "dummy@example.com:1234", + userinfo = "dummy", + user = "dummy", + host = "example.com", + port = 1234, + path = "/example.ext/another.php", + params = "k1=v1", + query = "k2=v2", + fragment = "k3=v3", + is_folder = false, + extension = "php", +} + +test_suite:add_test(unittest.is_nil(result.password), "password") +for k, v in pairs(expected) do + test_suite:add_test(unittest.equal(result[k], v), k) +end + +local result = parse("//example/example.folder/?k1=v1&k2=v2#k3/v3.bar") +local expected = { + authority = "example", + host = "example", + path = "/example.folder/", + query = "k1=v1&k2=v2", + fragment = "k3/v3.bar", + is_folder = true, +} + +test_suite:add_test(unittest.is_nil(result.scheme), "scheme") +test_suite:add_test(unittest.is_nil(result.userinfo), "userinfo") +test_suite:add_test(unittest.is_nil(result.port), "port") +test_suite:add_test(unittest.is_nil(result.params), "params") +test_suite:add_test(unittest.is_nil(result.extension), "extension") +for k, v in pairs(expected) do + test_suite:add_test(unittest.equal(result[k], v), k) +end + return _ENV; diff --git a/scripts/http-mobileversion-checker.nse b/scripts/http-mobileversion-checker.nse index ae0942b92..6d2c5563d 100644 --- a/scripts/http-mobileversion-checker.nse +++ b/scripts/http-mobileversion-checker.nse @@ -29,6 +29,7 @@ local target = require "target" local shortport = require "shortport" local httpspider = require "httpspider" local stdnse = require "stdnse" +local url = require "url" getLastLoc = function(host, port, useragent) @@ -71,7 +72,7 @@ action = function(host, port) -- If the mobile browser request is redirected to a different page, that must be the mobile version's page. if loc ~= mobloc then local msg = "Found mobile version: " .. mobloc - local mobhost = http.parse_url(mobloc) + local mobhost = url.parse(mobloc) if not crawler:iswithinhost(mobhost.host) then msg = msg .. " (Redirected to a different host)" if newtargets then diff --git a/scripts/http-useragent-tester.nse b/scripts/http-useragent-tester.nse index 7aa6802e1..d6e0acabc 100644 --- a/scripts/http-useragent-tester.nse +++ b/scripts/http-useragent-tester.nse @@ -76,6 +76,7 @@ local httpspider = require "httpspider" local shortport = require "shortport" local stdnse = require "stdnse" local table = require "table" +local url = require "url" getLastLoc = function(host, port, useragent) @@ -158,7 +159,7 @@ action = function(host, port) -- If the library's request returned a different location, that means the request was redirected somewhere else, hence is forbidden. if libloc and loc ~= libloc then forb[l] = {} - local libhost = http.parse_url(libloc) + local libhost = url.parse(libloc) if not crawler:iswithinhost(libhost.host) then forb[l]['Different Host'] = tostring(libloc) if newtargets then