1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-06 04:31:29 +00:00

Removes parse_url from http.lua and makes url.parse more functional. Closes #952.

This commit is contained in:
rewanth
2017-08-14 20:30:57 +00:00
parent 011e65c1b7
commit c2a9a5bbe3
4 changed files with 109 additions and 97 deletions

View File

@@ -2716,99 +2716,6 @@ function response_contains(response, pattern, case_sensitive)
return false
end
---Take a URI or URL in any form and convert it to its component parts.
--
-- The URL can optionally have a protocol definition ('http://'), a server
-- ('scanme.insecure.org'), a port (':80'), a path ('/test/file.php'), and a
-- query string ('?username=ron&password=turtle'). At the minimum, a path or
-- protocol and url are required.
--
--@param url The incoming URL to parse
--@return A table containing the result, which can have the following fields:
-- * original: the URL exactly as it was passed in
-- * protocol: the text before '://', if present
-- * host_port: the raw 'host[:port]' text; only set when the URL has a
--   protocol but no '/' after it
-- * host
-- * port: a number, or nil if missing or not numeric
-- * path_query: the path together with the query string
-- * path
-- * raw_querystring: the text after the first '?'
-- * querystring: a table containing name=value pairs
-- * is_folder: true if the path ends in '/', false otherwise
-- * extension: the text after the last '.' of the final path segment
-- All fields are strings except port, is_folder and querystring.
function parse_url(url)
  local result = {}

  -- Save the original URL
  result['original'] = url

  -- Split the protocol off, if it exists. All separator searches below are
  -- plain (4th argument true) so no character is treated as a Lua pattern.
  local colonslashslash = string.find(url, '://', 1, true)
  if(colonslashslash) then
    result['protocol'] = string.sub(url, 1, colonslashslash - 1)
    url = string.sub(url, colonslashslash + 3)
  end

  -- Split the host:port from the path
  local host_port
  local slash = string.find(url, '/', 1, true)
  if(slash) then
    host_port = string.sub(url, 1, slash - 1)
    result['path_query'] = string.sub(url, slash)
  elseif(result['protocol']) then
    -- No slash but a protocol: the remainder is the authority. Keep the
    -- 'host_port' field for existing callers, and also feed the local so that
    -- host and port are actually split out below.
    host_port = url
    result['host_port'] = url
  else
    -- No slash and no protocol: treat the whole thing as a path
    result['path_query'] = url
  end

  if(host_port == '') then
    host_port = nil
  end

  -- Split the host and port apart, if possible
  if(host_port) then
    local colon = string.find(host_port, ':', 1, true)
    if(colon) then
      result['host'] = string.sub(host_port, 1, colon - 1)
      result['port'] = tonumber(string.sub(host_port, colon + 1))
    else
      result['host'] = host_port
    end
  end

  -- Split the path and querystring apart
  if(result['path_query']) then
    local question = string.find(result['path_query'], '?', 1, true)
    if(question) then
      result['path'] = string.sub(result['path_query'], 1, question - 1)
      result['raw_querystring'] = string.sub(result['path_query'], question + 1)
    else
      result['path'] = result['path_query']
    end

    -- Split up the query, if necessary
    if(result['raw_querystring']) then
      result['querystring'] = {}
      local values = stdnse.strsplit('&', result['raw_querystring'])
      for _, v in ipairs(values) do
        local name, value = table.unpack(stdnse.strsplit('=', v))
        result['querystring'][name] = value
      end
    end

    -- Get the extension of the file, if any, or set that it's a folder
    if(string.match(result['path'], "/$")) then
      result['is_folder'] = true
    else
      result['is_folder'] = false
      -- Only the final path segment may supply the extension; a dot inside a
      -- directory name (e.g. '/a.b/readme') must not produce one.
      result['extension'] = string.match(result['path'], "%.([^./]*)$")
    end
  end

  return result
end
---This function should be called whenever a valid path (a path that doesn't
-- contain a known 404 page) is discovered.
--
@@ -2831,11 +2738,34 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
-- Make sure we have a proper hostname and port
host = stdnse.get_hostname(host)
if(type(port) == 'table') then
port = port.number
port = port['number']
end
-- Parse the path
local parsed = parse_url(path)
local parsed = url.parse(path)
-- contains both query and fragment
parsed['raw_querystring'] = parsed['query']
if parsed['fragment'] then
parsed['raw_querystring'] = ( parsed['raw_querystring'] or "" ) .. '#' .. parsed['fragment']
end
if parsed['raw_querystring'] then
parsed['path_query'] = parsed['path'] .. '?' .. parsed['raw_querystring']
else
parsed['path_query'] = parsed['path']
end
-- Split up the query, if necessary
if(parsed['raw_querystring']) then
parsed['querystring'] = {}
local values = stdnse.strsplit('&', parsed['raw_querystring'])
for i, v in ipairs(values) do
local name, value = table.unpack(stdnse.strsplit('=', v))
parsed['querystring'][name] = value
end
end
-- Add to the 'all_pages' key
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages'}, parsed['path'])

View File

@@ -36,6 +36,7 @@ local _G = require "_G"
local stdnse = require "stdnse"
local string = require "string"
local table = require "table"
local unittest = require "unittest"
local base = _G
@@ -151,6 +152,7 @@ end
function parse(url, default)
-- initialize default parameters
local parsed = {}
for i,v in base.pairs(default or parsed) do parsed[i] = v end
-- remove whitespace
-- url = string.gsub(url, "%s", "")
@@ -177,8 +179,19 @@ function parse(url, default)
parsed.params = p
return ""
end)
-- path is whatever was left
parsed.path = url
-- Checks for folder route and extension
if parsed.path:sub(-1) == "/" then
parsed.is_folder = true
else
parsed.is_folder = false
parsed.extension = parsed.path:match("%.([^/.;]+)%f[;\0][^/]*$")
end
-- Represents host:port, port = nil if not used.
local authority = parsed.authority
if not authority then return parsed end
authority = string.gsub(authority,"^([^@]*)@",
@@ -385,4 +398,71 @@ function get_default_port (scheme)
return ports[(scheme or ""):lower()]
end
-- Unit tests: only built when the unittest framework is driving the load.
if not unittest.testing() then
  return _ENV
end

test_suite = unittest.TestSuite:new()

-- Each case names the URL handed to parse(), the fields that must be nil in
-- the result, and the fields that must equal a specific value.
local cases = {
  {
    input = "https://dummy:pass@example.com:9999/example.ext?k1=v1&k2=v2#fragment=/",
    absent = { "params" },
    fields = {
      scheme = "https",
      authority = "dummy:pass@example.com:9999",
      userinfo = "dummy:pass",
      user = "dummy",
      password = "pass",
      host = "example.com",
      port = 9999,
      path = "/example.ext",
      query = "k1=v1&k2=v2",
      fragment = "fragment=/",
      is_folder = false,
      extension = "ext",
    },
  },
  {
    input = "http://dummy@example.com:1234/example.ext/another.php;k1=v1?k2=v2#k3=v3",
    absent = { "password" },
    fields = {
      scheme = "http",
      authority = "dummy@example.com:1234",
      userinfo = "dummy",
      user = "dummy",
      host = "example.com",
      port = 1234,
      path = "/example.ext/another.php",
      params = "k1=v1",
      query = "k2=v2",
      fragment = "k3=v3",
      is_folder = false,
      extension = "php",
    },
  },
  {
    input = "//example/example.folder/?k1=v1&k2=v2#k3/v3.bar",
    absent = { "scheme", "userinfo", "port", "params", "extension" },
    fields = {
      authority = "example",
      host = "example",
      path = "/example.folder/",
      query = "k1=v1&k2=v2",
      fragment = "k3/v3.bar",
      is_folder = true,
    },
  },
}

for _, case in ipairs(cases) do
  local parsed = parse(case.input)
  -- Nil checks are registered first, then the per-field equality checks.
  for _, field in ipairs(case.absent) do
    test_suite:add_test(unittest.is_nil(parsed[field]), field)
  end
  for field, want in pairs(case.fields) do
    test_suite:add_test(unittest.equal(parsed[field], want), field)
  end
end

return _ENV;

View File

@@ -29,6 +29,7 @@ local target = require "target"
local shortport = require "shortport"
local httpspider = require "httpspider"
local stdnse = require "stdnse"
local url = require "url"
getLastLoc = function(host, port, useragent)
@@ -71,7 +72,7 @@ action = function(host, port)
-- If the mobile browser request is redirected to a different page, that must be the mobile version's page.
if loc ~= mobloc then
local msg = "Found mobile version: " .. mobloc
local mobhost = http.parse_url(mobloc)
local mobhost = url.parse(mobloc)
if not crawler:iswithinhost(mobhost.host) then
msg = msg .. " (Redirected to a different host)"
if newtargets then

View File

@@ -76,6 +76,7 @@ local httpspider = require "httpspider"
local shortport = require "shortport"
local stdnse = require "stdnse"
local table = require "table"
local url = require "url"
getLastLoc = function(host, port, useragent)
@@ -158,7 +159,7 @@ action = function(host, port)
-- If the library's request returned a different location, that means the request was redirected somewhere else, hence is forbidden.
if libloc and loc ~= libloc then
forb[l] = {}
local libhost = http.parse_url(libloc)
local libhost = url.parse(libloc)
if not crawler:iswithinhost(libhost.host) then
forb[l]['Different Host'] = tostring(libloc)
if newtargets then