mirror of
https://github.com/nmap/nmap.git
synced 2025-12-06 04:31:29 +00:00
Removes parse_url from http.lua and makes url.parse more functional. Closes #952.
This commit is contained in:
120
nselib/http.lua
120
nselib/http.lua
@@ -2716,99 +2716,6 @@ function response_contains(response, pattern, case_sensitive)
|
||||
return false
|
||||
end
|
||||
|
||||
---Take a URI or URL in any form and convert it to its component parts.
--
-- The URL can optionally have a protocol definition ('http://'), a server
-- ('scanme.insecure.org'), a port (':80'), a path ('/test/file.php'), and a
-- query string ('?username=ron&password=turtle'). At the minimum, a path or
-- a protocol and server are required.
--
-- A leading protocol is only recognised when it looks like a URI scheme (a
-- letter followed by letters, digits, '+', '.' or '-'), so a relative path
-- whose query string happens to contain '://' is still treated as a path.
--
--@param url The incoming URL to parse
--@return A table containing the result, which can have the following fields:
-- * original: the URL exactly as it was passed in
-- * protocol: the text before '://', if present
-- * host_port: the raw 'host[:port]' text; only set when a protocol is
--   present and the URL has no path
-- * host
-- * port: converted with tonumber, so nil if it is not numeric
-- * path_query: the path together with the raw query string
-- * path
-- * raw_querystring: everything after the first '?'
-- * querystring: a table of name=value pairs; a value keeps any further '='
--   characters, and names without '=' are not stored
-- * is_folder: true if the path ends in '/'
-- * extension: the text after the last '.' of the final path segment
-- All fields are strings except port (a number), is_folder (a boolean) and
-- querystring (a table).
function parse_url(url)
  local result = { original = url }

  -- Split the protocol off, if it exists. Anchoring on a scheme-shaped prefix
  -- keeps '://' inside a query string from being mistaken for a protocol.
  local scheme, rest = string.match(url, '^(%a[%w+.%-]*)://(.*)$')
  if scheme then
    result.protocol = scheme
    url = rest
  end

  -- Split the host:port from the path
  local host_port
  local slash = string.find(url, '/', 1, true)
  if slash then
    host_port = string.sub(url, 1, slash - 1)
    result.path_query = string.sub(url, slash)
  elseif result.protocol then
    -- No slash after a protocol: the remainder is the server. Keep the
    -- historical host_port field, but also feed the host/port split below.
    result.host_port = url
    host_port = url
  else
    -- No slash and no protocol: the whole string is a path
    result.path_query = url
  end
  if host_port == '' then
    host_port = nil
  end

  -- Split the host and port apart, if possible
  if host_port then
    local colon = string.find(host_port, ':', 1, true)
    if colon then
      result.host = string.sub(host_port, 1, colon - 1)
      result.port = tonumber(string.sub(host_port, colon + 1))
    else
      result.host = host_port
    end
  end

  -- Split the path and querystring apart
  local path_query = result.path_query
  if path_query then
    -- Plain find: '?' is a magic character in Lua patterns
    local question = string.find(path_query, '?', 1, true)
    if question then
      result.path = string.sub(path_query, 1, question - 1)
      result.raw_querystring = string.sub(path_query, question + 1)
    else
      result.path = path_query
    end

    -- Split up the query, if necessary
    if result.raw_querystring then
      local query = {}
      for pair in string.gmatch(result.raw_querystring, '[^&]+') do
        -- Only the first '=' separates name from value
        local eq = string.find(pair, '=', 1, true)
        if eq then
          query[string.sub(pair, 1, eq - 1)] = string.sub(pair, eq + 1)
        end
      end
      result.querystring = query
    end

    -- Get the extension of the file, if any, or set that it's a folder
    if string.sub(result.path, -1) == '/' then
      result.is_folder = true
    else
      result.is_folder = false
      -- Only look at the final path segment so that a dot in a directory
      -- name is not reported as an extension
      result.extension = string.match(result.path, '%.([^./]*)$')
    end
  end

  return result
end
|
||||
|
||||
---This function should be called whenever a valid path (a path that doesn't
|
||||
-- contain a known 404 page) is discovered.
|
||||
--
|
||||
@@ -2831,11 +2738,34 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
|
||||
-- Make sure we have a proper hostname and port
|
||||
host = stdnse.get_hostname(host)
|
||||
if(type(port) == 'table') then
|
||||
port = port.number
|
||||
port = port['number']
|
||||
end
|
||||
|
||||
-- Parse the path
|
||||
local parsed = parse_url(path)
|
||||
local parsed = url.parse(path)
|
||||
|
||||
-- contains both query and fragment
|
||||
parsed['raw_querystring'] = parsed['query']
|
||||
|
||||
if parsed['fragment'] then
|
||||
parsed['raw_querystring'] = ( parsed['raw_querystring'] or "" ) .. '#' .. parsed['fragment']
|
||||
end
|
||||
|
||||
if parsed['raw_querystring'] then
|
||||
parsed['path_query'] = parsed['path'] .. '?' .. parsed['raw_querystring']
|
||||
else
|
||||
parsed['path_query'] = parsed['path']
|
||||
end
|
||||
|
||||
-- Split up the query, if necessary
|
||||
if(parsed['raw_querystring']) then
|
||||
parsed['querystring'] = {}
|
||||
local values = stdnse.strsplit('&', parsed['raw_querystring'])
|
||||
for i, v in ipairs(values) do
|
||||
local name, value = table.unpack(stdnse.strsplit('=', v))
|
||||
parsed['querystring'][name] = value
|
||||
end
|
||||
end
|
||||
|
||||
-- Add to the 'all_pages' key
|
||||
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages'}, parsed['path'])
|
||||
|
||||
@@ -36,6 +36,7 @@ local _G = require "_G"
|
||||
local stdnse = require "stdnse"
|
||||
local string = require "string"
|
||||
local table = require "table"
|
||||
local unittest = require "unittest"
|
||||
local base = _G
|
||||
|
||||
|
||||
@@ -151,6 +152,7 @@ end
|
||||
function parse(url, default)
|
||||
-- initialize default parameters
|
||||
local parsed = {}
|
||||
|
||||
for i,v in base.pairs(default or parsed) do parsed[i] = v end
|
||||
-- remove whitespace
|
||||
-- url = string.gsub(url, "%s", "")
|
||||
@@ -177,8 +179,19 @@ function parse(url, default)
|
||||
parsed.params = p
|
||||
return ""
|
||||
end)
|
||||
|
||||
-- path is whatever was left
|
||||
parsed.path = url
|
||||
|
||||
-- Checks for folder route and extension
|
||||
if parsed.path:sub(-1) == "/" then
|
||||
parsed.is_folder = true
|
||||
else
|
||||
parsed.is_folder = false
|
||||
parsed.extension = parsed.path:match("%.([^/.;]+)%f[;\0][^/]*$")
|
||||
end
|
||||
|
||||
-- Represents host:port, port = nil if not used.
|
||||
local authority = parsed.authority
|
||||
if not authority then return parsed end
|
||||
authority = string.gsub(authority,"^([^@]*)@",
|
||||
@@ -385,4 +398,71 @@ function get_default_port (scheme)
|
||||
return ports[(scheme or ""):lower()]
|
||||
end
|
||||
|
||||
-- Nothing below is needed unless the unit-test harness is active.
if not unittest.testing() then
  return _ENV
end

test_suite = unittest.TestSuite:new()

-- Table-driven cases for parse(). For each case the "absent" fields are
-- checked (in order) to be nil, then every entry of "fields" is compared
-- against the parsed result.
local cases = {
  {
    input = "https://dummy:pass@example.com:9999/example.ext?k1=v1&k2=v2#fragment=/",
    absent = { "params" },
    fields = {
      scheme = "https",
      authority = "dummy:pass@example.com:9999",
      userinfo = "dummy:pass",
      user = "dummy",
      password = "pass",
      host = "example.com",
      port = 9999,
      path = "/example.ext",
      query = "k1=v1&k2=v2",
      fragment = "fragment=/",
      is_folder = false,
      extension = "ext",
    },
  },
  {
    input = "http://dummy@example.com:1234/example.ext/another.php;k1=v1?k2=v2#k3=v3",
    absent = { "password" },
    fields = {
      scheme = "http",
      authority = "dummy@example.com:1234",
      userinfo = "dummy",
      user = "dummy",
      host = "example.com",
      port = 1234,
      path = "/example.ext/another.php",
      params = "k1=v1",
      query = "k2=v2",
      fragment = "k3=v3",
      is_folder = false,
      extension = "php",
    },
  },
  {
    input = "//example/example.folder/?k1=v1&k2=v2#k3/v3.bar",
    absent = { "scheme", "userinfo", "port", "params", "extension" },
    fields = {
      authority = "example",
      host = "example",
      path = "/example.folder/",
      query = "k1=v1&k2=v2",
      fragment = "k3/v3.bar",
      is_folder = true,
    },
  },
}

for idx = 1, #cases do
  local case = cases[idx]
  local parsed = parse(case.input)

  -- Fields that must not be produced for this input
  for j = 1, #case.absent do
    local field = case.absent[j]
    test_suite:add_test(unittest.is_nil(parsed[field]), field)
  end

  -- Fields that must match exactly
  for field, want in pairs(case.fields) do
    test_suite:add_test(unittest.equal(parsed[field], want), field)
  end
end

return _ENV;
|
||||
|
||||
@@ -29,6 +29,7 @@ local target = require "target"
|
||||
local shortport = require "shortport"
|
||||
local httpspider = require "httpspider"
|
||||
local stdnse = require "stdnse"
|
||||
local url = require "url"
|
||||
|
||||
getLastLoc = function(host, port, useragent)
|
||||
|
||||
@@ -71,7 +72,7 @@ action = function(host, port)
|
||||
-- If the mobile browser request is redirected to a different page, that must be the mobile version's page.
|
||||
if loc ~= mobloc then
|
||||
local msg = "Found mobile version: " .. mobloc
|
||||
local mobhost = http.parse_url(mobloc)
|
||||
local mobhost = url.parse(mobloc)
|
||||
if not crawler:iswithinhost(mobhost.host) then
|
||||
msg = msg .. " (Redirected to a different host)"
|
||||
if newtargets then
|
||||
|
||||
@@ -76,6 +76,7 @@ local httpspider = require "httpspider"
|
||||
local shortport = require "shortport"
|
||||
local stdnse = require "stdnse"
|
||||
local table = require "table"
|
||||
local url = require "url"
|
||||
|
||||
getLastLoc = function(host, port, useragent)
|
||||
|
||||
@@ -158,7 +159,7 @@ action = function(host, port)
|
||||
-- If the library's request returned a different location, that means the request was redirected somewhere else, hence is forbidden.
|
||||
if libloc and loc ~= libloc then
|
||||
forb[l] = {}
|
||||
local libhost = http.parse_url(libloc)
|
||||
local libhost = url.parse(libloc)
|
||||
if not crawler:iswithinhost(libhost.host) then
|
||||
forb[l]['Different Host'] = tostring(libloc)
|
||||
if newtargets then
|
||||
|
||||
Reference in New Issue
Block a user