1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-07 13:11:28 +00:00

Handle internationalized domain names (IDN)

This commit is contained in:
dmiller
2022-09-21 21:03:57 +00:00
parent 2f9fde995c
commit e3ab97215d
7 changed files with 68 additions and 24 deletions

View File

@@ -1,5 +1,8 @@
#Nmap Changelog ($Id$); -*-text-*- #Nmap Changelog ($Id$); -*-text-*-
o [GH#1023] Handle Internationalized Domain Names (IDN) like Яндекс.рф on
platforms where getaddrinfo supports the AI_IDN flag. [Daniel Miller]
o Avoid storing many small strings from IPv4 OS detection results in the global o Avoid storing many small strings from IPv4 OS detection results in the global
string_pool. These were effectively leaked after a host is done being string_pool. These were effectively leaked after a host is done being
scanned, since string_pool allocations are not freed until Nmap quits. scanned, since string_pool allocations are not freed until Nmap quits.

View File

@@ -125,6 +125,10 @@ NmapOps::~NmapOps() {
free(datadir); free(datadir);
datadir = NULL; datadir = NULL;
} }
if (locale) {
free(locale);
locale = NULL;
}
#ifndef NOLUA #ifndef NOLUA
if (scriptversion || script) if (scriptversion || script)
@@ -305,6 +309,7 @@ void NmapOps::Initialize() {
numhosts_up = 0; numhosts_up = 0;
numhosts_scanning = 0; numhosts_scanning = 0;
noninteractive = false; noninteractive = false;
locale = NULL;
current_scantype = STYPE_UNKNOWN; current_scantype = STYPE_UNKNOWN;
ipoptions = NULL; ipoptions = NULL;
ipoptionslen = 0; ipoptionslen = 0;

View File

@@ -356,6 +356,7 @@ class NmapOps {
int numhosts_scanning; int numhosts_scanning;
stype current_scantype; stype current_scantype;
bool noninteractive; bool noninteractive;
char *locale;
bool release_memory; /* suggest to release memory before quitting. used to find memory leaks. */ bool release_memory; /* suggest to release memory before quitting. used to find memory leaks. */
private: private:

View File

@@ -63,6 +63,7 @@
/* $Id$ */ /* $Id$ */
#include <signal.h> #include <signal.h>
#include <locale.h>
#include "nmap.h" #include "nmap.h"
#include "NmapOps.h" #include "NmapOps.h"
@@ -116,6 +117,7 @@ int main(int argc, char *argv[]) {
int ret; int ret;
int i; int i;
o.locale = strdup(setlocale(LC_CTYPE, NULL));
set_program_name(argv[0]); set_program_name(argv[0]);
#ifdef __amigaos__ #ifdef __amigaos__

View File

@@ -145,6 +145,7 @@ local stringaux = require "stringaux"
local table = require "table" local table = require "table"
local tableaux = require "tableaux" local tableaux = require "tableaux"
local url = require "url" local url = require "url"
local ascii_hostname = url.ascii_hostname
local smbauth = require "smbauth" local smbauth = require "smbauth"
local unicode = require "unicode" local unicode = require "unicode"
@@ -187,8 +188,9 @@ local function get_host_field(host, port, scheme)
if host_header then return host_header end if host_header then return host_header end
-- If there's no host, we can't invent a name. -- If there's no host, we can't invent a name.
if not host then return nil end if not host then return nil end
local hostname = ascii_hostname(host)
-- If there's no port, just return hostname. -- If there's no port, just return hostname.
if not port then return stdnse.get_hostname(host) end if not port then return hostname end
if type(port) == "string" then if type(port) == "string" then
port = tonumber(port) port = tonumber(port)
assert(port, "Invalid port: not a number or table") assert(port, "Invalid port: not a number or table")
@@ -200,7 +202,7 @@ local function get_host_field(host, port, scheme)
if scheme then if scheme then
-- Caller provided scheme. If it's default, return just the hostname. -- Caller provided scheme. If it's default, return just the hostname.
if number == get_default_port(scheme) then if number == get_default_port(scheme) then
return stdnse.get_hostname(host) return hostname
end end
else else
scheme = url.get_default_scheme(port) scheme = url.get_default_scheme(port)
@@ -210,12 +212,12 @@ local function get_host_field(host, port, scheme)
if (ssl_port and scheme == 'https') or if (ssl_port and scheme == 'https') or
(not ssl_port and scheme == 'http') then (not ssl_port and scheme == 'http') then
-- If it's SSL and https, or if it's plaintext and http, return just the hostname. -- If it's SSL and https, or if it's plaintext and http, return just the hostname.
return stdnse.get_hostname(host) return hostname
end end
end end
end end
-- No special cases matched, so include the port number in the host header -- No special cases matched, so include the port number in the host header
return stdnse.get_hostname(host) .. ":" .. number return hostname .. ":" .. number
end end
-- Skip *( SP | HT ) starting at offset. See RFC 2616, section 2.2. -- Skip *( SP | HT ) starting at offset. See RFC 2616, section 2.2.
@@ -1076,7 +1078,7 @@ local function lookup_cache (method, host, port, path, options)
if type(port) == "table" then port = port.number end if type(port) == "table" then port = port.number end
local key = stdnse.get_hostname(host)..":"..port..":"..path; local key = ascii_hostname(host)..":"..port..":"..path;
local mutex = nmap.mutex(tostring(lookup_cache)..key); local mutex = nmap.mutex(tostring(lookup_cache)..key);
local state = { local state = {
@@ -1615,7 +1617,7 @@ local redirect_ok_rules = {
-- * ccTLDs are not treated as such. The rule will not stop a redirect -- * ccTLDs are not treated as such. The rule will not stop a redirect
-- from foo.co.uk to bar.co.uk even though it logically should. -- from foo.co.uk to bar.co.uk even though it logically should.
function (url, host, port) function (url, host, port)
local hostname = stdnse.get_hostname(host) local hostname = ascii_hostname(host)
if hostname == host.ip then if hostname == host.ip then
return url.host == hostname return url.host == hostname
end end
@@ -1700,7 +1702,7 @@ function parse_redirect(host, port, path, response)
local u = url.parse(response.header.location) local u = url.parse(response.header.location)
if ( not(u.host) ) then if ( not(u.host) ) then
-- we're dealing with a relative url -- we're dealing with a relative url
u.host = stdnse.get_hostname(host) u.host = ascii_hostname(host)
end end
-- do port fixup -- do port fixup
u.port = u.port or get_default_port(u.scheme) or port.number u.port = u.port or get_default_port(u.scheme) or port.number
@@ -1811,7 +1813,7 @@ function get_url( u, options )
path = path .. "?" .. parsed.query path = path .. "?" .. parsed.query
end end
return get( parsed.host, port, path, options ) return get( parsed.ascii_host or parsed.host, port, path, options )
end end
---Fetches a resource with a HEAD request. ---Fetches a resource with a HEAD request.
@@ -2857,7 +2859,7 @@ end
--@param contenttype [optional] The content-type value for the path, if it's known. --@param contenttype [optional] The content-type value for the path, if it's known.
function save_path(host, port, path, status, links_to, linked_from, contenttype) function save_path(host, port, path, status, links_to, linked_from, contenttype)
-- Make sure we have a proper hostname and port -- Make sure we have a proper hostname and port
host = stdnse.get_hostname(host) host = ascii_hostname(host)
if(type(port) == 'table') then if(type(port) == 'table') then
port = port['number'] port = port['number']
end end
@@ -2888,42 +2890,50 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
end end
end end
if parsed.host then
host = parsed.ascii_host or parsed.host
end
if parsed.port then
port = parsed.port
end
-- Add to the 'all_pages' key -- Add to the 'all_pages' key
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages'}, parsed['path']) stdnse.registry_add_array({host, 'www', port, 'all_pages'}, parsed['path'])
-- Add the URL with querystring to all_pages_full_query -- Add the URL with querystring to all_pages_full_query
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages_full_query'}, parsed['path_query']) stdnse.registry_add_array({host, 'www', port, 'all_pages_full_query'}, parsed['path_query'])
-- Add the URL to a key matching the response code -- Add the URL to a key matching the response code
if(status) then if(status) then
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'status_codes', status}, parsed['path']) stdnse.registry_add_array({host, 'www', port, 'status_codes', status}, parsed['path'])
end end
-- If it's a directory, add it to the directories list; otherwise, add it to the files list -- If it's a directory, add it to the directories list; otherwise, add it to the files list
if(parsed['is_folder']) then if(parsed['is_folder']) then
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'directories'}, parsed['path']) stdnse.registry_add_array({host, 'www', port, 'directories'}, parsed['path'])
else else
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'files'}, parsed['path']) stdnse.registry_add_array({host, 'www', port, 'files'}, parsed['path'])
end end
-- If we have an extension, add it to the extensions key -- If we have an extension, add it to the extensions key
if(parsed['extension']) then if(parsed['extension']) then
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'extensions', parsed['extension']}, parsed['path']) stdnse.registry_add_array({host, 'www', port, 'extensions', parsed['extension']}, parsed['path'])
end end
-- Add an entry for the page and its arguments -- Add an entry for the page and its arguments
if(parsed['querystring']) then if(parsed['querystring']) then
-- Add all scripts with a querystring to the 'cgi' and 'cgi_full_query' keys -- Add all scripts with a querystring to the 'cgi' and 'cgi_full_query' keys
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi'}, parsed['path']) stdnse.registry_add_array({host, 'www', port, 'cgi'}, parsed['path'])
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_full_query'}, parsed['path_query']) stdnse.registry_add_array({host, 'www', port, 'cgi_full_query'}, parsed['path_query'])
-- Add the query string alone to the registry (probably not necessary) -- Add the query string alone to the registry (probably not necessary)
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_querystring', parsed['path'] }, parsed['raw_querystring']) stdnse.registry_add_array({host, 'www', port, 'cgi_querystring', parsed['path'] }, parsed['raw_querystring'])
-- Add the individual arguments for the page, along with their values -- Add the individual arguments for the page, along with their values
for key, value in pairs(parsed['querystring']) do for key, value in pairs(parsed['querystring']) do
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_args', parsed['path']}, parsed['querystring']) stdnse.registry_add_array({host, 'www', port, 'cgi_args', parsed['path']}, parsed['querystring'])
end end
end end
@@ -2934,7 +2944,7 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
end end
for _, v in ipairs(links_to) do for _, v in ipairs(links_to) do
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'links_to', parsed['path_query']}, v) stdnse.registry_add_array({host, 'www', port, 'links_to', parsed['path_query']}, v)
end end
end end
@@ -2945,13 +2955,13 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
end end
for _, v in ipairs(linked_from) do for _, v in ipairs(linked_from) do
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'links_to', v}, parsed['path_query']) stdnse.registry_add_array({host, 'www', port, 'links_to', v}, parsed['path_query'])
end end
end end
-- Save it as a content-type, if we have one -- Save it as a content-type, if we have one
if(contenttype) then if(contenttype) then
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'content-type', contenttype}, parsed['path_query']) stdnse.registry_add_array({host, 'www', port, 'content-type', contenttype}, parsed['path_query'])
end end
end end

View File

@@ -138,6 +138,21 @@ local function normalize_escape (s)
return escape(unescape(s)) return escape(unescape(s))
end end
---
-- Returns the hostname of a host in an ASCII-only form where possible.
--
-- The name is taken from <code>stdnse.get_hostname</code>. If it contains any
-- byte in the range 0x80-0xFF, it is decoded as UTF-8 and passed to
-- <code>idna.toASCII</code>. If any step of that conversion fails, the
-- hostname is returned unmodified.
--
-- @param host Any value accepted by <code>stdnse.get_hostname</code>.
-- @return The converted hostname, or the original hostname when it is already
--         ASCII or could not be converted.
function ascii_hostname(host)
  local hostname = stdnse.get_hostname(host)
  if hostname:match("[\x80-\xff]") then
    -- TODO: Allow other Unicode encodings
    -- Run the conversion in protected mode so that a name which is not valid
    -- UTF-8 falls back to the raw hostname instead of aborting the caller.
    -- NOTE(review): assumes unicode.decode/idna.toASCII may raise on
    -- malformed input rather than always returning nil -- confirm.
    local ok, ascii_host = pcall(function ()
      local decoded = unicode.decode(hostname, unicode.utf8_dec)
      -- "and" truncates toASCII's results to its first value.
      return decoded and idna.toASCII(decoded)
    end)
    if ok and ascii_host then
      hostname = ascii_host
    end
  end
  return hostname
end
--- ---
-- Parses a URL and returns a table with all its parts according to RFC 3986. -- Parses a URL and returns a table with all its parts according to RFC 3986.
-- --
@@ -219,8 +234,7 @@ function parse(url, default)
function(p) parsed.port = tonumber(p); return "" end) function(p) parsed.port = tonumber(p); return "" end)
if authority ~= "" then parsed.host = authority end if authority ~= "" then parsed.host = authority end
if parsed.host then if parsed.host then
-- TODO: Allow other Unicode encodings parsed.ascii_host = ascii_hostname(parsed.host)
parsed.ascii_host = idna.toASCII(unicode.decode(parsed.host, unicode.utf8_dec))
end end
local userinfo = parsed.userinfo local userinfo = parsed.userinfo
if not userinfo then return parsed end if not userinfo then return parsed end

View File

@@ -65,6 +65,7 @@
#include "nmap.h" #include "nmap.h"
#include <locale.h>
#include "nbase.h" #include "nbase.h"
#include <dnet.h> #include <dnet.h>
#include "tcpip.h" #include "tcpip.h"
@@ -419,7 +420,15 @@ struct addrinfo *resolve_all(const char *hostname, int pf) {
hints.ai_family = pf; hints.ai_family = pf;
/* Otherwise we get multiple identical addresses with different socktypes. */ /* Otherwise we get multiple identical addresses with different socktypes. */
hints.ai_socktype = SOCK_DGRAM; hints.ai_socktype = SOCK_DGRAM;
#ifdef AI_IDN
/* Try resolving internationalized domain names */
hints.ai_flags = AI_IDN;
setlocale(LC_CTYPE, "");
#endif
rc = getaddrinfo(hostname, NULL, &hints, &result); rc = getaddrinfo(hostname, NULL, &hints, &result);
#ifdef AI_IDN
setlocale(LC_CTYPE, o.locale);
#endif
if (rc != 0){ if (rc != 0){
if (o.debugging > 1) if (o.debugging > 1)
error("Error resolving %s: %s", hostname, gai_strerror(rc)); error("Error resolving %s: %s", hostname, gai_strerror(rc));