From bec78e7ea9d37bcb8dc67bde9cc4c111c5150866 Mon Sep 17 00:00:00 2001
From: dmiller
Date: Thu, 28 Sep 2017 01:57:26 +0000
Subject: [PATCH] Allow parsing Unicode URLs

---
Review note: the ascii_host conversion is now guarded by `if parsed.host`,
since parsed.host is only assigned when the authority is non-empty. The commit
id and index hashes are those of the original, unamended patch.

 nselib/url.lua | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/nselib/url.lua b/nselib/url.lua
index 7d182ec77..296406157 100644
--- a/nselib/url.lua
+++ b/nselib/url.lua
@@ -36,6 +36,8 @@ local _G = require "_G"
 local stdnse = require "stdnse"
 local string = require "string"
 local table = require "table"
+local idna = require "idna"
+local unicode = require "unicode"
 local unittest = require "unittest"
 local base = _G
 
@@ -199,6 +201,12 @@ function parse(url, default)
   authority = string.gsub(authority, ":(%d+)$",
     function(p) parsed.port = tonumber(p); return "" end)
   if authority ~= "" then parsed.host = authority end
+  -- parsed.host is only assigned above when the authority is non-empty, so it
+  -- may still be nil here; never hand nil to unicode.decode.
+  if parsed.host then
+    -- TODO: Allow other Unicode encodings
+    parsed.ascii_host = idna.toASCII(unicode.decode(parsed.host, unicode.utf8_dec))
+  end
   local userinfo = parsed.userinfo
   if not userinfo then return parsed end
   userinfo = string.gsub(userinfo, ":([^:]*)$",