mirror of
https://github.com/nmap/nmap.git
synced 2025-12-09 14:11:29 +00:00
631 lines
16 KiB
Lua
631 lines
16 KiB
Lua
---
-- Library methods for handling punycode strings.
--
-- Punycode is a simple and efficient transfer encoding syntax designed
-- for use with Internationalized Domain Names in Applications (IDNA).
-- It uniquely and reversibly transforms a Unicode string into an ASCII
-- string. ASCII characters in the Unicode string are represented
-- literally, and non-ASCII characters are represented by ASCII
-- characters that are allowed in host name labels (letters, digits, and
-- hyphens). RFC 3492 defines a general algorithm called
-- Bootstring that allows a string of basic code points to uniquely
-- represent any string of code points drawn from a larger set.
-- Punycode is an instance of Bootstring that uses particular parameter
-- values specified by that RFC, appropriate for IDNA.
--
-- Advantages of the Bootstring algorithm are Completeness, Uniqueness,
-- Reversibility, Efficient encoding, Simplicity and Readability.
--
-- References:
-- * http://ietf.org/rfc/rfc3492.txt
--
-- @author Rewanth Cool
-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html
|
|
|
|
local stdnse = require "stdnse"
|
|
local string = require "string"
|
|
local math = require "math"
|
|
local table = require "table"
|
|
local unicode = require "unicode"
|
|
local unittest = require "unittest"
|
|
|
|
_ENV = stdnse.module("punycode", stdnse.seeall)
|
|
|
|
-- Localize a few functions for a tiny speed boost, since these will be
-- used frequently.
local floor = math.floor
local byte = string.byte
local char = string.char
local find = string.find
local match = string.match
local reverse = string.reverse
local sub = string.sub

-- Highest positive signed 32-bit integer value
local maxInt = 0x7FFFFFFF

-- Code points that are treated as label separators in addition to
-- U+002E (RFC 3490 separators)
local regexSeparators = {
  0x3002, -- Ideographic full stop
  0xFF0E, -- Fullwidth full stop
  0xFF61 -- Halfwidth ideographic full stop
}

-- Bootstring parameters
local base = 0x24        -- 36
local tMin = 0x1         -- 1
local tMax = 0x1A        -- 26
local skew = 0x26        -- 38
local damp = 0x2BC       -- 700
local initialBias = 0x48 -- 72
local initialN = 0x80    -- 128
-- Pass the code point as a number: a string argument only works while
-- Lua's implicit string-to-number coercion is enabled.
local delimiter = char(0x2D) -- "-"

-- Convenience shortcuts
local baseMinusTMin = base - tMin
|
|
|
|
-- Replaces every occurrence of a value in a table.
--
-- The table is modified in place; no copy is made.
--
-- @param tbl Table of values.
-- @param val Value to be replaced in the table.
-- @param new_val Value to be replaced with.
-- @return The table that was passed in, after the replacement.
local function find_and_replace(tbl, val, new_val)
  -- Overwriting existing fields while traversing is permitted; no new
  -- keys are ever added here.
  local key, current = next(tbl)
  while key ~= nil do
    if current == val then
      tbl[key] = new_val
    end
    key, current = next(tbl, key)
  end
  return tbl
end
|
|
|
|
|
|
-- Bias adaptation function as per section 3.4 of RFC 3492.
-- https://tools.ietf.org/html/rfc3492#section-3.4
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
--
-- @param delta Delta that was just encoded or decoded.
-- @param numPoints Divisor used to add a proportional share of the
--                  scaled delta back onto it.
-- @param firstTime When truthy the delta is divided by `damp`,
--                  otherwise it is halved.
-- @return The new bias value.
local function adapt(delta, numPoints, firstTime)
  local scaled
  if not firstTime then
    scaled = delta >> 1
  else
    scaled = floor(delta / damp)
  end
  scaled = scaled + floor(scaled / numPoints)

  -- Loop-invariant threshold: ((base - tMin) * tMax) / 2
  local limit = baseMinusTMin * tMax >> 1
  local offset = 0
  while scaled > limit do
    scaled = floor(scaled / baseMinusTMin)
    offset = offset + base
  end

  return floor(offset + (baseMinusTMin + 1) * scaled / (scaled + skew))
end
|
|
|
|
-- Maps a boolean onto an integer.
--
-- @param status Value to convert.
-- @return 1 when status is exactly `true`, 0 for any other value.
local function boolToNum(status)
  return status == true and 1 or 0
end
|
|
|
|
-- This function converts a basic code point into a digit/integer.
--
-- @param codePoint The basic numeric code point value.
-- @return The numeric value of a basic code point (for use in
-- representing integers) in the range `0` to `base - 1`, or `base` if
-- the code point does not represent a value.
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
local function basicToDigit(codePoint)
  -- Both ends of every range are checked explicitly. A one-sided test
  -- such as `codePoint - 0x41 < 0x1A` is also true for every code point
  -- below the range, which made characters like "-" or ":" look like
  -- digits (some of them with negative values).

  -- "0".."9" represent 26..35
  if codePoint >= 0x30 and codePoint <= 0x39 then
    return codePoint - 0x16
  end

  -- "A".."Z" represent 0..25
  if codePoint >= 0x41 and codePoint <= 0x5A then
    return codePoint - 0x41
  end

  -- "a".."z" represent 0..25
  if codePoint >= 0x61 and codePoint <= 0x7A then
    return codePoint - 0x61
  end

  return base
end
|
|
|
|
|
|
-- This function converts a digit/integer into a basic code point.
--
-- @param digit The numeric value of a basic code point, expected to be
-- in the range `0` to `base - 1`.
-- @param flag If anything other than `0`, 32 is subtracted from the
-- result, which turns a lowercase letter into its uppercase form. The
-- result is not meaningful when `flag` is non-zero and `digit` has no
-- uppercase form.
-- @return The basic code point whose value (when used for
-- representing integers) is `digit`.
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
local function digitToBasic(digit, flag)
  -- 26..35 map to ASCII 0..9 (digit + 22)
  local codePoint = digit + 22

  -- 0..25 map to ASCII a..z (digit + 97)
  if digit < 26 then
    codePoint = codePoint + 75
  end

  -- Requested uppercase form: A..Z sit 32 below a..z
  if flag ~= 0 then
    codePoint = codePoint - 32
  end

  return codePoint
end
|
|
|
|
-- Encodes a string as Punycode (without any "xn--" prefix).
--
-- @param input String of input to be encoded.
-- @param decoder Decoder handed to unicode.decode to turn the input
--                into a table of code points.
-- @return The encoded string, or nil if an overflow was detected.
-- @return Error message when the first return value is nil.
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
function encode_input(input, decoder)

  -- Work on code points rather than on the raw bytes of the input.
  local codepoints = unicode.decode(input, decoder)
  local total = #codepoints

  local result = {}

  -- Basic code points (below 0x80) are copied to the output verbatim.
  for _, cp in ipairs(codepoints) do
    if cp < 0x80 then
      result[#result + 1] = char(cp)
    end
  end

  -- `basicLength` is the number of basic code points.
  -- `handled` is the number of code points that have been handled.
  local basicLength = #result
  local handled = basicLength

  -- Finish the basic string with a delimiter unless it's empty.
  if basicLength > 0 then
    result[#result + 1] = delimiter
  end

  -- Encoder state.
  local n = initialN
  local delta = 0
  local bias = initialBias

  while handled < total do
    -- All non-basic code points < n have been handled already. Find
    -- the next larger one.
    local m = maxInt
    for _, cp in ipairs(codepoints) do
      if cp >= n and cp < m then
        m = cp
      end
    end

    -- Increase `delta` enough to advance the decoder's <n,i> state to
    -- <m,0>, but guard against overflow.
    local handledPlusOne = handled + 1
    if m - n > floor((maxInt - delta) / handledPlusOne) then
      return nil, "Overflow exception occurred."
    end
    delta = delta + (m - n) * handledPlusOne
    n = m

    for _, cp in ipairs(codepoints) do
      if cp < n then
        delta = delta + 1
        if delta > maxInt then
          return nil, "Overflow exception occurred."
        end
      end

      if cp == n then
        -- Represent delta as a generalized variable-length integer.
        local q = delta
        local k = base

        while true do
          local t
          if k <= bias then
            t = tMin
          elseif k >= bias + tMax then
            t = tMax
          else
            t = k - bias
          end

          if q < t then
            break
          end

          local rest = q - t
          local span = base - t
          result[#result + 1] = char(digitToBasic(t + rest % span, 0))

          q = floor(rest / span)
          k = k + base
        end

        -- Final (most significant) digit of this delta.
        result[#result + 1] = char(digitToBasic(q, 0))
        bias = adapt(delta, handledPlusOne, handled == basicLength)

        delta = 0
        handled = handled + 1
      end
    end

    delta = delta + 1
    n = n + 1
  end

  return table.concat(result, '')
end
|
|
|
|
-- Converts a Punycode string of ASCII-only symbols to a
-- string of Unicode symbols.
--
-- @param input The Punycode string of ASCII-only symbols (without any
--              "xn--" prefix).
-- @param encoder Defines the type of encoding format to be used; it is
--                handed to unicode.encode together with the decoded
--                code points.
-- @return The resulting string of Unicode symbols, or nil on failure.
-- @return Error message when the first return value is nil: a non-basic
--         byte before the last delimiter, a character that is not a
--         Punycode digit or a truncated integer, or an overflow.
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
function decode_input(input, encoder)

  local output = {}
  local inputLength = #input
  local i = 0
  local n = initialN
  local bias = initialBias

  -- Everything before the last delimiter is copied literally. The last
  -- delimiter is the first one in the reversed string; search for it
  -- once, as plain text, because "-" is a magic pattern character.
  local basic = 0
  local reversedPos = find(reverse(input), delimiter, 1, true)
  if reversedPos then
    basic = inputLength - reversedPos
  end

  for j = 1, basic do
    local value = byte(input, j)

    if value >= 0x80 then
      return nil, "Not basic exception occurred."
    end
    output[#output + 1] = value
  end

  -- `index` counts the characters consumed so far (0-based position of
  -- the next character); skip the delimiter if basic code points exist.
  local index = basic > 0 and basic + 1 or 0

  while index < inputLength do
    -- Decode a generalized variable-length integer into `i`.
    local oldi = i
    local w = 1
    local k = base

    while true do

      if index >= inputLength then
        return nil, "Invalid input exception occurred."
      end

      index = index + 1
      local digit = basicToDigit(byte(input, index))

      -- Anything outside 0..base-1 is not a Punycode digit. Negative
      -- values are rejected as well so that they can never be folded
      -- into `i`.
      if digit < 0 or digit >= base then
        return nil, "Invalid input exception occurred."
      end

      if digit > floor((maxInt - i) / w) then
        return nil, "Overflow exception occurred."
      end
      i = i + digit * w

      local t
      if k <= bias then
        t = tMin
      elseif k >= bias + tMax then
        t = tMax
      else
        t = k - bias
      end

      if digit < t then
        break
      end

      local baseMinusT = base - t
      if w > floor(maxInt / baseMinusT) then
        return nil, "Overflow exception occurred."
      end

      w = w * baseMinusT
      k = k + base

    end

    local out = #output + 1

    bias = adapt(i - oldi, out, oldi == 0)

    -- `i` was supposed to wrap around from `out` to `0`,
    -- incrementing `n` each time, so we'll fix that now:
    local wraps = floor(i / out)
    if wraps > maxInt - n then
      return nil, "Overflow exception occurred."
    end

    n = n + wraps
    i = i % out

    -- Insert code point `n` at 0-based position `i`.
    table.insert(output, i + 1, n)
    i = i + 1
  end

  return unicode.encode(output, encoder)

end
|
|
|
|
--- Performs punycode encoding on a label.
--
-- Labels that contain only code points in the range 0..127 are returned
-- unchanged; all others are encoded and prefixed with "xn--".
--
-- @param s String of input to be encoded.
-- @param decoder A decoder function to convert the label into a
--                table of Unicode code points.
-- @return Returns the encoded string, or nil if encoding failed.
-- @return Error message when the first return value is nil.
function encode_label(s, decoder)

  -- Look for any code point outside the ASCII range.
  local needs_encoding = false
  for _, cp in pairs(unicode.decode(s, decoder)) do
    if cp < 0 or cp > 127 then
      needs_encoding = true
      break
    end
  end

  if not needs_encoding then
    return s
  end

  local encoded, err = encode_input(s, decoder)
  if err then
    return nil, err
  end

  return 'xn--' .. encoded

end
|
|
|
|
--- Decodes a punycode-encoded label to Unicode.
--
-- Labels that do not start with the ACE prefix are returned unchanged.
-- The prefix is recognised in any capitalization ("xn--", "XN--",
-- "Xn--", ...), and the remainder of the label is lower-cased before
-- it is decoded.
--
-- @param s String of input
-- @param encoder An encoder function to convert a Unicode code point
--                into a string of bytes. It is passed through to
--                decode_input unchanged.
-- @return Returns decoded string, or nil if decoding failed.
-- @return Error message when the first return value is nil.
function decode_label(s, encoder)

  if s:sub(1, 4):lower() ~= "xn--" then
    return s
  end

  local res, err = decode_input(s:sub(5):lower(), encoder)
  if err then
    return nil, err
  end

  return res

end
|
|
|
|
--- Applies a function to every label and joins the results.
--
-- @param labels Table (sequence) of labels to be processed.
-- @param fn The function to be called for every label. It is called as
--           `fn(label, formatter)` and is expected to return the
--           converted label, or a false value plus an error message.
-- @param formatter The encoder/decoder passed as second argument to
--                  `fn`.
-- @param delimiter Delimiter string for concatenating the output.
-- @return Returns the converted labels joined with `delimiter`, or nil
--         as soon as `fn` reports an error for a label.
-- @return The error message reported by `fn` when the first return
--         value is nil.
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
function mapLabels(labels, fn, formatter, delimiter)

  local encoded = {}

  for index, label in ipairs(labels) do

    local res, err = fn(label, formatter)

    if err then
      -- Never use the message itself as the format string: it would be
      -- misinterpreted if it ever contained a "%".
      stdnse.debug2("%s", err)
      return nil, err
    end

    encoded[index] = res
  end

  return table.concat(encoded, delimiter)

end
|
|
|
|
--- Splits a table of code points into labels at a delimiter.
--
-- @param codepoints Table (sequence) of code points.
-- @param delimiter Code point at which to split. Default: 0x002E
-- @return Table of tables, one per label, in input order. The
--         delimiter itself is not included; an empty label yields an
--         empty table.
function breakInput(codepoints, delimiter)

  local separator = delimiter or 0x002E

  -- The label currently being filled is registered up front, so the
  -- last (possibly empty) label needs no special handling at the end.
  local current = {}
  local labels = { current }

  for _, cp in ipairs(codepoints) do
    if cp == separator then
      current = {}
      labels[#labels + 1] = current
    else
      current[#current + 1] = cp
    end
  end

  return labels

end
|
|
|
|
---
-- This function converts the given domain name or string into its
-- punycode (ASCII) form, label by label.
--
-- @param input Domain or string to be encoded.
-- @param encoder An encoder function to convert a Unicode code
--                point into a string of bytes. Default: unicode.utf8_enc
-- @param decoder A decoder function to decode the input string
--                into an array of code points. Default: unicode.utf8_dec
-- @return Returns the encoded string as produced by mapLabels, or nil
--         if a label could not be encoded.
function encode(input, encoder, decoder)

  decoder = decoder or unicode.utf8_dec
  encoder = encoder or unicode.utf8_enc

  local codepoints = unicode.decode(input, decoder)

  -- Treat the alternative full stops as a plain "." so that they
  -- separate labels as well.
  local dot = byte('.')
  for _, separator in pairs(regexSeparators) do
    codepoints = find_and_replace(codepoints, separator, dot)
  end

  -- Split into labels and turn each label back into a string.
  local labels = {}
  for _, label in ipairs(breakInput(codepoints, 0x002E)) do
    labels[#labels + 1] = unicode.encode(label, encoder)
  end

  local joiner = unicode.encode({0x002E}, encoder)

  return mapLabels(labels, encode_label, decoder, joiner)
end
|
|
|
|
---
-- This function converts the given punycode domain name or string into
-- a Unicode string, label by label.
--
-- @param input Domain or string to be decoded.
-- @param encoder An encoder function to convert a Unicode code
--                point into a string of bytes. Default: unicode.utf8_enc
-- @param decoder A decoder function to decode the input string
--                into an array of code points. Default: unicode.utf8_dec
-- @return Returns the decoded string as produced by mapLabels, or nil
--         if a label could not be decoded.
function decode(input, encoder, decoder)

  encoder = encoder or unicode.utf8_enc
  decoder = decoder or unicode.utf8_dec

  local dot = 0x002E
  local joiner = unicode.encode({dot}, encoder)

  -- Split the input into labels and turn each label back into a string.
  local labels = {}
  for _, label in ipairs(breakInput(unicode.decode(input, decoder), dot)) do
    labels[#labels + 1] = unicode.encode(label, encoder)
  end

  return mapLabels(labels, decode_label, encoder, joiner)

end
|
|
|
|
-- Ignore the rest if we are not testing.
if not unittest.testing() then
  return _ENV
end

-- Punycode test vectors: { punycode (ASCII) form, UTF-8 form }.
local testCases = {
  { "xn--0zwm56d", "\xe6\xb5\x8b\xe8\xaf\x95" },
  { "xn--knigsgsschen-lcb0w", "k\xc3\xb6nigsg\xc3\xa4sschen" },
  { "xn--ab-fsf", "a\xe0\xa5\x8db" },
  { "xn--maana-pta", "ma\xc3\xb1ana" },
  { "xn----dqo34k", "\xe2\x98\x83-\xe2\x8c\x98" }
}

test_suite = unittest.TestSuite:new()

-- Every vector is checked in both directions: decoding the punycode
-- form and encoding the UTF-8 form.
for _, case in ipairs(testCases) do
  local ascii, utf8_form = case[1], case[2]
  test_suite:add_test(unittest.equal(decode(ascii), utf8_form))
  test_suite:add_test(unittest.equal(encode(utf8_form), ascii))
end

return _ENV
|