mirror of
https://github.com/nmap/nmap.git
synced 2025-12-06 04:31:29 +00:00
Move domain splitting and unicode decoding from punycode to idna.
This commit is contained in:
@@ -236,6 +236,33 @@ function validate(tableOfTables, checkHyphens)
|
||||
|
||||
end
|
||||
|
||||
--- Splits a table of codepoints into labels at every delimiter.
--
-- The delimiter itself is dropped. Consecutive delimiters, or a delimiter at
-- the start or end of the input, yield empty label tables, so the result
-- always holds exactly one more label than the number of delimiters found.
--
-- @param codepoints List-table of codepoints to split.
-- @param delimiter Codepoint of the delimiter. Default: 0x002E (".").
-- @return A list of list-tables of codepoints, one per label, in input order.
local function breakInput(codepoints, delimiter)

  -- Assign the default to the parameter itself instead of declaring a second
  -- local of the same name that shadows it.
  delimiter = delimiter or 0x002E

  local output = {}
  local label = {}

  for _, v in ipairs(codepoints) do
    if v == delimiter then
      -- Close the current label and start collecting the next one.
      output[#output + 1] = label
      label = {}
    else
      label[#label + 1] = v
    end
  end

  -- The last label has no trailing delimiter to close it.
  output[#output + 1] = label

  return output

end
|
||||
|
||||
--- Converts the input codepoints into ASCII text based on IDNA rules.
|
||||
--
|
||||
-- @param codepoints Table of codepoints of decoded input.
|
||||
@@ -249,13 +276,11 @@ end
|
||||
-- Default: false.
|
||||
-- @param useSTD3ASCIIRules Boolean value to represent ASCII rules. Default: true.
|
||||
-- @param delimiter codepoint of the character to be used as delimiter.
|
||||
-- @param encoder Encoder function to convert a Unicode codepoint into a
|
||||
-- string of bytes.
|
||||
-- @param decoder A decoder function to decode the input string
|
||||
-- into an array of code points.
|
||||
-- @return Returns the IDNA ASCII format of the input.
|
||||
-- @return Returns nil if there is any error in conversion.
|
||||
function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder, decoder)
|
||||
function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, decoder)
|
||||
|
||||
-- Assigns default values if not specified.
|
||||
if transitionalProcessing == nil then
|
||||
@@ -282,7 +307,6 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch
|
||||
end
|
||||
|
||||
delimiter = delimiter or 0x002E
|
||||
encoder = encoder or unicode.utf8_enc
|
||||
decoder = decoder or unicode.utf8_dec
|
||||
|
||||
local decoded_tbl, disallowedCodePoints = map(codepoints, useSTD3ASCIIRules, transitionalProcessing)
|
||||
@@ -297,7 +321,7 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch
|
||||
end
|
||||
|
||||
-- Breaks the codepoints into multiple tables using delimiter.
|
||||
decoded_tbl = punycode.breakInput(decoded_tbl, delimiter)
|
||||
decoded_tbl = breakInput(decoded_tbl, delimiter)
|
||||
|
||||
if decoded_tbl == nil then
|
||||
return nil
|
||||
@@ -308,20 +332,16 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch
|
||||
return nil
|
||||
end
|
||||
|
||||
local stringLabels = {}
|
||||
|
||||
-- Convert the codepoints into Unicode strings before passing them to mapLabels function.
|
||||
for _, label in ipairs(decoded_tbl) do
|
||||
table.insert(stringLabels, unicode.encode(label, encoder))
|
||||
for i, label in ipairs(decoded_tbl) do
|
||||
decoded_tbl[i] = punycode.encode_label(label)
|
||||
end
|
||||
|
||||
return punycode.mapLabels(stringLabels, punycode.encode_label, decoder, unicode.encode({0x002E}, encoder))
|
||||
return table.concat(decoded_tbl, ".")
|
||||
|
||||
end
|
||||
|
||||
--- Converts the input into Unicode codepoitns based on IDNA rules.
|
||||
--- Converts the input into Unicode codepoints based on IDNA rules.
|
||||
--
|
||||
-- @param codepoints Table of codepoints of decoded input.
|
||||
-- @param name A domain name in string format
|
||||
-- @param transitionalProcessing Boolean value. Default: true.
|
||||
-- @param checkHyphens Boolean flag for checking hyphens presence in input.
|
||||
-- Default: true.
|
||||
@@ -333,11 +353,9 @@ end
|
||||
-- @param delimiter Codepoint of the character to be used as delimiter.
|
||||
-- @param encoder Encoder function to convert a Unicode codepoint into a
|
||||
-- string of bytes.
|
||||
-- @param An decoder function to decode the input string
|
||||
-- into an array of code points.
|
||||
-- @return Returns the Unicode format of the input based on IDNA rules.
|
||||
-- @return Returns nil if there is any error in conversion.
|
||||
function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder, decoder)
|
||||
function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder)
|
||||
|
||||
-- Assigns default values if not specified.
|
||||
if transitionalProcessing == nil then
|
||||
@@ -358,22 +376,25 @@ function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi,
|
||||
|
||||
delimiter = delimiter or 0x002E
|
||||
encoder = encoder or unicode.utf8_enc
|
||||
decoder = decoder or unicode.utf8_dec
|
||||
|
||||
-- Breaks the codepoints into multiple tables using delimiter.
|
||||
decoded_tbl = punycode.breakInput(decoded_tbl, delimiter)
|
||||
decoded_tbl = stdnse.strsplit('%'.. string.char(delimiter), decoded_tbl)
|
||||
if decoded_tbl == nil then
|
||||
return nil
|
||||
end
|
||||
|
||||
local stringLabels = {}
|
||||
|
||||
-- Format the codepoints into strings before passing to punycode.mapLabels
|
||||
for _, label in ipairs(decoded_tbl) do
|
||||
table.insert(stringLabels, unicode.encode(label, encoder))
|
||||
local output = {}
|
||||
for i, label in ipairs(decoded_tbl) do
|
||||
local decoded = punycode.decode_label(label)
|
||||
for j = 1, #decoded do
|
||||
output[#output+1] = decoded[j]
|
||||
end
|
||||
if i < #decoded_tbl then
|
||||
output[#output+1] = delimiter
|
||||
end
|
||||
end
|
||||
|
||||
return punycode.mapLabels(stringLabels, punycode.decode_label, encoder, unicode.encode({0x002E}, encoder))
|
||||
return unicode.encode(output, encoder)
|
||||
|
||||
end
|
||||
|
||||
@@ -528,7 +549,7 @@ end
|
||||
|
||||
for _, v in ipairs(encodingAndDecodingTestCases) do
|
||||
test_suite:add_test(unittest.equal(toASCII(unicode.decode(v[1], unicode.utf8_dec)), v[2]))
|
||||
test_suite:add_test(unittest.equal(toUnicode(unicode.decode(v[2], unicode.utf8_dec)), v[1]))
|
||||
test_suite:add_test(unittest.equal(toUnicode(v[2],nil,nil,nil,nil,nil,nil,unicode.utf8_enc), v[1]))
|
||||
end
|
||||
|
||||
for _, v in ipairs(multipleProcessingTestCases) do
|
||||
|
||||
@@ -61,25 +61,6 @@ local delimiter = char("0x2D")
|
||||
-- Convenience shortcuts
|
||||
local baseMinusTMin = base - tMin
|
||||
|
||||
-- Replaces every occurrence of a value in a table, in place.
--
-- Every key of the table is visited (pairs), not only the array part. No copy
-- is made: the table passed in is the one that is modified and returned.
--
-- @param tbl Table of values; it is modified directly.
-- @param val Value to be replaced in the table.
-- @param new_val Value to be replaced with.
-- @return The same table that was passed in, after replacement.
local function find_and_replace(tbl, val, new_val)

  -- Assigning to an existing key during a pairs() traversal is permitted;
  -- no new keys are created here.
  for key, item in pairs(tbl) do
    if item == val then
      tbl[key] = new_val
    end
  end

  return tbl

end
|
||||
|
||||
|
||||
-- Bias adaptation function as per section 3.4 of RFC 3492.
|
||||
-- https://tools.ietf.org/html/rfc3492#section-3.4
|
||||
-- The following function is adapted from punycode.js by Mathias Bynens
|
||||
@@ -162,18 +143,15 @@ end
|
||||
|
||||
-- Creates a string based on an array of numeric code points.
|
||||
--
|
||||
-- @param input String of input to be encoded.
|
||||
-- @param input list-table of Unicode code points
|
||||
-- @param decoder Sets the decoding format to be used.
|
||||
-- @return The new encoded string
|
||||
-- The following function is adapted from punycode.js by Mathias Bynens
|
||||
-- under the MIT License.
|
||||
function encode_input(input, decoder)
|
||||
function encode_input(input)
|
||||
|
||||
local output = {}
|
||||
|
||||
-- Convert the input into an array of Unicode code points.
|
||||
input = unicode.decode(input, decoder)
|
||||
|
||||
-- Cache the length.
|
||||
local inputLength = #input
|
||||
|
||||
@@ -283,14 +261,13 @@ function encode_input(input, decoder)
|
||||
end
|
||||
|
||||
-- Converts a Punycode string of ASCII-only symbols to a
|
||||
-- string of Unicode symbols.
|
||||
-- list-table of Unicode code points.
|
||||
--
|
||||
-- @param input The Punycode string of ASCII-only symbols.
|
||||
-- @param encoder Defines the type of encoding format to be used.
|
||||
-- @return The resulting string of Unicode symbols.
|
||||
-- @return The resulting list-table of Unicode code points.
|
||||
-- The following function is adapted from punycode.js by Mathias Bynens
|
||||
-- under the MIT License.
|
||||
function decode_input(input, encoder)
|
||||
function decode_input(input)
|
||||
|
||||
local output = {}
|
||||
local inputLength = #input
|
||||
@@ -397,23 +374,23 @@ function decode_input(input, encoder)
|
||||
i = i + 1
|
||||
end
|
||||
|
||||
return unicode.encode(output, encoder)
|
||||
return output
|
||||
|
||||
end
|
||||
|
||||
-- Performs punycode encoding on a label
|
||||
--
|
||||
-- @param s String of input to be encoded.
|
||||
-- @param decoder A decoder function to convert the domain into a
|
||||
-- table of Unicode code points.
|
||||
-- @return Returns encoded string.
|
||||
function encode_label(s, decoder)
|
||||
-- If the label is already ASCII, it is returned as a string. If any encoding
|
||||
-- was required, the "xn--" prefix is added.
|
||||
--
|
||||
-- @param u A list-table of Unicode code points representing a domain label
|
||||
-- @return A punycode-encoded ASCII string
|
||||
function encode_label(u)
|
||||
|
||||
local flag = false
|
||||
local decoded_tbl = unicode.decode(s, decoder)
|
||||
|
||||
-- Looks for non-ASCII character
|
||||
for _, val in pairs(decoded_tbl) do
|
||||
for _, val in pairs(u) do
|
||||
|
||||
if not (val >=0 and val <= 127) then
|
||||
flag = true
|
||||
@@ -424,7 +401,7 @@ function encode_label(s, decoder)
|
||||
|
||||
if flag then
|
||||
|
||||
local res, err = encode_input(s, decoder)
|
||||
local res, err = encode_input(u)
|
||||
if err then
|
||||
return nil, err
|
||||
end
|
||||
@@ -432,22 +409,24 @@ function encode_label(s, decoder)
|
||||
return 'xn--' .. res
|
||||
|
||||
else
|
||||
return s
|
||||
return unicode.encode(u, unicode.utf8_enc)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
--- Decodes a punycode-encoded label to Unicode.
|
||||
--
|
||||
-- @param s String of input
|
||||
-- @param encoder An encoder function to convert a Unicode code point
|
||||
-- into a string of bytes. Default: unicode.utf8_enc
|
||||
-- @return Returns decoded string.
|
||||
function decode_label(s, encoder)
|
||||
-- If the label starts with "xn--", it will be punycode-decoded. Otherwise, it
|
||||
-- will be decoded as UTF-8 (ASCII). The return value is always a table of
|
||||
-- Unicode code points.
|
||||
--
|
||||
-- @param s String of input.
|
||||
-- @return A table of Unicode code points.
|
||||
function decode_label(s)
|
||||
|
||||
if match(s, "^xn%-%-") then
|
||||
|
||||
local res, err = decode_input(sub(s, 5):lower(), encoder)
|
||||
local res, err = decode_input(sub(s, 5))
|
||||
if err then
|
||||
return nil, err
|
||||
end
|
||||
@@ -455,67 +434,11 @@ function decode_label(s, encoder)
|
||||
return res
|
||||
|
||||
else
|
||||
return s
|
||||
return unicode.decode(s, unicode.utf8_dec)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
--- Applies a function to every label and joins the results.
--
-- @param labels List-table of labels to be processed.
-- @param fn The function to be called for every label. It is called as
--           fn(label, formatter) and is expected to return the processed
--           label, or nil followed by an error message.
-- @param formatter The encoder/decoder that is passed through to fn.
-- @param delimiter Delimiter string used when concatenating the output.
-- @return The processed labels joined by delimiter, or nil if fn reported an
--         error for any label.
-- The following function is adapted from punycode.js by Mathias Bynens
-- under the MIT License.
function mapLabels(labels, fn, formatter, delimiter)

  local encoded = {}

  for index, label in ipairs(labels) do

    local res, err = fn(label, formatter)

    if err then
      -- Log the message as data, never as the format string: an error text
      -- containing "%" would otherwise raise inside the logger instead of
      -- letting this function return nil.
      stdnse.debug2("%s", err)
      return nil
    end

    encoded[index] = res

  end

  return table.concat(encoded, delimiter)

end
|
||||
|
||||
--- Splits a table of codepoints into labels at every delimiter.
--
-- The delimiter itself is dropped. Consecutive delimiters, or a delimiter at
-- the start or end of the input, yield empty label tables, so the result
-- always holds exactly one more label than the number of delimiters found.
--
-- @param codepoints List-table of codepoints to split.
-- @param delimiter Codepoint of the delimiter. Default: 0x002E (".").
-- @return A list of list-tables of codepoints, one per label, in input order.
function breakInput(codepoints, delimiter)

  -- Assign the default to the parameter itself instead of declaring a second
  -- local of the same name that shadows it.
  delimiter = delimiter or 0x002E

  local output = {}
  local label = {}

  for _, v in ipairs(codepoints) do
    if v == delimiter then
      -- Close the current label and start collecting the next one.
      output[#output + 1] = label
      label = {}
    else
      label[#label + 1] = v
    end
  end

  -- The last label has no trailing delimiter to close it.
  output[#output + 1] = label

  return output

end
|
||||
|
||||
--Ignore the rest if we are not testing.
|
||||
if not unittest.testing() then
|
||||
return _ENV
|
||||
@@ -549,8 +472,8 @@ test_suite = unittest.TestSuite:new()
|
||||
|
||||
-- Running test cases against Encoding function.
|
||||
for i, v in ipairs(testCases) do
|
||||
test_suite:add_test(unittest.equal(decode_label(v[1], unicode.utf8_enc), v[2]))
|
||||
test_suite:add_test(unittest.equal(encode_label(v[2], unicode.utf8_dec), v[1]))
|
||||
test_suite:add_test(unittest.equal(unicode.encode(decode_label(v[1]), unicode.utf8_enc), v[2]))
|
||||
test_suite:add_test(unittest.equal(encode_label(unicode.decode(v[2], unicode.utf8_dec)), v[1]))
|
||||
end
|
||||
|
||||
return _ENV
|
||||
|
||||
Reference in New Issue
Block a user