diff --git a/nselib/idna.lua b/nselib/idna.lua index 55ddc464a..73a9b28de 100644 --- a/nselib/idna.lua +++ b/nselib/idna.lua @@ -275,12 +275,9 @@ end -- @param checkJoiners Boolean flag to check for ContextJ rules in input. -- Default: false. -- @param useSTD3ASCIIRules Boolean value to represent ASCII rules. Default: true. --- @param delimiter codepoint of the character to be used as delimiter. --- @param An decoder function to decode the input string --- into an array of code points. -- @return Returns the IDNA ASCII format of the input. -- @return Throws nil, if there is any error in conversion. -function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, decoder) +function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules) -- Assigns default values if not specified. if transitionalProcessing == nil then @@ -306,9 +303,6 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch useSTD3ASCIIRules = true end - delimiter = delimiter or 0x002E - decoder = decoder or unicode.utf8_dec - local decoded_tbl, disallowedCodePoints = map(codepoints, useSTD3ASCIIRules, transitionalProcessing) if decoded_tbl == nil then @@ -321,7 +315,7 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch end -- Breaks the codepoints into multiple tables using delimiter. - decoded_tbl = breakInput(decoded_tbl, delimiter) + decoded_tbl = breakInput(decoded_tbl, 0x2E) if decoded_tbl == nil then return nil @@ -341,7 +335,10 @@ end --- Converts the input into Unicode codepoints based on IDNA rules. -- --- @param name A domain name in string format +-- Note that the input should already be a table of Unicode code points. If +-- your input is an ASCII string, convert it by using +-- unicode.decode with the unicode.utf8_dec decoder. +-- @param codepoints A domain name as a list of code points. -- @param transitionalProcessing Boolean value. Default: true. -- @param checkHyphens Boolean flag for checking hyphens presence in input. -- Default: true. @@ -350,12 +347,9 @@ end -- @param checkJoiners Boolean flag to check for ContextJ rules in input. -- Default: false. -- @param useSTD3ASCIIRules Boolean value to represent ASCII rules. Default: true. --- @param delimiter, codepoint of the character to be used as delimiter. --- @param encoder Encoder function to convert a Unicode codepoint into a --- string of bytes. -- @return Returns the Unicode format of the input based on IDNA rules. -- @return Throws nil, if there is any error in conversion. -function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder) +function toUnicode(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules) -- Assigns default values if not specified. if transitionalProcessing == nil then @@ -374,27 +368,36 @@ function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, useSTD3ASCIIRules = true end - delimiter = delimiter or 0x002E - encoder = encoder or unicode.utf8_enc - -- Breaks the codepoints into multiple tables using delimiter. - decoded_tbl = stdnse.strsplit('%'.. string.char(delimiter), decoded_tbl) + local decoded_tbl, disallowedCodePoints = map(codepoints, useSTD3ASCIIRules, transitionalProcessing) + decoded_tbl = breakInput(decoded_tbl, 0x2E) if decoded_tbl == nil then return nil end + -- Validates the codepoints and if any invalid codepoint found, returns nil. + --if not validate(decoded_tbl, checkHyphens) then + -- return nil + --end + local output = {} for i, label in ipairs(decoded_tbl) do - local decoded = punycode.decode_label(label) - for j = 1, #decoded do - output[#output+1] = decoded[j] + if label[1] == string.byte("x") and + label[2] == string.byte("n") and + label[3] == string.byte("-") and + label[4] == string.byte("-") then + local decoded = punycode.decode_label(unicode.encode(label, unicode.utf8_enc)) + label = decoded or label + end + for j = 1, #label do + output[#output+1] = label[j] end if i < #decoded_tbl then - output[#output+1] = delimiter + output[#output+1] = 0x2E end end - return unicode.encode(output, encoder) + return output end @@ -549,7 +552,7 @@ end for _, v in ipairs(encodingAndDecodingTestCases) do test_suite:add_test(unittest.equal(toASCII(unicode.decode(v[1], unicode.utf8_dec)), v[2])) - test_suite:add_test(unittest.equal(toUnicode(v[2],nil,nil,nil,nil,nil,nil,unicode.utf8_enc), v[1])) + test_suite:add_test(unittest.equal(unicode.encode(toUnicode(unicode.decode(v[2], unicode.utf8_dec)), unicode.utf8_enc), v[1])) end for _, v in ipairs(multipleProcessingTestCases) do