-- Excerpt from nselib/unicode.lua. The code below follows the end of
-- utf8_dec(buf, pos), whose final lines are:
--   return pos + 1 + n, cp
-- end

--Invert a one-to-one mapping
--
-- If two keys of t share a value, only one of them survives in the result
-- (which one is unspecified, because pairs() has no defined order).
--@param t The table to invert
--@return A new table that maps every value of t back to its key
local function invert(t)
  local out = {}
  for k, v in pairs(t) do
    out[v] = k
  end
  return out
end

-- Code Page 437, native US-English Windows OEM code page
-- Maps the byte values 0x80-0xff to Unicode code points. Byte values below
-- 0x80 are not listed: cp437_enc and cp437_dec pass them through unchanged.
local cp437_decode = {
  [0x80] = 0x00c7,
  [0x81] = 0x00fc,
  [0x82] = 0x00e9,
  [0x83] = 0x00e2,
  [0x84] = 0x00e4,
  [0x85] = 0x00e0,
  [0x86] = 0x00e5,
  [0x87] = 0x00e7,
  [0x88] = 0x00ea,
  [0x89] = 0x00eb,
  [0x8a] = 0x00e8,
  [0x8b] = 0x00ef,
  [0x8c] = 0x00ee,
  [0x8d] = 0x00ec,
  [0x8e] = 0x00c4,
  [0x8f] = 0x00c5,
  [0x90] = 0x00c9,
  [0x91] = 0x00e6,
  [0x92] = 0x00c6,
  [0x93] = 0x00f4,
  [0x94] = 0x00f6,
  [0x95] = 0x00f2,
  [0x96] = 0x00fb,
  [0x97] = 0x00f9,
  [0x98] = 0x00ff,
  [0x99] = 0x00d6,
  [0x9a] = 0x00dc,
  [0x9b] = 0x00a2,
  [0x9c] = 0x00a3,
  [0x9d] = 0x00a5,
  [0x9e] = 0x20a7,
  [0x9f] = 0x0192,
  [0xa0] = 0x00e1,
  [0xa1] = 0x00ed,
  [0xa2] = 0x00f3,
  [0xa3] = 0x00fa,
  [0xa4] = 0x00f1,
  [0xa5] = 0x00d1,
  [0xa6] = 0x00aa,
  [0xa7] = 0x00ba,
  [0xa8] = 0x00bf,
  [0xa9] = 0x2310,
  [0xaa] = 0x00ac,
  [0xab] = 0x00bd,
  [0xac] = 0x00bc,
  [0xad] = 0x00a1,
  [0xae] = 0x00ab,
  [0xaf] = 0x00bb,
  [0xb0] = 0x2591,
  [0xb1] = 0x2592,
  [0xb2] = 0x2593,
  [0xb3] = 0x2502,
  [0xb4] = 0x2524,
  [0xb5] = 0x2561,
  [0xb6] = 0x2562,
  [0xb7] = 0x2556,
  [0xb8] = 0x2555,
  [0xb9] = 0x2563,
  [0xba] = 0x2551,
  [0xbb] = 0x2557,
  [0xbc] = 0x255d,
  [0xbd] = 0x255c,
  [0xbe] = 0x255b,
  [0xbf] = 0x2510,
  [0xc0] = 0x2514,
  [0xc1] = 0x2534,
  [0xc2] = 0x252c,
  [0xc3] = 0x251c,
  [0xc4] = 0x2500,
  [0xc5] = 0x253c,
  [0xc6] = 0x255e,
  [0xc7] = 0x255f,
  [0xc8] = 0x255a,
  [0xc9] = 0x2554,
  [0xca] = 0x2569,
  [0xcb] = 0x2566,
  [0xcc] = 0x2560,
  [0xcd] = 0x2550,
  [0xce] = 0x256c,
  [0xcf] = 0x2567,
  [0xd0] = 0x2568,
  [0xd1] = 0x2564,
  [0xd2] = 0x2565,
  [0xd3] = 0x2559,
  [0xd4] = 0x2558,
  [0xd5] = 0x2552,
  [0xd6] = 0x2553,
  [0xd7] = 0x256b,
  [0xd8] = 0x256a,
  [0xd9] = 0x2518,
  [0xda] = 0x250c,
  [0xdb] = 0x2588,
  [0xdc] = 0x2584,
  [0xdd] = 0x258c,
  [0xde] = 0x2590,
  [0xdf] = 0x2580,
  [0xe0] = 0x03b1,
  [0xe1] = 0x00df,
  [0xe2] = 0x0393,
  [0xe3] = 0x03c0,
  [0xe4] = 0x03a3,
  [0xe5] = 0x03c3,
  [0xe6] = 0x00b5,
  [0xe7] = 0x03c4,
  [0xe8] = 0x03a6,
  [0xe9] = 0x0398,
  [0xea] = 0x03a9,
  [0xeb] = 0x03b4,
  [0xec] = 0x221e,
  [0xed] = 0x03c6,
  [0xee] = 0x03b5,
  [0xef] = 0x2229,
  [0xf0] = 0x2261,
  [0xf1] = 0x00b1,
  [0xf2] = 0x2265,
  [0xf3] = 0x2264,
  [0xf4] = 0x2320,
  [0xf5] = 0x2321,
  [0xf6] = 0x00f7,
  [0xf7] = 0x2248,
  [0xf8] = 0x00b0,
  [0xf9] = 0x2219,
  [0xfa] = 0x00b7,
  [0xfb] = 0x221a,
  [0xfc] = 0x207f,
  [0xfd] = 0x00b2,
  [0xfe] = 0x25a0,
  [0xff] = 0x00a0,
}
-- Reverse lookup (Unicode code point -> CP437 byte value) used by cp437_enc
local cp437_encode = invert(cp437_decode)

---Encode a Unicode code point to CP437
--
-- Code points below 0x80 are emitted as the byte with the same value; all
-- others are looked up in the CP437 table.
--
-- Returns nil if the code point cannot be found in CP437. This includes
-- negative and fractional numbers, which are not code points at all.
--@param cp The Unicode code point as a number
--@return A string containing the related CP437 character, or nil
function cp437_enc(cp)
  -- Only whole numbers in 0x00-0x7f are handed to char() directly: char()
  -- raises an error for negative values and for numbers without an integer
  -- representation, whereas the contract here is to return nil.
  if cp >= 0 and cp < 0x80 and cp % 1 == 0 then
    return char(cp)
  end
  local bv = cp437_encode[cp]
  if bv == nil then
    return nil
  end
  return char(bv)
end

---Decodes a CP437 character
--
-- Returns nil if there is no byte at pos (empty buffer, or pos beyond the
-- end of the buffer).
--@param buf A string containing the character
--@param pos The index in the string where the character begins. Default: 1
--@return pos The index in the string just after the decoded character
--@return cp The code point of the character as a number
function cp437_dec(buf, pos)
  pos = pos or 1
  local bv = byte(buf, pos)
  if bv == nil then
    -- byte() yields nothing for an out-of-range index; comparing that nil
    -- with a number below would raise an error.
    return nil
  end
  if bv < 0x80 then
    -- The lower half of CP437 is passed through unchanged
    return pos + 1, bv
  end
  return pos + 1, cp437_decode[bv]
end

---Helper function for the common case of UTF-16 to UTF-8 transcoding, such as
--from a Windows/SMB unicode string to a printable ASCII (subset of UTF-8)
--string.
-- Tail of the unicode.lua self-tests (earlier registrations precede this
-- excerpt). Each case computes its actual value first, then registers the
-- comparison with the test suite under a descriptive name.

-- UTF-16 big-endian decoding; the first four bytes decode to 0x12345
do
  local decoded = decode("\xD8\x08\xDF\x45\0=\0R\0a", utf16_dec, true)
  local check = unittest.table_equal(decoded, {0x12345,61,82,97})
  test_suite:add_test(check, "decode utf-16, big-endian")
end

-- UTF-16 to UTF-8 transcoding helper
do
  local transcoded = utf16to8("\x08\xD8\x45\xDF=\0R\0a\0")
  local check = unittest.equal(transcoded, "\xF0\x92\x8D\x85=Ra")
  test_suite:add_test(check, "utf16to8")
end

-- UTF-8 to UTF-16 transcoding helper
do
  local transcoded = utf8to16("\xF0\x92\x8D\x85=Ra")
  local check = unittest.equal(transcoded, "\x08\xD8\x45\xDF=\0R\0a\0")
  test_suite:add_test(check, "utf8to16")
end

-- CP437 encoding: two code points from the upper half plus one ASCII digit
do
  local encoded = encode({0x221e, 0x2248, 0x30}, cp437_enc)
  local check = unittest.equal(encoded, "\xec\xf70")
  test_suite:add_test(check, "encode cp437")
end

-- CP437 decoding: one upper-half byte followed by plain ASCII
do
  local decoded = decode("\x81ber", cp437_dec)
  local check = unittest.table_equal(decoded, {0xfc, 0x62, 0x65, 0x72})
  test_suite:add_test(check, "decode cp437")
end

return _ENV