mirror of
https://github.com/nmap/nmap.git
synced 2025-12-09 22:21:29 +00:00
Add CP437 (native Windows OEM) to unicode.lua
This commit is contained in:
@@ -189,6 +189,181 @@ function utf8_dec(buf, pos)
|
||||
return pos + 1 + n, cp
|
||||
end
|
||||
|
||||
-- Build the reverse lookup of a one-to-one mapping.
--
-- Every (key, value) pair of the input becomes a (value, key) pair in the
-- result. The input table is not modified. If two keys share the same value,
-- only one of them survives in the result (which one depends on traversal
-- order), so this is only meaningful for one-to-one mappings.
--@param t The table to invert
--@return A new table mapping each value of t back to its key
local function invert(t)
  local reversed = {}
  for key, value in pairs(t) do
    reversed[value] = key
  end
  return reversed
end
|
||||
|
||||
-- Code Page 437, native US-English Windows OEM code page
--
-- cp437_decode maps each high byte value (0x80..0xff) to its Unicode code
-- point. Byte values below 0x80 are not stored here; the encoder and decoder
-- functions pass them through unchanged.
local cp437_decode = {}
do
  -- Code points listed in byte order: the first entry belongs to byte 0x80,
  -- the last one to byte 0xff. Each row covers eight consecutive byte values.
  local code_points = {
    0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, -- 0x80
    0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, -- 0x88
    0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, -- 0x90
    0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, -- 0x98
    0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, -- 0xa0
    0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, -- 0xa8
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, -- 0xb0
    0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, -- 0xb8
    0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, -- 0xc0
    0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, -- 0xc8
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, -- 0xd0
    0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, -- 0xd8
    0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, -- 0xe0
    0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, -- 0xe8
    0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, -- 0xf0
    0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, -- 0xf8
  }
  for i = 1, #code_points do
    -- List index 1 corresponds to byte 0x80, hence the 0x7f offset.
    cp437_decode[0x7f + i] = code_points[i]
  end
end
|
||||
-- Reverse lookup (Unicode code point -> CP437 byte value), derived from
-- cp437_decode so the two tables cannot drift apart.
local cp437_encode = invert(cp437_decode)
|
||||
|
||||
---Encode a Unicode code point to CP437
--
-- Code points below 0x80 are emitted as the byte with the same value. Any
-- other code point is looked up in the reverse CP437 table.
--
-- Returns nil if the code point cannot be found in CP437
--@param cp The Unicode code point as a number
--@return A string containing the related CP437 character, or nil if the code
--        point has no CP437 representation
function cp437_enc(cp)
  -- Low range: the byte value equals the code point.
  if cp < 0x80 then
    return char(cp)
  end

  local byte_value = cp437_encode[cp]
  if byte_value ~= nil then
    return char(byte_value)
  end
  return nil
end
|
||||
|
||||
---Decodes a CP437 character
--
-- Exactly one byte is consumed. Byte values below 0x80 decode to the code
-- point with the same value; higher byte values are translated through the
-- CP437 decoding table.
--@param buf A string containing the character
--@param pos The index in the string where the character begins. Defaults to 1
--@return pos The index in the string immediately following the decoded
--            character
--@return cp The code point of the character as a number
function cp437_dec(buf, pos)
  local start = pos or 1
  local bv = byte(buf, start)
  local cp = bv
  if not (bv < 0x80) then
    -- High half of the code page: translate through the table.
    cp = cp437_decode[bv]
  end
  return start + 1, cp
end
|
||||
|
||||
---Helper function for the common case of UTF-16 to UTF-8 transcoding, such as
|
||||
--from a Windows/SMB unicode string to a printable ASCII (subset of UTF-8)
|
||||
--string.
|
||||
@@ -235,5 +410,7 @@ test_suite:add_test(unittest.table_equal(decode("\x08\xD8\x45\xDF=\0R\0a\0", utf
|
||||
-- UTF-16 big-endian decoding, including a surrogate pair.
test_suite:add_test(
  unittest.table_equal(
    decode("\xD8\x08\xDF\x45\0=\0R\0a", utf16_dec, true),
    {0x12345,61,82,97}
  ),
  "decode utf-16, big-endian"
)

-- Round-trip helpers between UTF-16 (little-endian) and UTF-8.
test_suite:add_test(
  unittest.equal(
    utf16to8("\x08\xD8\x45\xDF=\0R\0a\0"),
    "\xF0\x92\x8D\x85=Ra"
  ),
  "utf16to8"
)
test_suite:add_test(
  unittest.equal(
    utf8to16("\xF0\x92\x8D\x85=Ra"),
    "\x08\xD8\x45\xDF=\0R\0a\0"
  ),
  "utf8to16"
)

-- CP437: two code points from the high half plus one ASCII character.
test_suite:add_test(
  unittest.equal(
    encode({0x221e, 0x2248, 0x30}, cp437_enc),
    "\xec\xf70"
  ),
  "encode cp437"
)
test_suite:add_test(
  unittest.table_equal(
    decode("\x81ber", cp437_dec),
    {0xfc, 0x62, 0x65, 0x72}
  ),
  "decode cp437"
)

return _ENV
|
||||
|
||||
Reference in New Issue
Block a user