mirror of
https://github.com/nmap/nmap.git
synced 2025-12-10 17:59:04 +00:00
Bugfixes in json.lua
A few bugs crept in during the conversion to LPEG parsing: 1. Unicode parsing of UTF-16 surrogate pairs returned an ending position that was off by 2, so that the last 2 characters in the escape code were parsed as literal characters. 2. Arrays and objects were not being tagged as such with make_array and make_object during parsing. This commit fixes these bugs, and returns to using unicode.utf8_enc for UTF-8 encoding instead of a separately-defined function. Also, the tests at the end (which were helpful in finding both of the bugs above) were converted to unittest.lua tests.
This commit is contained in:
288
nselib/json.lua
288
nselib/json.lua
@@ -15,7 +15,6 @@
|
|||||||
-- @author Martin Holst Swende (originally), David Fifield, Patrick Donnelly
|
-- @author Martin Holst Swende (originally), David Fifield, Patrick Donnelly
|
||||||
-- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html
|
-- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html
|
||||||
|
|
||||||
-- TODO: Unescape/escape unicode
|
|
||||||
-- Version 0.4
|
-- Version 0.4
|
||||||
-- Created 01/25/2010 - v0.1 - created by Martin Holst Swende <martin@swende.se>
|
-- Created 01/25/2010 - v0.1 - created by Martin Holst Swende <martin@swende.se>
|
||||||
-- Heavily modified 02/22/2010 - v0.3. Rewrote the parser into an OO-form, to not have to handle
|
-- Heavily modified 02/22/2010 - v0.3. Rewrote the parser into an OO-form, to not have to handle
|
||||||
@@ -29,6 +28,7 @@ local stdnse = require "stdnse"
|
|||||||
local string = require "string"
|
local string = require "string"
|
||||||
local table = require "table"
|
local table = require "table"
|
||||||
local unicode = require "unicode"
|
local unicode = require "unicode"
|
||||||
|
local unittest = require "unittest"
|
||||||
_ENV = stdnse.module("json", stdnse.seeall)
|
_ENV = stdnse.module("json", stdnse.seeall)
|
||||||
|
|
||||||
local lpeg = require "lpeg";
|
local lpeg = require "lpeg";
|
||||||
@@ -55,57 +55,43 @@ end
|
|||||||
local NULL = {};
|
local NULL = {};
|
||||||
_M.NULL = NULL;
|
_M.NULL = NULL;
|
||||||
|
|
||||||
-- Encode a Unicode code point to UTF-8. See RFC 3629.
|
--- Makes a table be treated as a JSON Array when generating JSON
|
||||||
-- Does not check that cp is a real character; that is, doesn't exclude the
|
--
|
||||||
-- surrogate range U+D800 - U+DFFF and a handful of others.
|
-- A table treated as an Array has all non-number indices ignored.
|
||||||
local function utf8_enc (cp)
|
-- @param t a table to be treated as an array
|
||||||
local result = {};
|
function make_array(t)
|
||||||
local n, mask;
|
local mt = getmetatable(t) or {}
|
||||||
|
mt["json"] = "array"
|
||||||
|
setmetatable(t, mt)
|
||||||
|
return t
|
||||||
|
end
|
||||||
|
|
||||||
if cp % 1.0 ~= 0.0 or cp < 0 then
|
--- Makes a table be treated as a JSON Object when generating JSON
|
||||||
-- Only defined for nonnegative integers.
|
--
|
||||||
error("utf code point defined only for non-negative integers");
|
-- A table treated as an Object has all non-number indices ignored.
|
||||||
elseif cp <= 0x7F then
|
-- @param t a table to be treated as an object
|
||||||
-- Special case of one-byte encoding.
|
function make_object(t)
|
||||||
return string.char(cp);
|
local mt = getmetatable(t) or {}
|
||||||
elseif cp <= 0x7FF then
|
mt["json"] = "object"
|
||||||
n = 2;
|
setmetatable(t, mt)
|
||||||
mask = 0xC0;
|
return t
|
||||||
elseif cp <= 0xFFFF then
|
|
||||||
n = 3;
|
|
||||||
mask = 0xE0;
|
|
||||||
elseif cp <= 0x10FFFF then
|
|
||||||
n = 4;
|
|
||||||
mask = 0xF0;
|
|
||||||
else
|
|
||||||
assert(false);
|
|
||||||
end
|
|
||||||
|
|
||||||
while n > 1 do
|
|
||||||
result[n] = 0x80 + bit.band(cp, 0x3F);
|
|
||||||
cp = bit.rshift(cp, 6);
|
|
||||||
n = n - 1;
|
|
||||||
end
|
|
||||||
result[1] = mask + cp;
|
|
||||||
|
|
||||||
return string.char(unpack(result));
|
|
||||||
end
|
end
|
||||||
|
|
||||||
-- Decode a Unicode escape, assuming that self.pos starts just after the
|
-- Decode a Unicode escape, assuming that self.pos starts just after the
|
||||||
-- initial \u. May consume an additional escape in the case of a UTF-16
|
-- initial \u. May consume an additional escape in the case of a UTF-16
|
||||||
-- surrogate pair. See RFC 2781 for UTF-16.
|
-- surrogate pair. See RFC 2781 for UTF-16.
|
||||||
local unicode = P [[\u]] * C(locale().xdigit * locale().xdigit * locale().xdigit * locale().xdigit);
|
local unicode_esc = P [[\u]] * C(locale().xdigit * locale().xdigit * locale().xdigit * locale().xdigit);
|
||||||
local function unicode16 (subject, position, hex)
|
local function unicode16 (subject, position, hex)
|
||||||
local cp = assert(tonumber(hex, 16));
|
local cp = assert(tonumber(hex, 16));
|
||||||
|
|
||||||
if cp < 0xD800 or cp > 0xDFFF then
|
if cp < 0xD800 or cp > 0xDFFF then
|
||||||
return position, utf8_enc(cp);
|
return position, unicode.utf8_enc(cp);
|
||||||
elseif cp >= 0xDC00 and cp <= 0xDFFF then
|
elseif cp >= 0xDC00 and cp <= 0xDFFF then
|
||||||
error(("Not a Unicode character: U+%04X"):format(cp));
|
error(("Not a Unicode character: U+%04X"):format(cp));
|
||||||
end
|
end
|
||||||
|
|
||||||
-- Beginning of a UTF-16 surrogate.
|
-- Beginning of a UTF-16 surrogate.
|
||||||
local lowhex = unicode:match(subject, position);
|
local lowhex = unicode_esc:match(subject, position);
|
||||||
|
|
||||||
if not lowhex then
|
if not lowhex then
|
||||||
error(("Bad unicode escape \\u%s (missing low surrogate)"):format(hex))
|
error(("Bad unicode escape \\u%s (missing low surrogate)"):format(hex))
|
||||||
@@ -114,9 +100,9 @@ local function unicode16 (subject, position, hex)
|
|||||||
if not (cp2 >= 0xDC00 and cp2 <= 0xDFFF) then
|
if not (cp2 >= 0xDC00 and cp2 <= 0xDFFF) then
|
||||||
error(("Bad unicode escape \\u%s\\u%s (bad low surrogate)"):format(hex, lowhex))
|
error(("Bad unicode escape \\u%s\\u%s (bad low surrogate)"):format(hex, lowhex))
|
||||||
end
|
end
|
||||||
position = position+4;
|
position = position+6 -- consume '\uXXXX'
|
||||||
cp = 0x10000 + bit.band(cp, 0x3FF) * 0x400 + bit.band(cp2, 0x3FF)
|
cp = 0x10000 + bit.band(cp, 0x3FF) * 0x400 + bit.band(cp2, 0x3FF)
|
||||||
return position, utf8_enc(cp);
|
return position, unicode.utf8_enc(cp);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -134,11 +120,11 @@ local json = locale {
|
|||||||
K "false" * Cc(false)+
|
K "false" * Cc(false)+
|
||||||
K "null" * Cc(NULL);
|
K "null" * Cc(NULL);
|
||||||
|
|
||||||
object = Cf(Ct "" * P "{" * V "space"^0 * (V "members")^-1 * V "space"^0 * P "}", rawset);
|
object = Cf(Ct "" * P "{" * V "space"^0 * (V "members")^-1 * V "space"^0 * P "}", rawset) / make_object;
|
||||||
members = V "pair" * (V "space"^0 * P "," * V "space"^0 * V "pair")^0;
|
members = V "pair" * (V "space"^0 * P "," * V "space"^0 * V "pair")^0;
|
||||||
pair = Cg(V "string" * V "space"^0 * P ":" * V "space"^0 * V "value");
|
pair = Cg(V "string" * V "space"^0 * P ":" * V "space"^0 * V "value");
|
||||||
|
|
||||||
array = Ct(P "[" * V "space"^0 * (V "elements")^-1 * V "space"^0 * P "]");
|
array = Ct(P "[" * V "space"^0 * (V "elements")^-1 * V "space"^0 * P "]") / make_array;
|
||||||
elements = V "value" * V "space"^0 * (P "," * V "space"^0 * V "value")^0;
|
elements = V "value" * V "space"^0 * (P "," * V "space"^0 * V "value")^0;
|
||||||
|
|
||||||
string = Ct(P [["]] * (V "char")^0 * P [["]]) / table.concat;
|
string = Ct(P [["]] * (V "char")^0 * P [["]]) / table.concat;
|
||||||
@@ -227,26 +213,6 @@ local function escape(str)
|
|||||||
return "\"" .. string.gsub(str, ".", ESCAPE_TABLE) .. "\""
|
return "\"" .. string.gsub(str, ".", ESCAPE_TABLE) .. "\""
|
||||||
end
|
end
|
||||||
|
|
||||||
--- Makes a table be treated as a JSON Array when generating JSON
|
|
||||||
--
|
|
||||||
-- A table treated as an Array has all non-number indices ignored.
|
|
||||||
-- @param t a table to be treated as an array
|
|
||||||
function make_array(t)
|
|
||||||
local mt = getmetatable(t) or {}
|
|
||||||
mt["json"] = "array"
|
|
||||||
setmetatable(t, mt)
|
|
||||||
end
|
|
||||||
|
|
||||||
--- Makes a table be treated as a JSON Object when generating JSON
|
|
||||||
--
|
|
||||||
-- A table treated as an Object has all non-number indices ignored.
|
|
||||||
-- @param t a table to be treated as an object
|
|
||||||
function make_object(t)
|
|
||||||
local mt = getmetatable(t) or {}
|
|
||||||
mt["json"] = "object"
|
|
||||||
setmetatable(t, mt)
|
|
||||||
end
|
|
||||||
|
|
||||||
--- Checks what JSON type a variable will be treated as when generating JSON
|
--- Checks what JSON type a variable will be treated as when generating JSON
|
||||||
-- @param var a variable to inspect
|
-- @param var a variable to inspect
|
||||||
-- @return a string containing the JSON type. Valid values are "array",
|
-- @return a string containing the JSON type. Valid values are "array",
|
||||||
@@ -304,55 +270,169 @@ function generate(obj)
|
|||||||
error("Unknown data type in generate")
|
error("Unknown data type in generate")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if not unittest.testing() then
|
||||||
|
return _ENV
|
||||||
|
end
|
||||||
|
|
||||||
----------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------
|
||||||
-- Test-code for debugging purposes below
|
-- Test-code for debugging purposes below
|
||||||
----------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------
|
||||||
|
|
||||||
local TESTS = {
|
local TESTS = {
|
||||||
'{"a":1}',
|
{
|
||||||
'{"a":true}',
|
'{"a":1}',
|
||||||
'{"a": false}',
|
generates = '{"a": 1}',
|
||||||
'{"a": null \r\n, \t "b" \f:"ehlo"}',
|
is = "object",
|
||||||
'{"a\\"a":"a\\"b\\"c\\"d"}',
|
test = function(o) return o["a"] == 1 end
|
||||||
'{"foo":"gaz\\"onk", "pi":3.14159,"hello":{ "wo":"rld"}}',
|
},
|
||||||
'{"a":1, "b":2}',
|
{
|
||||||
'{"foo":"gazonk", "pi":3.14159,"hello":{ "wo":"rl\\td"}}',
|
'{"a":true}',
|
||||||
'[1,2,3,4,5,null,false,true,"\195\164\195\165\195\182\195\177","bar"]',
|
generates = '{"a": true}',
|
||||||
'[]',-- This will yield {} in toJson, since in lua there is only one basic datatype - and no difference when empty
|
is = "object",
|
||||||
'{}',
|
test = function(o) return o["a"] == true end
|
||||||
'', -- error
|
},
|
||||||
'null', -- error
|
{
|
||||||
'"abc"', -- error
|
'{"a": false}',
|
||||||
'{a":1}', -- error
|
generates = '{"a": false}',
|
||||||
'{"a" bad :1}', -- error
|
is = "object",
|
||||||
'["a\\\\t"]', -- Should become Lua {"a\\t"}
|
test = function(o) return o["a"] == false end
|
||||||
'[0.0.0]', -- error
|
},
|
||||||
'[-1]',
|
{
|
||||||
'[-1.123e-2]',
|
'{"a": null \r\n, \t "b" \f:"ehlo"}',
|
||||||
'[5e3]',
|
is = "object",
|
||||||
'[5e+3]',
|
test = function(o) return o["a"] == NULL end
|
||||||
'[5E-3]',
|
},
|
||||||
'[5.5e3]',
|
{
|
||||||
'["a\\\\"]', -- Should become Lua {"a\\"}
|
'{"a\\"a":"a\\"b\\"c\\"d"}',
|
||||||
'{"a}": 1}', -- Should become Lua {"a}" = 1}
|
generates = '{"a\\"a": "a\\"b\\"c\\"d"}',
|
||||||
'["key": "value"]', -- error
|
is = "object",
|
||||||
'["\\u0041"]', -- Should become Lua {"A"}
|
test = function(o) return o['a"a'] == 'a"b"c"d' end
|
||||||
'["\\uD800"]', -- error
|
},
|
||||||
'["\\uD834\\uDD1EX"]', -- Should become Lua {"\240\157\132\158X"}
|
{
|
||||||
|
'{"foo":"gaz\\"onk", "pi":3.14159,"hello":{ "wo":"rl\\td"}}',
|
||||||
|
is = "object",
|
||||||
|
test = function(o) return (
|
||||||
|
o["foo"] == 'gaz"onk' and
|
||||||
|
o["pi"] == 3.14159 and
|
||||||
|
o["hello"]["wo"] == "rl\td"
|
||||||
|
) end
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'{"a":1, "b":2}',
|
||||||
|
is = "object",
|
||||||
|
test = function(o)
|
||||||
|
local j = generate(o)
|
||||||
|
return ( -- order is random
|
||||||
|
j == '{"a": 1, "b": 2}' or
|
||||||
|
j == '{"b": 2, "a": 1}'
|
||||||
|
) end
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[1,2,3,4,5,null,false,true,"\195\164\195\165\195\182\195\177","bar"]',
|
||||||
|
generates = '[1, 2, 3, 4, 5, null, false, true, "\195\164\195\165\195\182\195\177", "bar"]',
|
||||||
|
is = "array",
|
||||||
|
test = function(o) return #o == 10 end
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[]',
|
||||||
|
generates = '[]',
|
||||||
|
is = "array",
|
||||||
|
test = function(o) return not next(o) end
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'{}',
|
||||||
|
generates = '{}',
|
||||||
|
is = "object",
|
||||||
|
test = function(o) return not next(o) end
|
||||||
|
},
|
||||||
|
{'', valid=false},
|
||||||
|
{'null', valid=false}, -- error
|
||||||
|
{'"abc"', valid=false}, -- error
|
||||||
|
{'{a":1}', valid=false}, -- error
|
||||||
|
{'{"a" bad :1}', valid=false}, -- error
|
||||||
|
{
|
||||||
|
'["a\\\\t"]',
|
||||||
|
generates = '["a\\\\t"]',
|
||||||
|
is = "array",
|
||||||
|
test = function(o) return o[1] == "a\\t" end
|
||||||
|
}, -- Should become Lua {"a\\t"}
|
||||||
|
{'[0.0.0]', valid=false}, -- error
|
||||||
|
{
|
||||||
|
'[-1]',
|
||||||
|
generates = '[-1]',
|
||||||
|
is = "array",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[-1.123e-2]',
|
||||||
|
generates = '[-0.01123]',
|
||||||
|
is = "array",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[5e3]',
|
||||||
|
generates = '[5000]',
|
||||||
|
is = "array",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[5e+3]',
|
||||||
|
generates = '[5000]',
|
||||||
|
is = "array",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[5E-3]',
|
||||||
|
generates = '[0.005]',
|
||||||
|
is = "array",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'[5.5e3]',
|
||||||
|
generates = '[5500]',
|
||||||
|
is = "array",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'["a\\\\"]',
|
||||||
|
generates = '["a\\\\"]',
|
||||||
|
is = "array",
|
||||||
|
}, -- Should become Lua {"a\\"}
|
||||||
|
{
|
||||||
|
' {"a}": 1} ',
|
||||||
|
generates = '{"a}": 1}',
|
||||||
|
is = "object",
|
||||||
|
test = function(o) return o["a}"] == 1 end
|
||||||
|
}, -- Should become Lua {"a}" = 1}
|
||||||
|
{'["key": "value"]', valid=false}, -- error
|
||||||
|
{
|
||||||
|
'["\\u0041"]',
|
||||||
|
generates = '["A"]',
|
||||||
|
is = "array",
|
||||||
|
}, -- Should become Lua {"A"}
|
||||||
|
{'["\\uD800"]', valid=false}, -- error
|
||||||
|
{
|
||||||
|
'["\\uD834\\uDD1EX"]',
|
||||||
|
generates = '["\240\157\132\158X"]',
|
||||||
|
is = "array",
|
||||||
|
}, -- Should become Lua {"\240\157\132\158X"}
|
||||||
}
|
}
|
||||||
function test()
|
|
||||||
print("Tests running")
|
test_suite = unittest.TestSuite:new()
|
||||||
local i,v,res,status
|
|
||||||
for i,v in pairs(TESTS) do
|
local equal = unittest.equal
|
||||||
print("----------------------------")
|
local is_false = unittest.is_false
|
||||||
print(("%q"):format(v))
|
local is_true = unittest.is_true
|
||||||
status,res = parse(v)
|
|
||||||
if not status then print( res) end
|
for _, test in ipairs(TESTS) do
|
||||||
if(status) then
|
local status, val = parse(test[1])
|
||||||
print(("%q"):format(generate(res)))
|
if test.valid == false then
|
||||||
else
|
test_suite:add_test(is_false(status), "Syntax error status is false")
|
||||||
print("Error:".. res)
|
test_suite:add_test(equal(val, "syntax error"), "Syntax error")
|
||||||
end
|
break
|
||||||
|
end
|
||||||
|
if test.generates then
|
||||||
|
test_suite:add_test(equal(generate(val), test.generates), "Generate")
|
||||||
|
end
|
||||||
|
if test.is then
|
||||||
|
test_suite:add_test(equal(typeof(val), test.is), "JSON type")
|
||||||
|
end
|
||||||
|
if test.test then
|
||||||
|
test_suite:add_test(is_true(test.test(val)), "Extra test")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user