1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-10 17:59:04 +00:00

Bugfixes in json.lua

A few bugs crept in during the conversion to LPEG parsing:

1. Unicode parsing of UTF-16 surrogate pairs returned an ending position
that was off by 2, so that the last 2 characters in the escape code were
parsed as literal characters.

2. Arrays and objects were not being tagged as such with make_array and
make_object during parsing.

This commit fixes these bugs, and returns to using unicode.utf8_enc for
UTF-8 encoding instead of a separately-defined function. Also, the tests
at the end (which were helpful in finding both of the bugs above) were
converted to unittest.lua tests.
This commit is contained in:
dmiller
2015-02-28 12:43:43 +00:00
parent 41c4b9fbc8
commit fac667e621

View File

@@ -15,7 +15,6 @@
-- @author Martin Holst Swende (originally), David Fifield, Patrick Donnelly -- @author Martin Holst Swende (originally), David Fifield, Patrick Donnelly
-- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html -- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html
-- TODO: Unescape/escape unicode
-- Version 0.4 -- Version 0.4
-- Created 01/25/2010 - v0.1 - created by Martin Holst Swende <martin@swende.se> -- Created 01/25/2010 - v0.1 - created by Martin Holst Swende <martin@swende.se>
-- Heavily modified 02/22/2010 - v0.3. Rewrote the parser into an OO-form, to not have to handle -- Heavily modified 02/22/2010 - v0.3. Rewrote the parser into an OO-form, to not have to handle
@@ -29,6 +28,7 @@ local stdnse = require "stdnse"
local string = require "string" local string = require "string"
local table = require "table" local table = require "table"
local unicode = require "unicode" local unicode = require "unicode"
local unittest = require "unittest"
_ENV = stdnse.module("json", stdnse.seeall) _ENV = stdnse.module("json", stdnse.seeall)
local lpeg = require "lpeg"; local lpeg = require "lpeg";
@@ -55,57 +55,43 @@ end
local NULL = {}; local NULL = {};
_M.NULL = NULL; _M.NULL = NULL;
-- Encode a Unicode code point to UTF-8. See RFC 3629. --- Makes a table be treated as a JSON Array when generating JSON
-- Does not check that cp is a real character; that is, doesn't exclude the --
-- surrogate range U+D800 - U+DFFF and a handful of others. -- A table treated as an Array has all non-number indices ignored.
local function utf8_enc (cp) -- @param t a table to be treated as an array
local result = {}; function make_array(t)
local n, mask; local mt = getmetatable(t) or {}
mt["json"] = "array"
setmetatable(t, mt)
return t
end
if cp % 1.0 ~= 0.0 or cp < 0 then --- Makes a table be treated as a JSON Object when generating JSON
-- Only defined for nonnegative integers. --
error("utf code point defined only for non-negative integers"); -- A table treated as an Object has all non-number indices ignored.
elseif cp <= 0x7F then -- @param t a table to be treated as an object
-- Special case of one-byte encoding. function make_object(t)
return string.char(cp); local mt = getmetatable(t) or {}
elseif cp <= 0x7FF then mt["json"] = "object"
n = 2; setmetatable(t, mt)
mask = 0xC0; return t
elseif cp <= 0xFFFF then
n = 3;
mask = 0xE0;
elseif cp <= 0x10FFFF then
n = 4;
mask = 0xF0;
else
assert(false);
end
while n > 1 do
result[n] = 0x80 + bit.band(cp, 0x3F);
cp = bit.rshift(cp, 6);
n = n - 1;
end
result[1] = mask + cp;
return string.char(unpack(result));
end end
-- Decode a Unicode escape, assuming that self.pos starts just after the -- Decode a Unicode escape, assuming that self.pos starts just after the
-- initial \u. May consume an additional escape in the case of a UTF-16 -- initial \u. May consume an additional escape in the case of a UTF-16
-- surrogate pair. See RFC 2781 for UTF-16. -- surrogate pair. See RFC 2781 for UTF-16.
local unicode = P [[\u]] * C(locale().xdigit * locale().xdigit * locale().xdigit * locale().xdigit); local unicode_esc = P [[\u]] * C(locale().xdigit * locale().xdigit * locale().xdigit * locale().xdigit);
local function unicode16 (subject, position, hex) local function unicode16 (subject, position, hex)
local cp = assert(tonumber(hex, 16)); local cp = assert(tonumber(hex, 16));
if cp < 0xD800 or cp > 0xDFFF then if cp < 0xD800 or cp > 0xDFFF then
return position, utf8_enc(cp); return position, unicode.utf8_enc(cp);
elseif cp >= 0xDC00 and cp <= 0xDFFF then elseif cp >= 0xDC00 and cp <= 0xDFFF then
error(("Not a Unicode character: U+%04X"):format(cp)); error(("Not a Unicode character: U+%04X"):format(cp));
end end
-- Beginning of a UTF-16 surrogate. -- Beginning of a UTF-16 surrogate.
local lowhex = unicode:match(subject, position); local lowhex = unicode_esc:match(subject, position);
if not lowhex then if not lowhex then
error(("Bad unicode escape \\u%s (missing low surrogate)"):format(hex)) error(("Bad unicode escape \\u%s (missing low surrogate)"):format(hex))
@@ -114,9 +100,9 @@ local function unicode16 (subject, position, hex)
if not (cp2 >= 0xDC00 and cp2 <= 0xDFFF) then if not (cp2 >= 0xDC00 and cp2 <= 0xDFFF) then
error(("Bad unicode escape \\u%s\\u%s (bad low surrogate)"):format(hex, lowhex)) error(("Bad unicode escape \\u%s\\u%s (bad low surrogate)"):format(hex, lowhex))
end end
position = position+4; position = position+6 -- consume '\uXXXX'
cp = 0x10000 + bit.band(cp, 0x3FF) * 0x400 + bit.band(cp2, 0x3FF) cp = 0x10000 + bit.band(cp, 0x3FF) * 0x400 + bit.band(cp2, 0x3FF)
return position, utf8_enc(cp); return position, unicode.utf8_enc(cp);
end end
end end
@@ -134,11 +120,11 @@ local json = locale {
K "false" * Cc(false)+ K "false" * Cc(false)+
K "null" * Cc(NULL); K "null" * Cc(NULL);
object = Cf(Ct "" * P "{" * V "space"^0 * (V "members")^-1 * V "space"^0 * P "}", rawset); object = Cf(Ct "" * P "{" * V "space"^0 * (V "members")^-1 * V "space"^0 * P "}", rawset) / make_object;
members = V "pair" * (V "space"^0 * P "," * V "space"^0 * V "pair")^0; members = V "pair" * (V "space"^0 * P "," * V "space"^0 * V "pair")^0;
pair = Cg(V "string" * V "space"^0 * P ":" * V "space"^0 * V "value"); pair = Cg(V "string" * V "space"^0 * P ":" * V "space"^0 * V "value");
array = Ct(P "[" * V "space"^0 * (V "elements")^-1 * V "space"^0 * P "]"); array = Ct(P "[" * V "space"^0 * (V "elements")^-1 * V "space"^0 * P "]") / make_array;
elements = V "value" * V "space"^0 * (P "," * V "space"^0 * V "value")^0; elements = V "value" * V "space"^0 * (P "," * V "space"^0 * V "value")^0;
string = Ct(P [["]] * (V "char")^0 * P [["]]) / table.concat; string = Ct(P [["]] * (V "char")^0 * P [["]]) / table.concat;
@@ -227,26 +213,6 @@ local function escape(str)
return "\"" .. string.gsub(str, ".", ESCAPE_TABLE) .. "\"" return "\"" .. string.gsub(str, ".", ESCAPE_TABLE) .. "\""
end end
--- Makes a table be treated as a JSON Array when generating JSON
--
-- A table treated as an Array has all non-number indices ignored.
-- The tag is stored as the "json" field of the table's metatable; an
-- existing metatable is reused (and therefore modified), otherwise a new
-- one is created.
-- @param t a table to be treated as an array
-- @return the same table t, so the call can be chained or used as a
--         capture-transforming function
function make_array(t)
  local mt = getmetatable(t) or {}
  mt["json"] = "array"
  setmetatable(t, mt)
  -- Returning t keeps the value alive when this function is used as a
  -- transformer (a function that returns nothing yields no value).
  return t
end
--- Makes a table be treated as a JSON Object when generating JSON
--
-- A table treated as an Object has all non-number indices ignored.
-- NOTE(review): the sentence above is identical to the one on make_array;
-- confirm against the generator that it really describes objects.
-- The tag is stored as the "json" field of the table's metatable; an
-- existing metatable is reused (and therefore modified), otherwise a new
-- one is created.
-- @param t a table to be treated as an object
-- @return the same table t, so the call can be chained or used as a
--         capture-transforming function
function make_object(t)
  local mt = getmetatable(t) or {}
  mt["json"] = "object"
  setmetatable(t, mt)
  -- Returning t keeps the value alive when this function is used as a
  -- transformer (a function that returns nothing yields no value).
  return t
end
--- Checks what JSON type a variable will be treated as when generating JSON --- Checks what JSON type a variable will be treated as when generating JSON
-- @param var a variable to inspect -- @param var a variable to inspect
-- @return a string containing the JSON type. Valid values are "array", -- @return a string containing the JSON type. Valid values are "array",
@@ -304,55 +270,169 @@ function generate(obj)
error("Unknown data type in generate") error("Unknown data type in generate")
end end
-- Everything after this point only sets up unit-test data. When
-- unittest.testing() is falsy, hand back the module environment right away
-- so none of the test fixtures below are built.
if not unittest.testing() then
return _ENV
end
---------------------------------------------------------------------------------- ----------------------------------------------------------------------------------
-- Test-code for debugging purposes below -- Test-code for debugging purposes below
---------------------------------------------------------------------------------- ----------------------------------------------------------------------------------
local TESTS = { local TESTS = {
'{"a":1}', {
'{"a":true}', '{"a":1}',
'{"a": false}', generates = '{"a": 1}',
'{"a": null \r\n, \t "b" \f:"ehlo"}', is = "object",
'{"a\\"a":"a\\"b\\"c\\"d"}', test = function(o) return o["a"] == 1 end
'{"foo":"gaz\\"onk", "pi":3.14159,"hello":{ "wo":"rld"}}', },
'{"a":1, "b":2}', {
'{"foo":"gazonk", "pi":3.14159,"hello":{ "wo":"rl\\td"}}', '{"a":true}',
'[1,2,3,4,5,null,false,true,"\195\164\195\165\195\182\195\177","bar"]', generates = '{"a": true}',
'[]',-- This will yield {} in toJson, since in lua there is only one basic datatype - and no difference when empty is = "object",
'{}', test = function(o) return o["a"] == true end
'', -- error },
'null', -- error {
'"abc"', -- error '{"a": false}',
'{a":1}', -- error generates = '{"a": false}',
'{"a" bad :1}', -- error is = "object",
'["a\\\\t"]', -- Should become Lua {"a\\t"} test = function(o) return o["a"] == false end
'[0.0.0]', -- error },
'[-1]', {
'[-1.123e-2]', '{"a": null \r\n, \t "b" \f:"ehlo"}',
'[5e3]', is = "object",
'[5e+3]', test = function(o) return o["a"] == NULL end
'[5E-3]', },
'[5.5e3]', {
'["a\\\\"]', -- Should become Lua {"a\\"} '{"a\\"a":"a\\"b\\"c\\"d"}',
'{"a}": 1}', -- Should become Lua {"a}" = 1} generates = '{"a\\"a": "a\\"b\\"c\\"d"}',
'["key": "value"]', -- error is = "object",
'["\\u0041"]', -- Should become Lua {"A"} test = function(o) return o['a"a'] == 'a"b"c"d' end
'["\\uD800"]', -- error },
'["\\uD834\\uDD1EX"]', -- Should become Lua {"\240\157\132\158X"} {
'{"foo":"gaz\\"onk", "pi":3.14159,"hello":{ "wo":"rl\\td"}}',
is = "object",
test = function(o) return (
o["foo"] == 'gaz"onk' and
o["pi"] == 3.14159 and
o["hello"]["wo"] == "rl\td"
) end
},
{
'{"a":1, "b":2}',
is = "object",
test = function(o)
local j = generate(o)
return ( -- order is random
j == '{"a": 1, "b": 2}' or
j == '{"b": 2, "a": 1}'
) end
},
{
'[1,2,3,4,5,null,false,true,"\195\164\195\165\195\182\195\177","bar"]',
generates = '[1, 2, 3, 4, 5, null, false, true, "\195\164\195\165\195\182\195\177", "bar"]',
is = "array",
test = function(o) return #o == 10 end
},
{
'[]',
generates = '[]',
is = "array",
test = function(o) return not next(o) end
},
{
'{}',
generates = '{}',
is = "object",
test = function(o) return not next(o) end
},
{'', valid=false},
{'null', valid=false}, -- error
{'"abc"', valid=false}, -- error
{'{a":1}', valid=false}, -- error
{'{"a" bad :1}', valid=false}, -- error
{
'["a\\\\t"]',
generates = '["a\\\\t"]',
is = "array",
test = function(o) return o[1] == "a\\t" end
}, -- Should become Lua {"a\\t"}
{'[0.0.0]', valid=false}, -- error
{
'[-1]',
generates = '[-1]',
is = "array",
},
{
'[-1.123e-2]',
generates = '[-0.01123]',
is = "array",
},
{
'[5e3]',
generates = '[5000]',
is = "array",
},
{
'[5e+3]',
generates = '[5000]',
is = "array",
},
{
'[5E-3]',
generates = '[0.005]',
is = "array",
},
{
'[5.5e3]',
generates = '[5500]',
is = "array",
},
{
'["a\\\\"]',
generates = '["a\\\\"]',
is = "array",
}, -- Should become Lua {"a\\"}
{
' {"a}": 1} ',
generates = '{"a}": 1}',
is = "object",
test = function(o) return o["a}"] == 1 end
}, -- Should become Lua {"a}" = 1}
{'["key": "value"]', valid=false}, -- error
{
'["\\u0041"]',
generates = '["A"]',
is = "array",
}, -- Should become Lua {"A"}
{'["\\uD800"]', valid=false}, -- error
{
'["\\uD834\\uDD1EX"]',
generates = '["\240\157\132\158X"]',
is = "array",
}, -- Should become Lua {"\240\157\132\158X"}
} }
-- Build the unit-test suite from the TESTS fixtures.
-- Assigned without `local`, so it is stored in the module environment.
test_suite = unittest.TestSuite:new()

local equal = unittest.equal
local is_false = unittest.is_false
local is_true = unittest.is_true

for _, test in ipairs(TESTS) do
  local status, val = parse(test[1])
  if test.valid == false then
    -- Invalid input: parse must report failure with a "syntax error" value.
    test_suite:add_test(is_false(status), "Syntax error status is false")
    test_suite:add_test(equal(val, "syntax error"), "Syntax error")
    -- Do NOT `break` here: that would leave the loop at the first invalid
    -- fixture and silently skip every fixture listed after it.
  else
    -- Valid input: run whichever optional checks the fixture declares.
    if test.generates then
      test_suite:add_test(equal(generate(val), test.generates), "Generate")
    end
    if test.is then
      test_suite:add_test(equal(typeof(val), test.is), "JSON type")
    end
    if test.test then
      test_suite:add_test(is_true(test.test(val)), "Extra test")
    end
  end
end