1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-15 04:09:01 +00:00

Merged Lpeg branch

This commit is contained in:
devin
2014-06-26 20:12:54 +00:00
parent 9fe9545d49
commit d68396d823
16 changed files with 6128 additions and 3700 deletions

View File

@@ -12,7 +12,7 @@
-- <code>undefined</code>.) <code>NULL</code> values in JSON are represented by
-- the special value <code>json.NULL</code>.
--
-- @author Martin Holst Swende
-- @author Martin Holst Swende (originally), David Fifield, Patrick Donnelly
-- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html
-- TODO: Unescape/escape unicode
@@ -23,7 +23,7 @@
-- Modified 02/27/2010 - v0.4 Added unicode handling (written by David Fifield). Renamed toJson
-- and fromJson into generate() and parse(), implemented more proper numeric parsing and added some more error checking.
local bit = require "bit"
local bit = require "bit";
local nmap = require "nmap"
local stdnse = require "stdnse"
local string = require "string"
@@ -31,6 +31,155 @@ local table = require "table"
local unicode = require "unicode"
_ENV = stdnse.module("json", stdnse.seeall)
local lpeg = require "lpeg";
local locale = lpeg.locale;
local P = lpeg.P;
local R = lpeg.R;
local S = lpeg.S;
local V = lpeg.V;
local C = lpeg.C;
local Cb = lpeg.Cb;
local Cc = lpeg.Cc;
local Cf = lpeg.Cf;
local Cg = lpeg.Cg;
local Cp = lpeg.Cp;
local Cs = lpeg.Cs;
local Ct = lpeg.Ct;
local Cmt = lpeg.Cmt;
-- Case-sensitive keyword: matches the literal text a only when it is not
-- immediately followed by an alphanumeric character or an underscore.
local function K (a)
  local word_char = locale().alnum + "_";
  return P(a) * -word_char;
end
-- Sentinel used to represent a JSON null: a unique empty table, so it can be
-- told apart from every other value by identity. It is also stored in the
-- NULL field of _M (presumably the module table, which would make it
-- reachable as json.NULL -- confirm against stdnse.module).
local NULL = {};
_M.NULL = NULL;
-- Encode a Unicode code point to UTF-8. See RFC 3629.
-- Does not check that cp is a real character; that is, doesn't exclude the
-- surrogate range U+D800 - U+DFFF and a handful of others.
-- @param cp Code point, a non-negative integer no greater than 0x10FFFF.
-- @return String of 1 to 4 bytes holding the UTF-8 encoding of cp.
-- Raises an error if cp is negative, not an integer, or above 0x10FFFF.
local function utf8_enc (cp)
  if cp % 1.0 ~= 0.0 or cp < 0 then
    -- Only defined for nonnegative integers.
    error("utf code point defined only for non-negative integers");
  elseif cp <= 0x7F then
    -- Special case of one-byte encoding.
    return string.char(cp);
  end

  -- n is the total number of bytes, mask the length marker of the lead byte.
  local n, mask;
  if cp <= 0x7FF then
    n, mask = 2, 0xC0;
  elseif cp <= 0xFFFF then
    n, mask = 3, 0xE0;
  elseif cp <= 0x10FFFF then
    n, mask = 4, 0xF0;
  else
    error("utf code point exceeds the Unicode maximum U+10FFFF");
  end

  -- Fill the continuation bytes from last to first; each one carries the low
  -- six bits of what is left of cp. Plain arithmetic is used so that no
  -- bit-manipulation library is needed.
  local bytes = {};
  for i = n, 2, -1 do
    local low = cp % 0x40;
    bytes[i] = 0x80 + low;
    cp = (cp - low) / 0x40;
  end
  bytes[1] = mask + cp;

  -- table.unpack is the Lua 5.2+ name; the global unpack is the 5.1 name.
  return string.char((table.unpack or unpack)(bytes));
end
-- Pattern matching one complete "\uXXXX" escape and capturing its four hex
-- digits. It is deliberately not called "unicode" so that it does not shadow
-- the unicode library required at the top of the file.
local unicode_esc = P [[\u]] * C(locale().xdigit * locale().xdigit * locale().xdigit * locale().xdigit);

-- Match-time function (for use with lpeg.Cmt) that decodes a Unicode escape.
-- It is called once the "\u" and its four hex digits have been matched. May
-- consume an additional escape in the case of a UTF-16 surrogate pair. See
-- RFC 2781 for UTF-16.
-- @param subject The whole string being parsed.
-- @param position Index just after the four hex digits already matched.
-- @param hex The four hex digits that were captured.
-- @return The position at which matching should resume.
-- @return The UTF-8 encoding of the decoded code point.
-- Raises an error for a lone low surrogate or a high surrogate that is not
-- followed by a valid low surrogate escape.
local function unicode16 (subject, position, hex)
  local cp = assert(tonumber(hex, 16));
  if cp < 0xD800 or cp > 0xDFFF then
    return position, utf8_enc(cp);
  elseif cp >= 0xDC00 and cp <= 0xDFFF then
    error(("Not a Unicode character: U+%04X"):format(cp));
  end
  -- Beginning of a UTF-16 surrogate.
  local lowhex = unicode_esc:match(subject, position);
  if not lowhex then
    error(("Bad unicode escape \\u%s (missing low surrogate)"):format(hex))
  else
    local cp2 = assert(tonumber(lowhex, 16));
    if not (cp2 >= 0xDC00 and cp2 <= 0xDFFF) then
      error(("Bad unicode escape \\u%s\\u%s (bad low surrogate)"):format(hex, lowhex))
    end
    -- Skip the whole low surrogate escape: backslash, "u" and four hex
    -- digits make six bytes.
    position = position+6;
    cp = 0x10000 + bit.band(cp, 0x3FF) * 0x400 + bit.band(cp2, 0x3FF)
    return position, utf8_enc(cp);
  end
end
-- The JSON grammar. lpeg.locale is called on the grammar table to add the
-- character-class rules referenced below (V "space", V "digit", V "xdigit").
-- A successful match produces a single capture: the decoded Lua value.
local json = locale {
  V "json";
  json = V "space"^0 * V "value" * V "space"^0 * P(-1); -- FIXME should be 'V "object" + V "array"' instead of 'V "value"' ?
  value = V "string" +
          V "number" +
          V "object" +
          V "array" +
          K "true" * Cc(true)+
          K "false" * Cc(false)+
          K "null" * Cc(NULL);
  -- Start from an empty table capture and fold every key/value group into it
  -- with rawset, which returns the table it was given.
  object = Cf(Ct "" * P "{" * V "space"^0 * (V "members")^-1 * V "space"^0 * P "}", rawset);
  members = V "pair" * (V "space"^0 * P "," * V "space"^0 * V "pair")^0;
  pair = Cg(V "string" * V "space"^0 * P ":" * V "space"^0 * V "value");
  array = Ct(P "[" * V "space"^0 * (V "elements")^-1 * V "space"^0 * P "]");
  -- Whitespace is allowed on both sides of every comma, as in members.
  elements = V "value" * (V "space"^0 * P "," * V "space"^0 * V "value")^0;
  -- Each char alternative yields one capture; they are collected in a table
  -- and joined into the final string.
  string = Ct(P [["]] * (V "char")^0 * P [["]]) / table.concat;
  char = P [[\"]] * Cc [["]] +
         P [[\\]] * Cc [[\]] +
         P [[\b]] * Cc "\b" +
         P [[\f]] * Cc "\f" +
         P [[\n]] * Cc "\n" +
         P [[\r]] * Cc "\r" +
         P [[\t]] * Cc "\t" +
         P [[\u]] * Cmt(C(V "xdigit" * V "xdigit" * V "xdigit" * V "xdigit"), unicode16) +
         P [[\]] * C(1) + -- any other escaped character stands for itself
         (C(1) - P [["]]);
  -- No whitespace is accepted between the sign and the digits: tonumber
  -- rejects such text, which would turn a syntax failure into a failed
  -- assertion.
  number = C((P "-")^-1 * (V "hexadecimal" + V "floating" + V "integer")) / function (a) return assert(tonumber(a)) end;
  hexadecimal = P "0x" * V "xdigit"^1;
  floating = (V "digit"^1 * P "." * V "digit"^0 + V "digit"^0 * P "." * V "digit"^1) * (V "exponent")^-1;
  integer = V "digit"^1 * (V "exponent")^-1;
  exponent = S "eE" * (S "-+")^-1 * V "digit"^1;
};
json = P(json); -- compile the grammar
--- Parses JSON data into a Lua object.
-- This is the method you probably want to use if you use this library from a
-- script.
--
-- Errors raised while matching (for example a bad Unicode escape) are caught
-- and reported through the return values instead of being propagated.
--@param data a json string
--@return status true if ok, false if bad
--@return an object representing the json, or error message
function parse (data)
  local status, object = pcall(json.match, json, data);
  if not status then
    return false, object;
  elseif object == nil then
    -- A failed match yields nil. The test must be against nil specifically,
    -- because the JSON literal false decodes to the Lua value false.
    return false, "syntax error";
  end
  return true, object;
end
--Some local shortcuts
local function dbg(str,...)
stdnse.print_debug("Json:"..str, ...)
@@ -47,9 +196,6 @@ local function dbg_err(str,...)
stdnse.print_debug("json-ERR:"..str, ...)
end
-- Javascript null representation, see explanation above
NULL = {}
-- See section 2.5 for escapes.
-- For convenience, ESCAPE_TABLE maps to escape sequences complete with
-- backslash, and REVERSE_ESCAPE_TABLE maps from single escape characters
@@ -125,7 +271,6 @@ end
--@param obj a table containing data
--@return a string containing valid json
function generate(obj)
-- NULL-check must be performed before
-- checking type == table, since the NULL-object
-- is a table
@@ -158,302 +303,6 @@ function generate(obj)
error("Unknown data type in generate")
end
-- The parser is written as an object so that the input and the read
-- position travel together as state.
Json = {}
-- Constructor
--@param input the text to be parsed
--@return a new parser positioned at the first character of input
function Json:new(input)
  self.__index = self
  local parser = setmetatable({}, self)
  parser.input = input
  parser.pos = 1 -- Pos is where the NEXT letter will be read
  return parser
end
-- Reads the character at the current position and advances by one
--@return the character that was at the position before advancing
function Json:next()
  local at = self.pos
  self.pos = at + 1
  return self.input:sub(at, at)
end
-- Moves the position forward to the next non-whitespace character.
-- When only whitespace (or nothing) is left, a syntax error is recorded
-- and the position is left untouched.
function Json:eatWhiteSpace()
  local first = self.input:find("%S",self.pos)
  if first then
    self.pos = first
    return
  end
  self:syntaxerror("Empty data")
end
-- Jumps to a specified position. The value is stored as-is; no check is
-- made that it lies inside the input.
--@param position where to go (index of the next character to be read)
function Json:jumpTo(position)
self.pos = position
end
-- Looks at the character at the current position without consuming it
--@return the character at the current position
function Json:peek()
  local at = self.pos
  return self.input:sub(at, at)
end
-- Tells whether the current position is still inside the input
--@return true if more input is in store
function Json:hasMore()
  return self.pos <= #self.input
end
-- Requires the upcoming input to be exactly the given string.
-- On success the position is moved to just after that string; otherwise a
-- syntax error is recorded and the position stays where it was.
--@param str the string to test
function Json:assertStr(str)
  local stop = self.pos + str:len()
  local content = self.input:sub(self.pos, stop - 1)
  if content ~= str then
    self:syntaxerror(("Expected '%s' but got '%s'"):format( str, content))
    return
  end
  -- All ok, jump forward
  self:jumpTo(stop)
end
-- Records a syntax error on the parser (in self.error) and passes the same
-- message to dbg. A later call overwrites an earlier message.
--@param reason short description of what went wrong
function Json:syntaxerror(reason)
  local message = ("Syntax error near pos %d: %s input: %s"):format( self.pos, reason, self.input)
  self.error = message
  dbg(message)
end
-- Reports whether a syntax error has been recorded on this parser
--@return true if self.error has been set, false otherwise
function Json:errors()
  return not (self.error == nil)
end
-- Parses a top-level JSON structure (object or array).
-- Only an object or an array is accepted at the outermost level; the other
-- value types may appear only inside such a container.
--@return the parsed object or puts error messages in self.error
function Json:parseStart()
  self:eatWhiteSpace()
  local first = self:peek()
  if first == '{' then
    return self:parseObject()
  end
  if first == '[' then
    return self:parseArray()
  end
  self:syntaxerror(("JSON must start with object or array (started with %s)"):format(first))
end
-- Parses a value: object, array, string, null, true, false or a number.
-- The kind of value is chosen from the first non-whitespace character.
-- On failure a syntax error is recorded in self.error; callers are expected
-- to check self:errors() after the call.
--@return the parsed value
function Json:parseValue()
  self:eatWhiteSpace()
  local c = self:peek()
  local value
  if c == '{' then
    value = self:parseObject()
  elseif c == '[' then
    value = self:parseArray()
  elseif c == '"' then
    value = self:parseString()
  elseif c == 'n' then
    self:assertStr("null")
    value = NULL
  elseif c == 't' then
    self:assertStr("true")
    value = true
  elseif c == 'f' then
    self:assertStr("false")
    value = false
  else -- numeric
    -- number = [ minus ] int [ frac ] [ exp ]
    -- The pattern is anchored with ^ so the number has to start exactly at
    -- self.pos; an unanchored find would skip over garbage and pick up a
    -- number located further along in the input.
    local a,b = self.input:find("^-?%d+%.?%d*[eE]?[+-]?%d*", self.pos)
    if not a or not b then
      self:syntaxerror("Error 1 parsing numeric value")
      return
    end
    value = tonumber(self.input:sub(a,b))
    if(value == nil) then
      self:syntaxerror("Error 2 parsing numeric value")
      return
    end
    self:jumpTo(b+1)
  end
  return value
end
-- Parses a json object {}
-- Expects self.pos to be at the opening brace. Keys must be strings.
--@return the object, or nil after recording a syntax error in self.error
function Json:parseObject()
  local object = {}
  make_object(object)
  self:next() -- Eat {
  while(self:hasMore() and not self:errors()) do
    self:eatWhiteSpace()
    local c = self:peek()
    if(c == '}') then -- Empty object, probably
      self:next() -- Eat it
      return object
    end
    if(c ~= '"') then
      self:syntaxerror(("Expected '\"', got '%s'"):format(c))
      return
    end
    local key = self:parseString()
    if self:errors() then
      return
    end
    self:eatWhiteSpace()
    c = self:next()
    if(c ~= ':') then
      self:syntaxerror("Expected ':' got "..c)
      return
    end
    local value = self:parseValue()
    if self:errors() then
      return
    end
    object[key] = value
    self:eatWhiteSpace()
    c = self:next()
    -- Valid now is , or }
    if(c == '}') then
      return object
    end
    if(c ~= ',') then
      self:syntaxerror("Expected ',' or '}', got "..c)
      return
    end
  end
  -- Getting here means the input ran out before the closing brace was seen.
  -- Record that, unless some other error has already been recorded.
  if not self:errors() then
    self:syntaxerror("Unexpected end of input inside object")
  end
end
-- Parses a json array [] or triggers a syntax error
-- Expects self.pos to be at the opening bracket.
--@return the array object, or nil after recording a syntax error
function Json:parseArray()
  local array = {}
  make_array(array)
  self:next() -- Eat [
  while(self:hasMore() and not self:errors()) do
    self:eatWhiteSpace()
    if(self:peek() == ']') then -- Empty array, probably
      self:next()
      return array
    end
    local value = self:parseValue()
    if self:errors() then
      return
    end
    table.insert(array, value)
    self:eatWhiteSpace()
    local c = self:next()
    -- Valid now is , or ]
    if(c == ']') then return array end
    if(c ~= ',') then
      self:syntaxerror(("Expected ',' but got '%s'"):format(c))
      return
    end
  end
  -- Getting here means the input ran out before the closing bracket was
  -- seen, so the array collected so far is incomplete and is not returned.
  if not self:errors() then
    self:syntaxerror("Unexpected end of input inside array")
  end
end
-- Decode a Unicode escape, assuming that self.pos starts just after the
-- initial \u. May consume an additional escape in the case of a UTF-16
-- surrogate pair. See RFC 2781 for UTF-16.
--@return the decoded code point, or nil after recording a syntax error
function Json:parseUnicodeEscape()
  local hex = self.input:match("^(....)", self.pos)
  if not hex then
    self:syntaxerror(("EOF in Unicode escape \\u%s"):format(self.input:sub(self.pos)))
    return
  end
  -- tonumber(s, 16) tolerates surrounding whitespace and a sign, so the four
  -- characters are checked to be hex digits before they are converted.
  local cp = hex:find("^%x%x%x%x$") and tonumber(hex, 16)
  if not cp then
    self:syntaxerror(("Bad unicode escape \\u%s"):format(hex))
    return
  end
  self.pos = self.pos + 4
  if cp < 0xD800 or cp > 0xDFFF then
    return cp
  end
  if cp >= 0xDC00 and cp <= 0xDFFF then
    -- A low surrogate cannot come first.
    self:syntaxerror(("Not a Unicode character: U+%04X"):format(cp))
    return
  end
  -- Beginning of a UTF-16 surrogate.
  local lowhex = self.input:match("^\\u(....)", self.pos)
  if not lowhex then
    self:syntaxerror(("Bad unicode escape \\u%s (missing low surrogate)"):format(hex))
    return
  end
  local low = lowhex:find("^%x%x%x%x$") and tonumber(lowhex, 16)
  if not low or not (low >= 0xDC00 and low <= 0xDFFF) then
    self:syntaxerror(("Bad unicode escape \\u%s\\u%s (bad low surrogate)"):format(hex, lowhex))
    return
  end
  -- Skip the second escape: backslash, "u" and four hex digits.
  self.pos = self.pos + 6
  return 0x10000 + bit.band(cp, 0x3FF) * 0x400 + bit.band(low, 0x3FF)
end
-- Parses a json string
-- Expects self.pos to be at the opening double quote. Escape sequences are
-- decoded; \u escapes are handed to parseUnicodeEscape.
-- @return the string or triggers syntax error
function Json:parseString()
  -- Pieces are collected in a table and joined once at the end, which
  -- avoids rebuilding the whole string for every character.
  local parts = {}
  local c = self:next()
  assert( c == '"')
  while(self:hasMore()) do
    local ch = self:next()
    if(ch == '"') then -- end of string
      return table.concat(parts)
    elseif(ch == '\\') then-- Escaped char
      local d = self:next()
      if REVERSE_ESCAPE_TABLE[d] ~= nil then
        parts[#parts + 1] = REVERSE_ESCAPE_TABLE[d]
      elseif d == 'u' then -- Unicode chars
        local codepoint = self:parseUnicodeEscape()
        if not codepoint then
          return
        end
        parts[#parts + 1] = unicode.utf8_enc(codepoint)
      else
        self:syntaxerror(("Undefined escape character '%s'"):format(d))
        return false
      end
    else -- Char
      parts[#parts + 1] = ch
    end
  end
  -- The input ended before the closing quote was found.
  self:syntaxerror("Unterminated string")
end
--- Parses json data into an object form
--
-- This is the method you probably want to use if you
-- use this library from a script. A fresh Json parser is created for
-- every call.
--@param data a json string
--@return status true if ok, false if bad
--@return an object representing the json, or error message
function parse(data)
  local parser = Json:new(data)
  local result = parser:parseStart()
  if not parser.error then
    return true, result
  end
  return false, parser.error
end
----------------------------------------------------------------------------------
-- Test-code for debugging purposes below
----------------------------------------------------------------------------------
@@ -470,10 +319,9 @@ local TESTS = {
'[1,2,3,4,5,null,false,true,"\195\164\195\165\195\182\195\177","bar"]',
'[]',-- This will yield {} in toJson, since in lua there is only one basic datatype - and no difference when empty
'{}',
'', -- error
'null', -- error
'"abc"', -- error
'', -- error
'null', -- error
'"abc"', -- error
'{a":1}', -- error
'{"a" bad :1}', -- error
'["a\\\\t"]', -- Should become Lua {"a\\t"}
@@ -496,11 +344,11 @@ function test()
local i,v,res,status
for i,v in pairs(TESTS) do
print("----------------------------")
print(v)
print(("%q"):format(v))
status,res = parse(v)
if not status then print( res) end
if(status) then
print(generate(res))
print(("%q"):format(generate(res)))
else
print("Error:".. res)
end