1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-06 04:31:29 +00:00
Files
nmap/nselib/formulas.lua
dmiller 6f57df02be Don't strip leading 0s in stdnse.tobinary
Every place this function is used, the result is padded with 0s anyway,
so may as well not strip them here. Didn't remove the padding code since
this could return only 4 chars, and most padding is to 8-char width.
2020-11-30 23:22:07 +00:00

221 lines
5.7 KiB
Lua

---
-- Formula functions for various calculations.
--
-- The library lets scripts use common mathematical functions to compute percentages,
-- averages, entropy, randomness and other calculations. Scripts that generate statistics
-- and metrics can also make use of this library.
--
-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html
---
local math = require "math"
local stdnse = require "stdnse"
local string = require "string"
local table = require "table"
local unittest = require "unittest"
_ENV = stdnse.module("formulas", stdnse.seeall)
--- Calculate the entropy of a password.
--
-- A random password's information entropy, H, is given by the formula: H = L *
-- (logN) / (log2), where N is the number of possible symbols and L is the
-- number of symbols in the password. Based on
-- https://en.wikipedia.org/wiki/Password_strength
--
-- Only digits, upper-case letters and lower-case letters enlarge N. Any other
-- character still counts toward L but does not change N. When the password
-- contains none of those three classes (this includes the empty string), N
-- would be 0 and the logarithm would not be finite, so 0 is returned instead.
-- @param value The password to check
-- @return The entropy in bits, rounded down to a whole number
calcPwdEntropy = function(value)
  -- The values 10, 26, 26 have been taken from Wikipedia's entropy table.
  local symbols = 0
  if string.find(value, "%d") then
    symbols = symbols + 10
  end
  if string.find(value, "%u") then
    symbols = symbols + 26
  end
  if string.find(value, "%l") then
    symbols = symbols + 26
  end

  -- math.log(0) is -inf (and -inf * 0 is NaN); report "no entropy" instead.
  if symbols == 0 then
    return 0
  end

  return math.floor(math.log(symbols) * #value / math.log(2))
end
-- A chi-square test for the null hypothesis that the members of data are drawn
-- from a uniform distribution over num_cats categories.
--
-- Returns Pearson's statistic: the sum over ALL num_cats categories of
-- (observed - expected)^2 / expected, where expected = #data / num_cats.
-- Categories that never occur in data are included; each of them has an
-- observed count of 0 and therefore contributes expected^2 to the numerator.
-- An empty data table yields 0 (there is nothing to measure) rather than 0/0.
--
-- data:     array-style table of category labels (any value usable as a key)
-- num_cats: number of categories the labels could take
local function chi2(data, num_cats)
  local total = #data
  if total == 0 then
    return 0.0
  end

  -- Count how often each label occurs.
  local bins = {}
  for _, x in ipairs(data) do
    bins[x] = (bins[x] or 0) + 1
  end

  local expected = total / num_cats
  local x2 = 0.0
  local seen = 0
  for _, n in pairs(bins) do
    local delta = n - expected
    x2 = x2 + delta * delta
    seen = seen + 1
  end

  -- Account for the categories that were never observed. The guard keeps the
  -- correction from going negative if data holds more distinct labels than
  -- num_cats claims.
  local unseen = num_cats - seen
  if unseen > 0 then
    x2 = x2 + unseen * expected * expected
  end

  return x2 / expected
end
-- Convert the first byte of c to the string produced by stdnse.tobinary,
-- left-padded with "0" characters when that string is shorter than 8.
local function c_to_bin (c)
  local digits = stdnse.tobinary(c:byte())
  local missing = 8 - #digits
  -- string.rep yields "" for a non-positive count, so longer results pass through.
  return string.rep("0", missing) .. digits
end
-- Split a string into a sequence of bit strings of the given length.
-- splitbits("abc", 5) --> {"01100", "00101", "10001", "00110"}
-- Any short final group is omitted; a final group of exactly n bits is kept.
--
-- s: the string whose bytes are expanded (via c_to_bin) into a bit string
-- n: width, in bits, of each group
-- Returns an array-style table of n-character strings.
local function splitbits(s, n)
  -- gsub also returns a substitution count; the parentheses keep only the string.
  local bits = (s:gsub(".", c_to_bin))
  local seq = {}
  -- The last position at which a full group can start is #bits - n + 1.
  -- (Stopping at #bits - n dropped the final group whenever #bits was an
  -- exact multiple of n.)
  for i = 1, #bits - n + 1, n do
    seq[#seq + 1] = bits:sub(i, i + n - 1)
  end
  return seq
end
-- Chi-square critical values at 0.95 confidence, keyed by degrees of freedom:
-- each entry is the x at which the chi-square CDF for that many degrees of
-- freedom reaches 0.95. looksRandom rejects a sample whose statistic exceeds
-- the entry. The keys it uses are one less than the number of categories it
-- tests: 3 (4 categories), 15 (16 categories) and 255 (256 categories).
-- The values were produced with:
-- >>> import scipy.stats, scipy.optimize
-- >>> scipy.optimize.newton(lambda x: scipy.stats.chi2(dof).cdf(x) - 0.95, dof)
local CHI2_CDF = {
[3] = 7.8147279032511738,
[15] = 24.99579013972863,
[255] = 293.2478350807001,
}
--- Checks whether a sample looks random
--
-- The sample is expected to be tiny (only 16 bytes), so a chi-square
-- goodness of fit test is run at three granularities: groups of 2 bits,
-- groups of 4 bits, and whole bytes. With whole bytes alone, any sample
-- whose bytes are all different would pass. The 2-bit grouping tends to
-- catch things like pure ASCII, where one out of every four groups never
-- has its high bit set.
-- @param data The data to check
-- @return True if the data appears to be random, false otherwise
function looksRandom(data)
  -- Evaluated left to right; the first statistic above its critical value
  -- settles the answer and the remaining tests are skipped.
  local nonuniform =
    chi2(splitbits(data, 2), 4) > CHI2_CDF[3]
    or chi2(splitbits(data, 4), 16) > CHI2_CDF[15]
    or chi2({string.byte(data, 1, -1)}, 256) > CHI2_CDF[255]
  return not nonuniform
end
--- Compute the mean and sample standard deviation of an array in a single
-- pass, using the running-update algorithm from Knuth Vol. 2, Section 4.2.2.
--
-- @param t An array-style table of values
-- @return The mean of the values (nil when t is empty)
-- @return The standard deviation of the values (nil when t is empty, 0 when
--         t holds a single value)
function mean_stddev(t)
  local count = #t
  if count == 0 then
    return nil, nil
  end
  if count == 1 then
    return t[1], 0
  end

  local mean, spread = t[1], 0
  for idx = 2, count do
    local value = t[idx]
    local before = mean
    -- Update the running mean, then accumulate using both old and new means.
    mean = before + (value - before) / idx
    spread = spread + (value - before) * (value - mean)
  end

  return mean, math.sqrt(spread / (count - 1))
end
-- Partition step shared by quickselect and quicksort.
-- Rearranges t[left..right] in place so that every element smaller than the
-- value originally at index pivot precedes it, and returns that value's
-- final index.
local function partition(t, left, right, pivot)
  local pivot_value = t[pivot]
  -- Park the pivot at the right edge while the rest of the range is scanned.
  t[right], t[pivot] = t[pivot], t[right]

  local boundary = left
  local cursor = left
  while cursor < right do
    assert(boundary < right)
    if t[cursor] < pivot_value then
      t[cursor], t[boundary] = t[boundary], t[cursor]
      boundary = boundary + 1
    end
    cursor = cursor + 1
  end

  -- Drop the pivot into the slot just past the smaller elements.
  t[right], t[boundary] = t[boundary], t[right]
  return boundary
end
-- Quickselect algorithm.
-- Repeatedly partitions t[left..right] around a randomly chosen pivot,
-- narrowing the range toward index k, and returns the element that ends up
-- at index k. t is reordered in place.
local function _qselect(t, left, right, k)
  local lo, hi = left, right
  while lo ~= hi do
    local landed = partition(t, lo, hi, math.random(lo, hi))
    if landed == k then
      return t[k]
    end
    if k < landed then
      hi = landed - 1
    else
      lo = landed + 1
    end
  end
  return t[lo]
end
--- Return the k-th smallest element in a list
--
-- Elements are ranked in ascending order, so k = 1 selects the minimum and
-- k = #t the maximum. The list itself is not modified; selection runs on a
-- copy.
-- @param t The list, not sorted
-- @param k The ordinal value to return
-- @return The k-th smallest element in the list, or nil if the list is empty
--         or k is outside the range 1..#t
function quickselect(t, k)
  local n = #t
  -- Outside this range the selection would either ask math.random for an
  -- empty interval (raising an error) or settle on an unrelated element.
  if k < 1 or k > n then
    return nil
  end

  local tc = {}
  -- Work on a copy of the table, since we modify in-place
  table.move(t, 1, n, 1, tc)
  return _qselect(tc, 1, n, k)
end
--- Find the median of a list
--
-- The value at rank ceil(#t / 2) is returned, so for a list with an even
-- number of values this is the element at rank #t / 2; the two middle values
-- are not averaged.
--@param t the table/list of values
--@return the median value, or nil if the list is empty
function median(t)
  local n = #t
  -- An empty list has no median; without this check the selection would be
  -- asked for rank 0.
  if n == 0 then
    return nil
  end
  return quickselect(t, math.ceil(n / 2))
end
-- Unit tests are registered only when unittest.testing() returns a true
-- value; the module environment is returned either way.
if unittest.testing() then
  test_suite = unittest.TestSuite:new()
  local same = unittest.table_equal
  local expected = {"01100", "00101", "10001", "00110"}
  test_suite:add_test(same(splitbits("abc", 5), expected), 'splitbits("abc", 5)')
end
return _ENV