diff --git a/nselib/http.lua b/nselib/http.lua
index c6fd4876c..fe63c4dd3 100644
--- a/nselib/http.lua
+++ b/nselib/http.lua
@@ -14,8 +14,13 @@
 -- concatenated and separated by commas. The body value is a string
 -- containing the body of the HTTP response.
 -- @copyright Same as Nmap--See http://nmap.org/book/man-legal.html
+-- @args http-max-cache-size The maximum memory size (in bytes) of the cache. Defaults to 1e6.
 --
 
+local MAX_CACHE_SIZE = "http-max-cache-size";
+
+local coroutine = require "coroutine";
+local table = require "table";
 
 module(... or "http",package.seeall)
 
@@ -29,6 +34,28 @@ local have_ssl = (nmap.have_ssl() and pcall(require, "openssl"))
 -- The 404 used for URL checks
 local URL_404 = '/Nmap404Check' .. os.time(os.date('*t'))
 
+-- Recursively copy a table.
+-- Recurses only when a value is a table; every other value, and every key,
+-- is copied by assignment. Metatables are not copied.
+-- @param t The table to copy.
+-- @param seen Optional, used by the recursion itself: maps each table
+-- already visited to its copy, so that a table reachable more than once
+-- (shared or self-referencing) is copied once instead of recursing forever.
+-- @return A new table with the copied contents of t.
+local function tcopy (t, seen)
+  seen = seen or {};
+  local tc = {};
+  seen[t] = tc; -- register before descending so cycles resolve to tc
+  for k,v in pairs(t) do
+    if type(v) == "table" then
+      tc[k] = seen[v] or tcopy(v, seen);
+    else
+      tc[k] = v;
+    end
+  end
+  return tc;
+end
+
 -- Skip *( SP | HT ) starting at offset. See RFC 2616, section 2.2.
 -- @return the first index following the spaces.
 -- @return the spaces skipped over.
@@ -484,6 +503,166 @@ function buildCookies(cookies, path)
   return cookie
 end
 
+-- Enforce the cache size limit by dropping cached bodies.
+-- Runs only when cache.size exceeds the limit given by the
+-- http-max-cache-size script argument (1e6 bytes when the argument is
+-- missing or not a number). Records are sorted by last_used, oldest first,
+-- and bodies are replaced by "" until the total fits. An emptied record
+-- keeps the rest of its result but has get = false.
+-- @param cache The cache table (array part holds the records).
+-- @return The cache size in bytes after any eviction.
+local function check_size (cache)
+  -- tonumber() yields nil for a missing or malformed argument; comparing
+  -- against nil below would raise an error, so fall back to the default.
+  local max_size = tonumber(nmap.registry.args[MAX_CACHE_SIZE]) or 1e6;
+  local size = cache.size;
+
+  if size > max_size then
+    stdnse.print_debug(1,
+        "Current http cache size (%d bytes) exceeds max size of %d",
+        size, max_size);
+    table.sort(cache, function(r1, r2)
+      return (r1.last_used or 0) < (r2.last_used or 0);
+    end);
+
+    for _, record in ipairs(cache) do
+      if size <= max_size then break end
+      local result = record.result;
+      -- Skip records that hold no body; emptying them frees nothing.
+      if record.size > 0 and type(result.body) == "string" then
+        size = size - record.size;
+        record.size, record.get, result.body = 0, false, "";
+      end
+    end
+    cache.size = size;
+  end
+  stdnse.print_debug(1, "Final http cache size (%d bytes) of max size of %d",
+      size, max_size);
+  return size;
+end
+
+-- Cache of GET and HEAD requests. Uses <"host:port:path", record>.
+-- The same records are also stored in the array part so that check_size
+-- can sort them; cache.size is the sum of the record sizes.
+-- record is in the format:
+--   result: The result from http.get or http.head
+--   last_used: The time the record was last accessed or made.
+--   get: True when result came from a GET request and its body is still kept.
+--   size: The size of the record, equal to #record.result.body.
+--   network_cost: The length of the raw response the result was parsed from.
+local cache = {size = 0};
+
+-- Unique value stored in the cache while a result is being retrieved.
+-- Also holds <mutex, thread> pairs: the thread that stored the marker
+-- under that mutex. Values are weak so finished threads can be collected.
+local WORKING = setmetatable({}, {__mode = "v"});
+
+-- Look up a request in the cache.
+-- On a hit a copy of the cached result is returned. Otherwise the key is
+-- marked as being retrieved, its mutex stays locked, and the caller must
+-- perform the request and pass the result to insert_cache, which unlocks.
+-- NOTE: the key is built from host, port and path only, so requests that
+-- differ in other ways (for example headers) share a cache entry.
+-- @param method "GET" or "HEAD".
+-- @param host Host table (its ip field is used) or string.
+-- @param port Port table (its number field is used) or number.
+-- @param path The path of the resource.
+-- @param options Optional table; bypass_cache, no_cache and no_cache_body
+-- are read from it.
+-- @return A copy of the cached result, or nil when the caller must fetch.
+-- @return The state table to hand to insert_cache.
+local function lookup_cache (method, host, port, path, options)
+  options = options or {};
+  local bypass_cache = options.bypass_cache; -- do not lookup
+  local no_cache = options.no_cache; -- do not save result
+  local no_cache_body = options.no_cache_body; -- do not save body
+
+  if type(host) == "table" then host = host.ip end
+  if type(port) == "table" then port = port.number end
+
+  local key = host..":"..port..":"..path;
+  local mutex = nmap.mutex(tostring(lookup_cache)..key);
+
+  local state = {
+    mutex = mutex,
+    key = key,
+    method = method,
+    bypass_cache = bypass_cache,
+    no_cache = no_cache,
+    no_cache_body = no_cache_body,
+  };
+
+  while true do
+    mutex "lock";
+    local record = cache[key];
+    if record == WORKING then
+      -- Handle the marker before anything reads record fields: WORKING is
+      -- not a record and must never be kept as state.old_record.
+      local working = WORKING[mutex];
+      if working == nil or working == coroutine.running() or
+          coroutine.status(working) == "dead" then
+        -- the thread stopped before insert_cache could be called
+        cache[key], record = nil, nil; -- reset
+      end
+    end
+    if record == WORKING then
+      mutex "done"; -- marker still owned by another thread; try again
+    elseif bypass_cache or record == nil or
+        method == "GET" and not record.get then
+      WORKING[mutex] = coroutine.running();
+      cache[key], state.old_record = WORKING, record;
+      return nil, state;
+    else
+      mutex "done";
+      record.last_used = os.time();
+      return tcopy(record.result), state;
+    end
+  end
+end
+
+-- Finish a retrieval started by lookup_cache and unlock the key's mutex.
+-- The result is stored unless it is nil, state.no_cache is set, or the
+-- status is 206; in those cases the previous record (if any) is restored.
+-- @param state The state table returned by lookup_cache.
+-- @param result The parsed result of the request.
+-- @param raw_response The raw response; its length is kept as network_cost.
+local function insert_cache (state, result, raw_response)
+  local key = assert(state.key);
+  local mutex = assert(state.mutex);
+  local record = state.old_record;
+
+  if result == nil or state.no_cache or
+      result.status == 206 then -- ignore partial content response
+    cache[key] = record;
+  else
+    local copy = tcopy(result); -- only modify copy
+    local get = state.method == "GET";
+    if state.no_cache_body then
+      get, copy.body = false, "";
+    end
+    -- Measure the body that is actually stored, so that check_size later
+    -- subtracts exactly what was added to cache.size.
+    local size = type(copy.body) == "string" and #copy.body or 0;
+
+    -- Reuse the record being replaced: it is already in the array part,
+    -- and its old size has to be taken out of the total.
+    if record == nil then
+      record = {size = 0};
+      cache[#cache+1] = record;
+    end
+    cache.size = cache.size - record.size + size;
+    record.result, record.get, record.size = copy, get, size;
+    record.last_used = os.time();
+    record.network_cost = type(raw_response) == "string" and
+        #raw_response or 0;
+    cache[key] = record;
+    if type(copy.body) == "string" then
+      check_size(cache);
+    end
+  end
+  mutex "done";
+end
+
 --- Fetches a resource with a GET request.
 --
 -- The first argument is either a string with the hostname or a table like the
@@ -501,10 +680,15 @@ end
 -- @return Table as described in the module description.
 -- @see http.parseResult
 get = function( host, port, path, options, cookies )
-  local data, mod_options = buildGet(host, port, path, options, cookies)
-  data = buildRequest(data, mod_options)
-  local response = request(host, port, data)
-  return parseResult(response)
+  local result, state = lookup_cache("GET", host, port, path, options);
+  if result == nil then
+    local data, mod_options = buildGet(host, port, path, options, cookies)
+    data = buildRequest(data, mod_options)
+    local response = request(host, port, data)
+    result = parseResult(response)
+    insert_cache(state, result, response);
+  end
+  return result;
 end
 
 --- Fetches a resource with a HEAD request.
@@ -524,10 +708,15 @@ end
 -- @return Table as described in the module description.
 -- @see http.parseResult
 head = function( host, port, path, options, cookies )
-  local data, mod_options = buildHead(host, port, path, options, cookies)
-  data = buildRequest(data, mod_options)
-  local response = request(host, port, data)
-  return parseResult(response)
+  local result, state = lookup_cache("HEAD", host, port, path, options);
+  if result == nil then
+    local data, mod_options = buildHead(host, port, path, options, cookies)
+    data = buildRequest(data, mod_options)
+    local response = request(host, port, data)
+    result = parseResult(response)
+    insert_cache(state, result, response);
+  end
+  return result;
 end
 
 --- Fetches a resource with a POST request.
@@ -750,6 +939,10 @@ end
 -- * timeout: A timeout used for socket operations.
 -- * header: A table containing additional headers to be used for the request.
 -- * content: The content of the message (content-length will be added -- set header['Content-Length'] to override)
+-- * bypass_cache: The contents of the cache are ignored for the request (method == "GET" or "HEAD").
+-- * no_cache: The result of the request is not saved in the cache (method == "GET" or "HEAD").
+-- * no_cache_body: The body of the response is not saved in the cache (method == "GET" or "HEAD").
+
 request = function( host, port, data )
   local opts