diff --git a/nse_macros.h b/nse_macros.h
index 9667b5b9f..6cb59fd24 100644
--- a/nse_macros.h
+++ b/nse_macros.h
@@ -29,7 +29,7 @@
   #define SCRIPT_ENGINE_LIB_DIR "nselib\\"
 #else
   #define SCRIPT_ENGINE_LIB_DIR "nselib/"
-  #define SCRIPT_ENGINE_LIBEXEC_DIR "nse/"
+  #define SCRIPT_ENGINE_LIBEXEC_DIR "scripts/"
 #endif
 
 #define SCRIPT_ENGINE_DATABASE "script.db"
diff --git a/scripts/robots.nse b/scripts/robots.nse
index 33ffc076f..971368953 100644
--- a/scripts/robots.nse
+++ b/scripts/robots.nse
@@ -1,39 +1,80 @@
 require('shortport')
 require('strbuf')
+require('stdnse')
 require('listop')
 
 id = "robots.txt"
 author = "Eddie Bell "
-description = "Download a http servers robots.txt file and display all disallowed entries"
+description = "Probes for a http servers robots.txt file and returns a summary of it"
 license = "See nmaps COPYING for licence"
-categories = {"safe"}
+categories = {"intrusive"}
 runlevel = 1.0
 
 portrule = shortport.port_or_service(80, "http")
 
-local last_len = 0
--- split the output in 40 character lines
-local function buildOutput(output, w)
-  local len = string.len(w)
-
-  for i,v in ipairs(output) do
-    if w == v then return nil end
+-- validates a robots.txt according to IETF conventions. Note
+-- there is no standard governing robots.txt files but the
+-- IETF document is the closest thing.
+-- robot_txt is a list of lines; the patterns are lower-case only.
+-- Returns true when every line is a comment, record or empty.
+local function validate(robot_txt)
+  local function valid(line)
+    return string.find(line, '^#.*') or
+      string.find(line, '^user%-agent:.*') or
+      string.find(line, '^disallow:.*') or
+      string.find(line, '^allow:.*') or
+      string.len(line) == 0
   end
+
+  -- test if the robots.txt data is valid
+  local results = listop.map(valid, robot_txt)
+  local invalid_lines = listop.filter(function(x) return not x end, results)
+  return listop.is_empty(invalid_lines)
+end
+
+-- Adds a "! ..." note to output for each element found in the
+-- robots.txt body r_data, and one if validate() rejects the body.
+-- NOTE(review): nothing is returned, so this assumes strbuf's '..'
+-- appends to output in place -- confirm against the strbuf library.
+local function analyse_robots(r_data, output)
+  -- classify a line: 1 = comment, 2 = user-agent, 3 = disallow, 0 = other
+  local function is_present(line)
+    if string.find(line, '^#.*') then return 1
+    elseif string.find(line, '^user%-agent:.*') then return 2
+    elseif string.find(line, '^disallow:.*') then return 3
+    else return 0
+    end
+  end
+
+  local function gen_match(match_id, present)
+    return listop.filter(function(x) return x == match_id end, present)
+  end
+
+  -- parse robots file and check for its elements
+  -- (CRs are stripped so CRLF bodies do not leave "\r" on every line)
+  local robot_txt = stdnse.strsplit("\n", (string.gsub(r_data, "\r", "")))
+  local present = listop.map(is_present, robot_txt)
 
-  if last_len == 0 or last_len + len <= 40 then
-    last_len = last_len + len
-  else
-    output = output .. '\n'
-    last_len = 0
+  if not listop.is_empty(gen_match(3, present)) then
+    output = output .. "! contains disallowed entries\n"
   end
 
-  output = output .. w
-  output = output .. ' '
+  if not listop.is_empty(gen_match(2, present)) then
+    output = output .. "! mentions specific user-agents\n"
+  end
+
+  if not listop.is_empty(gen_match(1, present)) then
+    output = output .. "! contains comments, which may be interesting\n"
+  end
+
+  if not validate(robot_txt) then
+    output = output .. "! does not adhere to IETF conventions\n"
+  end
 end
 
+-- Requests /robots.txt and /info.txt and reports what was found.
 action = function(host, port)
-  local soc, lines, status
-
-  local catch = function() soc.close() end
+  local lines, status, soc, query, s, e
+  local catch = function() soc:close() end
   local try = nmap.new_try(catch)
 
@@ -41,8 +82,9 @@ action = function(host, port)
   soc = nmap.new_socket()
   soc:set_timeout(4000)
   try(soc:connect(host.ip, port.number))
-
-  local query = strbuf.new()
+
+  -- test if robots.txt is present
+  query = strbuf.new()
   query = query .. "GET /robots.txt HTTP/1.1"
   query = query .. "Accept: */*"
   query = query .. "Accept-Language: en"
@@ -52,25 +94,57 @@ action = function(host, port)
   try(soc:send(strbuf.dump(query, '\r\n')))
 
   local response = strbuf.new()
+  local output = strbuf.new()
+
   while true do
     status, lines = soc:receive_lines(1)
     if not status then break end
     response = response .. lines
   end
 
-  if not string.find(strbuf.dump(response), "HTTP/1.1 200 OK") then
-    return nil
+  local hdata = strbuf.dump(response, '\n')
+
+  if string.find(hdata, "HTTP/1.1 200 OK") then
+    for w in string.gmatch(hdata, "Content%-Type:%s*([^\r\n]*)\r\n") do
+      output = output .. w .. '\n'
+    end
+
+    -- remove http protocol stuff and analyse robots.txt file
+    -- (lower-cased first: analyse_robots matches lower-case patterns)
+    s, e = string.find(hdata, "\r\n\r\n")
+    hdata = string.lower(hdata)
+    if e then analyse_robots(string.sub(hdata, e), output) end
   end
 
-  -- parse all disallowed entries
-  local output = strbuf.new()
-  for w in string.gmatch(strbuf.dump(response, '\n'), "Disallow:%s*([^\n]*)\n") do
-    buildOutput(output, w)
+  soc:close()
+  try(soc:connect(host.ip, port.number))
+  strbuf.clear(query)
+  strbuf.clear(response)
+
+  -- test to see if info.txt is present
+  query = query .. "GET /info.txt HTTP/1.1"
+  query = query .. "Accept: */*"
+  query = query .. "Accept-Language: en"
+  query = query .. "User-Agent: Nmap NSE"
+  query = query .. "Host: " .. host.ip .. ":" .. port.number
+  query = query .. '\r\n\r\n';
+  try(soc:send(strbuf.dump(query, '\r\n')))
+
+  while true do
+    status, lines = soc:receive_lines(1)
+    if not status then break end
+    response = response .. lines
   end
 
-  if not listop.is_empty(output) then
+  if string.find(strbuf.dump(response), "HTTP/1.1 200 OK") then
+    output = output .. "\n! info.txt is present\n"
+  end
+
+  soc:close()
+
+  if listop.is_empty(output) then
+    return nil
+  else
     return strbuf.dump(output)
   end
-
-  return nil
 end