diff --git a/scripts/robots.nse b/scripts/robots.nse index 971368953..33ffc076f 100644 --- a/scripts/robots.nse +++ b/scripts/robots.nse @@ -1,71 +1,39 @@ require('shortport') require('strbuf') -require('stdnse') require('listop') id = "robots.txt" author = "Eddie Bell " -description = "Probes for a http servers robots.txt file and returns a summary of it" +description = "Download a http servers robots.txt file and display all disallowed entries" license = "See nmaps COPYING for licence" -categories = {"intrusive"} +categories = {"safe"} runlevel = 1.0 portrule = shortport.port_or_service(80, "http") +local last_len = 0 --- validates a robots.txt according to IETF conventions. Note --- there is no standard governing robots.txt files but the --- IETF document is the closes thing -local function validate(robot_txt) - local function valid(line) - return string.find(line, '^#.*') or - string.find(line, '^user%-agent:.*') or - string.find(line, '^disallow:.*') or - string.find(line, '^allow:.') or - string.len(line) == 0 +-- split the output in 40 character lines +local function buildOutput(output, w) + local len = string.len(w) + + for i,v in ipairs(output) do + if w == v then return nil end end - - -- test if the robots.txt data is valid - local results = listop.map(valid, robot_txt) - local invalid_lines = listop.filter(function(x) return not x end, results) - return listop.is_empty(invalid_lines) -end - -local function analyse_robots(r_data, output) - local function is_present(line) - if string.find(line, '^#.*') then return 1 - elseif string.find(line, '^user%-agent:.*') then return 2 - elseif string.find(line, '^disallow:.*') then return 3 - else return 0 - end - end - - local function gen_match(match_id, present) - return listop.filter(function(x) return x == match_id end, present) - end - - -- parse robots file and check for its elements - local robot_txt = stdnse.strsplit("\n", r_data) - local present = listop.map(is_present, robot_txt) - if not listop.is_empty(gen_match(1, present)) then - output = output .. "! contains disallowed entries\n" + if last_len == 0 or last_len + len <= 40 then + last_len = last_len + len + else + output = output .. '\n' + last_len = 0 end - if not listop.is_empty(gen_match(2, present)) then - output = output .. "! mentions specific user-agents\n" - end - - if not listop.is_empty(gen_match(3, present)) then - output = output .. "! contains comments, which may be interesting\n" - end - - if not validate(robot_txt) then - output = output .. "! does not adhere to IETF conventions\n" - end + output = output .. w + output = output .. ' ' end action = function(host, port) - local lines, status, soc, query, s, e + local soc, lines, status + local catch = function() soc.close() end local try = nmap.new_try(catch) @@ -73,9 +41,8 @@ action = function(host, port) soc = nmap.new_socket() soc:set_timeout(4000) try(soc:connect(host.ip, port.number)) - - -- test if robots.txt is present - query = strbuf.new() + + local query = strbuf.new() query = query .. "GET /robots.txt HTTP/1.1" query = query .. "Accept: */*" query = query .. "Accept-Language: en" @@ -85,56 +52,25 @@ action = function(host, port) try(soc:send(strbuf.dump(query, '\r\n'))) local response = strbuf.new() + while true do + status, lines = soc:receive_lines(1) + if not status then break end + response = response .. lines + end + + if not string.find(strbuf.dump(response), "HTTP/1.1 200 OK") then + return nil + end + + -- parse all disallowed entries local output = strbuf.new() - - while true do - status, lines = soc:receive_lines(1) - if not status then break end - response = response .. lines + for w in string.gmatch(strbuf.dump(response, '\n'), "Disallow:%s*([^\n]*)\n") do + buildOutput(output, w) end - local hdata = strbuf.dump(response, '\n') - - if string.find(hdata, "HTTP/1.1 200 OK") then - for w in string.gmatch(hdata, "Content%-Type:%s*([^\r\n]*)\r\n") do - output = output .. w .. '\n' - end - - -- remove http protocol stuff and analyse robots.txt file - s, e = string.find(hdata, "\r\n\r\n") - hdata = string.lower(hdata) - if e then analyse_robots(string.sub(hdata, e), output) end - end - - soc:close() - try(soc:connect(host.ip, port.number)) - strbuf.clear(query) - strbuf.clear(response) - - -- test to see if info.txt is present - query = query .. "GET /info.txt HTTP/1.1" - query = query .. "Accept: */*" - query = query .. "Accept-Language: en" - query = query .. "User-Agent: Nmap NSE" - query = query .. "Host: " .. host.ip .. ":" .. port.number - query = query .. '\r\n\r\n'; - try(soc:send(strbuf.dump(query, '\r\n'))) - - while true do - status, lines = soc:receive_lines(1) - if not status then break end - response = response .. lines - end - - if string.find(strbuf.dump(response), "HTTP/1.1 200 OK") then - output = output .. "\n! info.txt is present\n" - end - - soc:close() - - if listop.is_empty(output) then - return nil - else + if not listop.is_empty(output) then return strbuf.dump(output) end + + return nil end