1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-07 13:11:28 +00:00

New and improved robots script

* Instead of printing all disallowed entries, it now prints only the
  first 20. In verbose and debug modes it prints more entries; the exact
  number depends on the debug and verbose levels.

* Prints out the number of disallowed entries 

* Prints a message if a robots.txt file exists but has no entries.

* More resilient parsing and duplicate removal code

* Extended the line length to 50 characters so less vertical space is used
This commit is contained in:
ejlbell
2008-06-06 17:30:09 +00:00
parent 8b0b2559ea
commit 7cf7259f9d

View File

@@ -1,54 +1,90 @@
require('shortport')
require('strbuf')
require('listop')
require('http')
-- Script metadata. Every field below is a plain global assignment.
-- NOTE(review): `description` and `portrule` are each assigned twice in a row,
-- so only the second assignment of each takes effect. This looks like removed
-- and added diff lines shown together without +/- markers -- confirm against
-- the real script before relying on either value.
id = "robots.txt"
author = "Eddie Bell <ejlbell@gmail.com>"
description = "Download a http servers robots.txt file and display all disallowed entries"
description = "check for robots.txt with disallowed entries (print them in debug/verbose mode)"
license = "Same as Nmap--See http://nmap.org/book/man-legal.html"
categories = {"default", "safe"}
runlevel = 1.0
-- The rule is built by shortport.port_or_service from a list of port numbers
-- and a list of service names.
portrule = shortport.port_or_service({80,443}, {"http","https"})
portrule = shortport.port_or_service({80, 8080}, {"http"})
-- Running character count of the current output line; buildOutput reads and
-- updates it to decide when an entry should start a new line.
local last_len = 0
-- split the output in 40 character lines
-- split the output in 50 character length lines
-- Add the entry `w` to `output` unless it is empty or already present, and
-- track the length of the current output line in the file-level `last_len` so
-- that an entry which would overflow the line is prefixed with a newline.
-- NOTE(review): several statements below appear twice with different limits
-- (40 vs 50) and different append styles; this looks like the old and new
-- versions of the function interleaved. As shown, the final `end` closes the
-- 40-character `if` rather than the function, so the text is not valid Lua
-- as-is -- confirm against the real script.
-- @param output collection the entry is appended to with `..`
-- @param w      entry text (string)
-- @return nothing meaningful; the early exits return nil
local function buildOutput(output, w)
local len = string.len(w)
-- empty entries are ignored
if w:len() == 0 then
return nil
end
-- check for duplicates
for i,v in ipairs(output) do
if w == v then return nil end
-- v:sub(2, v:len()) is v without its first character; presumably this matches
-- entries that were stored with a leading '\n' (see `nl .. w` below).
-- NOTE(review): it would equally match a stored entry that differs from w
-- only by having one extra leading character -- verify this is intended.
if w == v or w == v:sub(2, v:len()) then
return nil
end
end
if last_len == 0 or last_len + len <= 40 then
last_len = last_len + len
-- format lines
-- the entry stays on the current line while the running length is within 50
if last_len == 0 or last_len + w:len() <= 50 then
last_len = last_len + w:len()
-- NOTE(review): `nl` has no `local` declaration here, so it is a global
nl = ''
else
output = output .. '\n'
-- NOTE(review): the counter restarts at 0, so the length of the entry that
-- begins the new line is not counted -- confirm this is intended
last_len = 0
nl = '\n'
end
output = output .. w
output = output .. ' '
-- `output` is a parameter, so this rebinding is local to the function;
-- presumably the strbuf concatenation appends in place -- TODO confirm
output = output .. (nl .. w)
end
-- parse all disallowed entries in body and add them to a strbuf
-- Parse every "Disallow" rule in a robots.txt body and pass its path to
-- buildOutput together with `output`.
--
-- A trailing "# comment" is removed from each line before matching, so rules
-- that are commented out are skipped. The field name is matched
-- case-insensitively, may be preceded by whitespace and may have whitespace
-- before the colon. The captured path has surrounding whitespace trimmed so
-- that otherwise identical entries compare equal.
--
-- @param body   robots.txt contents as a string
-- @param output collection handed to buildOutput for every rule found
-- @return the length of `output` (#output) after parsing
local function parse_robots(body, output)
  local rule = "^%s*[Dd][Ii][Ss][Aa][Ll][Ll][Oo][Ww]%s*:%s*(.-)%s*$"
  for line in body:gmatch("[^\r\n]+") do
    -- drop the comment first; a local assignment keeps only gsub's first result
    local uncommented = line:gsub("%s*#.*", "")
    -- a line holds at most one rule, so a single match is enough
    local path = uncommented:match(rule)
    if path then
      buildOutput(output, path)
    end
  end
  return #output
end
-- Script entry point: fetch /robots.txt from the target with http.get and
-- report its disallowed entries.
-- NOTE(review): some statements below appear twice or belong to a different
-- control flow (two http.get calls, an inline parsing loop next to the
-- parse_robots call, a `return strbuf.dump(output)` in the middle of the body).
-- This looks like old and new versions of the function interleaved; the text
-- is not valid Lua as shown -- confirm against the real script.
-- @param host passed through to http.get
-- @param port passed through to http.get
-- @return nil when the response status is not 200, otherwise a message string
action = function(host, port)
local answer = http.get( host, port, "/robots.txt" )
local answer = http.get(host, port, "/robots.txt" )
-- any status other than 200 produces no script output
if answer.status ~= 200 then
return nil
end
-- parse all disallowed entries and remove comments
-- v_level is the verbosity level plus twice the debugging level
local v_level = nmap.verbosity() + (nmap.debugging()*2)
local output = strbuf.new()
-- NOTE(review): this inline loop only matches "Disallow:" lines terminated by
-- '\n' and has no closing `end` of its own here; presumably it is the
-- pre-change parser that parse_robots replaces
for w in string.gmatch(answer.body, "Disallow:%s*([^\n]*)\n") do
w = w:gsub("%s*#.*", "")
buildOutput(output, w)
-- default number of entries shown
local detail = 15
-- NOTE(review): `dis_count` has no `local` declaration here, so it is a global
dis_count = parse_robots(answer.body, output)
if dis_count == 0 then
return "is empty or has no disallowed entries"
end
if not listop.is_empty(output) then
return strbuf.dump(output)
-- verbose/debug mode (v_level 2 to 4): show up to 40 entries
if v_level > 1 and v_level < 5 then
detail = 40
-- double debug mode (v_level 5 or more), print everything
elseif v_level >= 5 then
detail = dis_count
end
return nil
-- check we have enough entries
if detail > dis_count then
detail = dis_count
end
-- NOTE(review): `noun` and `shown` are also assigned without `local`
-- singular or plural noun, each with a trailing space
noun = dis_count == 1 and "entry " or "entries "
-- the "(N shown)" suffix is omitted when every entry is printed
shown = (detail == 0 or detail == dis_count)
and "\n" or '(' .. detail .. ' shown)\n'
-- summary line followed by the first `detail` items of output, space-separated
return "has " .. dis_count .. " disallowed " .. noun ..
shown .. table.concat(output, ' ', 1, detail)
end