1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-12 10:49:02 +00:00

Accidentally committed experimental version of robots.nse

This commit is contained in:
ejlbell
2007-11-05 21:41:47 +00:00
parent 4694e4e7a7
commit 9dea83347b

View File

@@ -1,71 +1,39 @@
require('shortport') require('shortport')
require('strbuf') require('strbuf')
require('stdnse')
require('listop') require('listop')
id = "robots.txt" id = "robots.txt"
author = "Eddie Bell <ejlbell@gmail.com>" author = "Eddie Bell <ejlbell@gmail.com>"
description = "Probes for a http servers robots.txt file and returns a summary of it" description = "Download a http servers robots.txt file and display all disallowed entries"
license = "See nmaps COPYING for licence" license = "See nmaps COPYING for licence"
categories = {"intrusive"} categories = {"safe"}
runlevel = 1.0 runlevel = 1.0
portrule = shortport.port_or_service(80, "http") portrule = shortport.port_or_service(80, "http")
local last_len = 0
-- validates a robots.txt according to IETF conventions. Note -- split the output in 40 character lines
-- there is no standard governing robots.txt files but the local function buildOutput(output, w)
-- IETF document is the closest thing local len = string.len(w)
local function validate(robot_txt)
local function valid(line) for i,v in ipairs(output) do
return string.find(line, '^#.*') or if w == v then return nil end
string.find(line, '^user%-agent:.*') or
string.find(line, '^disallow:.*') or
string.find(line, '^allow:.') or
string.len(line) == 0
end end
-- test if the robots.txt data is valid if last_len == 0 or last_len + len <= 40 then
local results = listop.map(valid, robot_txt) last_len = last_len + len
local invalid_lines = listop.filter(function(x) return not x end, results) else
return listop.is_empty(invalid_lines) output = output .. '\n'
end last_len = 0
local function analyse_robots(r_data, output)
local function is_present(line)
if string.find(line, '^#.*') then return 1
elseif string.find(line, '^user%-agent:.*') then return 2
elseif string.find(line, '^disallow:.*') then return 3
else return 0
end
end end
local function gen_match(match_id, present) output = output .. w
return listop.filter(function(x) return x == match_id end, present) output = output .. ' '
end
-- parse robots file and check for its elements
local robot_txt = stdnse.strsplit("\n", r_data)
local present = listop.map(is_present, robot_txt)
if not listop.is_empty(gen_match(1, present)) then
output = output .. "! contains disallowed entries\n"
end
if not listop.is_empty(gen_match(2, present)) then
output = output .. "! mentions specific user-agents\n"
end
if not listop.is_empty(gen_match(3, present)) then
output = output .. "! contains comments, which may be interesting\n"
end
if not validate(robot_txt) then
output = output .. "! does not adhere to IETF conventions\n"
end
end end
action = function(host, port) action = function(host, port)
local lines, status, soc, query, s, e local soc, lines, status
local catch = function() soc.close() end local catch = function() soc.close() end
local try = nmap.new_try(catch) local try = nmap.new_try(catch)
@@ -74,8 +42,7 @@ action = function(host, port)
soc:set_timeout(4000) soc:set_timeout(4000)
try(soc:connect(host.ip, port.number)) try(soc:connect(host.ip, port.number))
-- test if robots.txt is present local query = strbuf.new()
query = strbuf.new()
query = query .. "GET /robots.txt HTTP/1.1" query = query .. "GET /robots.txt HTTP/1.1"
query = query .. "Accept: */*" query = query .. "Accept: */*"
query = query .. "Accept-Language: en" query = query .. "Accept-Language: en"
@@ -85,56 +52,25 @@ action = function(host, port)
try(soc:send(strbuf.dump(query, '\r\n'))) try(soc:send(strbuf.dump(query, '\r\n')))
local response = strbuf.new() local response = strbuf.new()
local output = strbuf.new()
while true do while true do
status, lines = soc:receive_lines(1) status, lines = soc:receive_lines(1)
if not status then break end if not status then break end
response = response .. lines response = response .. lines
end end
local hdata = strbuf.dump(response, '\n') if not string.find(strbuf.dump(response), "HTTP/1.1 200 OK") then
if string.find(hdata, "HTTP/1.1 200 OK") then
for w in string.gmatch(hdata, "Content%-Type:%s*([^\r\n]*)\r\n") do
output = output .. w .. '\n'
end
-- remove http protocol stuff and analyse robots.txt file
s, e = string.find(hdata, "\r\n\r\n")
hdata = string.lower(hdata)
if e then analyse_robots(string.sub(hdata, e), output) end
end
soc:close()
try(soc:connect(host.ip, port.number))
strbuf.clear(query)
strbuf.clear(response)
-- test to see if info.txt is present
query = query .. "GET /info.txt HTTP/1.1"
query = query .. "Accept: */*"
query = query .. "Accept-Language: en"
query = query .. "User-Agent: Nmap NSE"
query = query .. "Host: " .. host.ip .. ":" .. port.number
query = query .. '\r\n\r\n';
try(soc:send(strbuf.dump(query, '\r\n')))
while true do
status, lines = soc:receive_lines(1)
if not status then break end
response = response .. lines
end
if string.find(strbuf.dump(response), "HTTP/1.1 200 OK") then
output = output .. "\n! info.txt is present\n"
end
soc:close()
if listop.is_empty(output) then
return nil return nil
else end
-- parse all disallowed entries
local output = strbuf.new()
for w in string.gmatch(strbuf.dump(response, '\n'), "Disallow:%s*([^\n]*)\n") do
buildOutput(output, w)
end
if not listop.is_empty(output) then
return strbuf.dump(output) return strbuf.dump(output)
end end
return nil
end end