1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-08 21:51:28 +00:00

Update to add additional blacklist entries to the httpspider library. The goal is to avoid downloading and processing certain additional video, audio and binary formats.

This should speed up crawling certain sites. In the case of http-email-harvest, it should reduce some of the false positives generated by running the RegEx against binary data. The only script whose results this change appears likely to have affected is http-sitemap-generator, and that script specifically disables the blacklist.
This commit is contained in:
tomsellers
2012-07-10 00:23:02 +00:00
parent 113f1791ff
commit b82c819afb

View File

@@ -596,11 +596,13 @@ Crawler = {
addDefaultBlacklist = function(self)
local extensions = {
image_extensions = {"png","jpg","jpeg","gif","bmp"},
video_extensions = {"avi","flv","ogg","mp4","wmv"},
audio_extensions = {"aac","m4a","mp3","wav"},
doc_extensions = {"pdf", "doc", "docx", "docm", "xls", "xlsx", "xlsm",
"ppt", "pptx", "pptm", "odf", "ods", "odp", "ps", "xps"},
archive_extensions = {"zip", "tar.gz", "gz", "rar", "7z", "sit", "sitx",
"tgz", "tar.bz", "tar", "iso"},
exe_extensions = {"exe", "com", "msi", "bin"}
exe_extensions = {"exe", "com", "msi", "bin","dmg"}
}
local blacklist = {}
for _, cat in pairs(extensions) do