1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-07 21:21:31 +00:00

Require extracted links to be within an HTML tag

httpspider was extracting "links" from JavaScript whenever a script contained a
variable called "src" or similar. By requiring the attribute to appear inside
an open HTML tag, we eliminate this problem while still matching the src, href,
or action attributes of any tag.
This commit is contained in:
dmiller
2018-03-09 19:07:49 +00:00
parent b4f741c18b
commit 807b66480a

View File

@@ -347,16 +347,16 @@ LinkExtractor = {
parse = function(self)
local links = {}
local patterns = {
'[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
'[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
'[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
'<[^>]+[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'<[^>]+[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
'<[^>]+[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
'<[^>]+[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
'<[^>]+[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
}
local base_hrefs = {
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
'<[^>]+[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
'<[^>]+[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
}
local base_href