mirror of
https://github.com/nmap/nmap.git
synced 2025-12-07 21:21:31 +00:00
Require extracted links to be within an HTML tag
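httpspider was extracting "links" from JavaScript whenever a script contained a variable named "src" or similar. By requiring each match to begin inside an open HTML tag (a "<" followed by non-">" characters before the attribute name), we eliminate this problem while still matching the src, href, or action attributes of any tag.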
This commit is contained in:
@@ -347,16 +347,16 @@ LinkExtractor = {
|
||||
parse = function(self)
|
||||
local links = {}
|
||||
local patterns = {
|
||||
'[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
||||
'[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
|
||||
'[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
||||
'[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
|
||||
'[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
|
||||
'<[^>]+[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
||||
'<[^>]+[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
|
||||
'<[^>]+[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
|
||||
'<[^>]+[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
|
||||
'<[^>]+[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
|
||||
}
|
||||
|
||||
local base_hrefs = {
|
||||
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
|
||||
'[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
|
||||
'<[^>]+[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
|
||||
'<[^>]+[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
|
||||
}
|
||||
|
||||
local base_href
|
||||
|
||||
Reference in New Issue
Block a user