1
0
mirror of https://github.com/nmap/nmap.git synced 2025-12-07 05:01:29 +00:00

Parsing improvements for udp payloads

Save some memory and effort when parsing UDP payloads by reusing the
rather large buffer inside each token when possible, and only using
std::string::append() when necessary. For the current file, this avoids
*all* reallocations.
This commit is contained in:
dmiller
2021-12-12 23:42:39 +00:00
parent 2cbc7712da
commit 85c1fd9b18
2 changed files with 161 additions and 88 deletions

View File

@@ -29,6 +29,8 @@
# which case they will all be sent concurrently. There is a limit # which case they will all be sent concurrently. There is a limit
# of 255 payloads per port. # of 255 payloads per port.
# #
# Lines longer than 1024 characters will be ignored.
#
# Example: # Example:
# udp 1234 "payloaddatapayloaddata" # udp 1234 "payloaddatapayloaddata"
# "payloaddatapayloaddata" # "payloaddatapayloaddata"

View File

@@ -82,7 +82,17 @@ extern NmapOps o;
struct payload { struct payload {
std::string data; std::string data;
payload (const char *c, size_t n)
: data(c, n)
{}
/* Extra data such as source port goes here. */ /* Extra data such as source port goes here. */
/* If 2 payloads are equivalent according to this operator, we'll only keep
* the first one, so be sure you update it when adding other attributes. */
bool operator==(const payload& other) const {
return data == other.data;
}
}; };
/* The key for the payload lookup map is a (proto, port) pair. */ /* The key for the payload lookup map is a (proto, port) pair. */
@@ -108,16 +118,18 @@ static std::vector<struct payload *> uniquePayloads; // for accounting
/* Newlines are significant because keyword directives (like "source") that /* Newlines are significant because keyword directives (like "source") that
follow the payload string are significant to the end of the line. */ follow the payload string are significant to the end of the line. */
enum token_type { typedef enum token_type {
TOKEN_ERROR = -1,
TOKEN_EOF = 0, TOKEN_EOF = 0,
TOKEN_NEWLINE, TOKEN_NEWLINE,
TOKEN_SYMBOL, TOKEN_SYMBOL,
TOKEN_STRING, TOKEN_STRING,
}; } token_t;
struct token { struct token {
char text[1024]; token_t type;
size_t len; size_t len;
char text[1024];
}; };
static unsigned long line_no; static unsigned long line_no;
@@ -126,7 +138,7 @@ static unsigned long line_no;
error. The token type is also stored in token->type. For TOKEN_SYMBOL and error. The token type is also stored in token->type. For TOKEN_SYMBOL and
TOKEN_STRING, the text is stored in token->text and token->len. The text is TOKEN_STRING, the text is stored in token->text and token->len. The text is
null terminated. */ null terminated. */
static int next_token(FILE *fp, struct token *token) { static token_t next_token(FILE *fp, struct token *token) {
unsigned int i, tmplen; unsigned int i, tmplen;
int c; int c;
@@ -136,79 +148,86 @@ static int next_token(FILE *fp, struct token *token) {
while (isspace(c = fgetc(fp)) && c != '\n') while (isspace(c = fgetc(fp)) && c != '\n')
; ;
if (c == EOF) { switch(c) {
return TOKEN_EOF; case EOF:
} else if (c == '\n') { token->type = TOKEN_EOF;
line_no++; break;
return TOKEN_NEWLINE; case '\n':
} else if (c == '#') {
while ((c = fgetc(fp)) != EOF && c != '\n')
;
if (c == EOF) {
return TOKEN_EOF;
} else {
line_no++; line_no++;
return TOKEN_NEWLINE; token->type = TOKEN_NEWLINE;
} break;
} else if (c == '"') { case '#':
i = 0; while ((c = fgetc(fp)) != EOF && c != '\n')
while ((c = fgetc(fp)) != EOF && c != '\n' && c != '"') { ;
if (i + 1 >= sizeof(token->text)) if (c == EOF) {
return -1; token->type = TOKEN_EOF;
if (c == '\\') { } else {
token->text[i++] = '\\'; line_no++;
if (i + 1 >= sizeof(token->text)) token->type = TOKEN_NEWLINE;
return -1;
c = fgetc(fp);
if (c == EOF)
return -1;
} }
break;
case '"':
token->type = TOKEN_STRING;
i = 0;
while ((c = fgetc(fp)) != EOF && c != '\n' && c != '"') {
if (i + 1 >= sizeof(token->text))
return TOKEN_ERROR;
if (c == '\\') {
token->text[i++] = '\\';
if (i + 1 >= sizeof(token->text))
return TOKEN_ERROR;
c = fgetc(fp);
if (c == EOF)
return TOKEN_ERROR;
}
token->text[i++] = c;
}
if (c != '"')
return TOKEN_ERROR;
token->text[i] = '\0';
if (cstring_unescape(token->text, &tmplen) == NULL)
return TOKEN_ERROR;
token->len = tmplen;
break;
default:
token->type = TOKEN_SYMBOL;
i = 0;
token->text[i++] = c; token->text[i++] = c;
} while ((c = fgetc(fp)) != EOF && (isalnum(c) || c == ',' || c == '-')) {
if (c != '"') if (i + 1 >= sizeof(token->text))
return -1; return TOKEN_ERROR;
token->text[i] = '\0'; token->text[i++] = c;
if (cstring_unescape(token->text, &tmplen) == NULL) }
return -1; ungetc(c, fp);
token->len = tmplen; token->text[i] = '\0';
return TOKEN_STRING; token->len = i;
} else { break;
i = 0;
token->text[i++] = c;
while ((c = fgetc(fp)) != EOF && (isalnum(c) || c == ',' || c == '-')) {
if (i + 1 >= sizeof(token->text))
return -1;
token->text[i++] = c;
}
ungetc(c, fp);
token->text[i] = '\0';
token->len = i;
return TOKEN_SYMBOL;
} }
return -1; return token->type;
} }
/* Loop over fp, reading tokens and adding payloads to the global payloads map /* Loop over fp, reading tokens and adding payloads to the global payloads map
as they are completed. Returns -1 on error. */ as they are completed. Returns -1 on error. */
static int load_payloads_from_file(FILE *fp) { static int load_payloads_from_file(FILE *fp) {
struct token token; struct token token;
int type; unsigned long firstline = 0;
line_no = 1; line_no = 1;
type = next_token(fp, &token); token_t type = next_token(fp, &token);
for (;;) { for (;;) {
unsigned short *ports; unsigned short *ports;
int count; int count;
bool duplicate = false;
while (type == TOKEN_NEWLINE) /* Skip everything (unknown keywords from previous payload, unknown file
* keywords, etc.) until the next payload entry or EOF */
while (type != TOKEN_EOF && !(type == TOKEN_SYMBOL && strcmp(token.text, "udp") == 0))
type = next_token(fp, &token); type = next_token(fp, &token);
if (type == TOKEN_EOF) if (type == TOKEN_EOF)
break; break;
if (type != TOKEN_SYMBOL || strcmp(token.text, "udp") != 0) {
fprintf(stderr, "Expected \"udp\" at line %lu of %s.\n", line_no, PAYLOAD_FILENAME); firstline = line_no;
return -1;
}
type = next_token(fp, &token); type = next_token(fp, &token);
if (type != TOKEN_SYMBOL) { if (type != TOKEN_SYMBOL) {
@@ -221,46 +240,98 @@ static int load_payloads_from_file(FILE *fp) {
return -1; return -1;
} }
struct payload *portPayload = new struct payload; while(TOKEN_NEWLINE == (type = next_token(fp, &token)))
uniquePayloads.push_back(portPayload); ; // skip newlines
for (;;) {
type = next_token(fp, &token); if (type != TOKEN_STRING) {
if (type == TOKEN_STRING) log_write(LOG_STDERR, "Payload missing data at line %lu of %s.\n", line_no, PAYLOAD_FILENAME);
portPayload->data.append(token.text, token.len); // Try a new payload
else if (type == TOKEN_NEWLINE) free(ports);
; /* Nothing. */ continue;
else
break;
} }
/* Ignore keywords like "source" to the end of the line. */ struct payload *portPayload = NULL;
if (type == TOKEN_SYMBOL && strcmp(token.text, "udp") != 0) { // Peek at the next significant token
while (type != -1 && type != TOKEN_EOF && type != TOKEN_NEWLINE) struct token peek_token;
type = next_token(fp, &token); while (TOKEN_NEWLINE == (type = next_token(fp, &peek_token)))
; // skip newlines
// If it's a string continuation, see if we can squeeze it into the current token.
while (type == TOKEN_STRING) {
if (token.len + peek_token.len < sizeof(token.text)) {
// Next string fits in this one's buffer!
memcpy(token.text + token.len, peek_token.text, peek_token.len);
token.len += peek_token.len;
}
else {
// Token is full
if (portPayload == NULL) {
// Allocate new payload
portPayload = new struct payload (token.text, token.len);
}
else {
// append token to current payload
portPayload->data.append(token.text, token.len);
}
// peek_token becomes the previous token
token = peek_token;
}
// Keep peeking forward
while (TOKEN_NEWLINE == (type = next_token(fp, &peek_token)))
; // skip newlines
}
// If the string is still going, but we got an error, abandon this payload.
if (type == TOKEN_ERROR && peek_token.type == TOKEN_STRING) {
log_write(LOG_STDERR, "Error parsing payload data at line %lu of %s.\n", line_no, PAYLOAD_FILENAME);
if (portPayload)
delete portPayload;
// maybe we can pick up at the next payload.
type = next_token(fp, &token);
free(ports);
continue;
}
// Otherwise, stash the last token in the payload and move on.
if (portPayload == NULL) {
// Allocate new payload
portPayload = new struct payload (token.text, token.len);
}
else {
// append token to current payload
portPayload->data.append(token.text, token.len);
}
token = peek_token;
// Here we would parse additional keywords like "source" that we might care about.
// Make sure these payloads are actually unique!
for (std::vector<struct payload *>::const_iterator it = uniquePayloads.begin();
it != uniquePayloads.end(); ++it) {
if (**it == *portPayload) {
// Probably not what they intended.
log_write(LOG_STDERR, "Duplicate payload on line %lu of %s.\n", firstline, PAYLOAD_FILENAME);
// Since they're functionally equivalent, only keep one copy.
duplicate = true;
delete portPayload;
portPayload = *it;
break;
}
}
if (!duplicate) {
uniquePayloads.push_back(portPayload);
duplicate = false;
} }
for (int p = 0; p < count; p++) { for (int p = 0; p < count; p++) {
std::vector<struct payload *>::const_iterator portPayloadVectorIterator;
const struct proto_dport key(IPPROTO_UDP, ports[p]); const struct proto_dport key(IPPROTO_UDP, ports[p]);
bool duplicate = false;
std::vector<struct payload *> &portPayloadVector = portPayloads[key]; std::vector<struct payload *> &portPayloadVector = portPayloads[key];
for (portPayloadVectorIterator = portPayloadVector.begin(); // Ports are unique, and we ensured payloads are unique earlier, so no chance of duplicate here.
portPayloadVectorIterator != portPayloadVector.end(); portPayloadVector.push_back(portPayload);
portPayloadVectorIterator++) { if (portPayloadVector.size() > MAX_PAYLOADS_PER_PORT) {
if (*portPayloadVectorIterator == portPayload) { fatal("Number of UDP payloads for port %u exceeds the limit of %u.\n", ports[p], MAX_PAYLOADS_PER_PORT);
log_write(LOG_STDERR, "UDP port payload duplication found on port: %u\n", ports[p]);
duplicate = true;
break;
}
}
if (!duplicate) {
portPayloadVector.push_back(portPayload);
if (portPayloadVector.size() > MAX_PAYLOADS_PER_PORT) {
fatal("Number of UDP payloads for port %u exceeds the limit of %u.\n", ports[p], MAX_PAYLOADS_PER_PORT);
}
} }
} }