mirror of
https://github.com/nmap/nmap.git
synced 2025-12-07 21:21:31 +00:00
Parsing improvements for udp payloads
Save some memory and effort when parsing UDP payloads by reusing the rather large buffer inside each token when possible, and only using std::string::append() when necessary. For the current file, this avoids *all* reallocations.
This commit is contained in:
@@ -29,6 +29,8 @@
|
|||||||
# which case they will all be sent concurrently. There is a limit
|
# which case they will all be sent concurrently. There is a limit
|
||||||
# of 255 payloads per port.
|
# of 255 payloads per port.
|
||||||
#
|
#
|
||||||
|
# Lines longer than 1024 characters will be ignored.
|
||||||
|
#
|
||||||
# Example:
|
# Example:
|
||||||
# udp 1234 "payloaddatapayloaddata"
|
# udp 1234 "payloaddatapayloaddata"
|
||||||
# "payloaddatapayloaddata"
|
# "payloaddatapayloaddata"
|
||||||
|
|||||||
183
payload.cc
183
payload.cc
@@ -82,7 +82,17 @@ extern NmapOps o;
|
|||||||
|
|
||||||
struct payload {
|
struct payload {
|
||||||
std::string data;
|
std::string data;
|
||||||
|
|
||||||
|
payload (const char *c, size_t n)
|
||||||
|
: data(c, n)
|
||||||
|
{}
|
||||||
/* Extra data such as source port goes here. */
|
/* Extra data such as source port goes here. */
|
||||||
|
|
||||||
|
/* If 2 payloads are equivalent according to this operator, we'll only keep
|
||||||
|
* the first one, so be sure you update it when adding other attributes. */
|
||||||
|
bool operator==(const payload& other) const {
|
||||||
|
return data == other.data;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The key for the payload lookup map is a (proto, port) pair. */
|
/* The key for the payload lookup map is a (proto, port) pair. */
|
||||||
@@ -108,16 +118,18 @@ static std::vector<struct payload *> uniquePayloads; // for accounting
|
|||||||
|
|
||||||
/* Newlines are significant because keyword directives (like "source") that
|
/* Newlines are significant because keyword directives (like "source") that
|
||||||
follow the payload string are significant to the end of the line. */
|
follow the payload string are significant to the end of the line. */
|
||||||
enum token_type {
|
typedef enum token_type {
|
||||||
|
TOKEN_ERROR = -1,
|
||||||
TOKEN_EOF = 0,
|
TOKEN_EOF = 0,
|
||||||
TOKEN_NEWLINE,
|
TOKEN_NEWLINE,
|
||||||
TOKEN_SYMBOL,
|
TOKEN_SYMBOL,
|
||||||
TOKEN_STRING,
|
TOKEN_STRING,
|
||||||
};
|
} token_t;
|
||||||
|
|
||||||
struct token {
|
struct token {
|
||||||
char text[1024];
|
token_t type;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
char text[1024];
|
||||||
};
|
};
|
||||||
|
|
||||||
static unsigned long line_no;
|
static unsigned long line_no;
|
||||||
@@ -126,7 +138,7 @@ static unsigned long line_no;
|
|||||||
error. The token type is also stored in token->type. For TOKEN_SYMBOL and
|
error. The token type is also stored in token->type. For TOKEN_SYMBOL and
|
||||||
TOKEN_STRING, the text is stored in token->text and token->len. The text is
|
TOKEN_STRING, the text is stored in token->text and token->len. The text is
|
||||||
null terminated. */
|
null terminated. */
|
||||||
static int next_token(FILE *fp, struct token *token) {
|
static token_t next_token(FILE *fp, struct token *token) {
|
||||||
unsigned int i, tmplen;
|
unsigned int i, tmplen;
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
@@ -136,79 +148,86 @@ static int next_token(FILE *fp, struct token *token) {
|
|||||||
while (isspace(c = fgetc(fp)) && c != '\n')
|
while (isspace(c = fgetc(fp)) && c != '\n')
|
||||||
;
|
;
|
||||||
|
|
||||||
if (c == EOF) {
|
switch(c) {
|
||||||
return TOKEN_EOF;
|
case EOF:
|
||||||
} else if (c == '\n') {
|
token->type = TOKEN_EOF;
|
||||||
|
break;
|
||||||
|
case '\n':
|
||||||
line_no++;
|
line_no++;
|
||||||
return TOKEN_NEWLINE;
|
token->type = TOKEN_NEWLINE;
|
||||||
} else if (c == '#') {
|
break;
|
||||||
|
case '#':
|
||||||
while ((c = fgetc(fp)) != EOF && c != '\n')
|
while ((c = fgetc(fp)) != EOF && c != '\n')
|
||||||
;
|
;
|
||||||
if (c == EOF) {
|
if (c == EOF) {
|
||||||
return TOKEN_EOF;
|
token->type = TOKEN_EOF;
|
||||||
} else {
|
} else {
|
||||||
line_no++;
|
line_no++;
|
||||||
return TOKEN_NEWLINE;
|
token->type = TOKEN_NEWLINE;
|
||||||
}
|
}
|
||||||
} else if (c == '"') {
|
break;
|
||||||
|
case '"':
|
||||||
|
token->type = TOKEN_STRING;
|
||||||
i = 0;
|
i = 0;
|
||||||
while ((c = fgetc(fp)) != EOF && c != '\n' && c != '"') {
|
while ((c = fgetc(fp)) != EOF && c != '\n' && c != '"') {
|
||||||
if (i + 1 >= sizeof(token->text))
|
if (i + 1 >= sizeof(token->text))
|
||||||
return -1;
|
return TOKEN_ERROR;
|
||||||
if (c == '\\') {
|
if (c == '\\') {
|
||||||
token->text[i++] = '\\';
|
token->text[i++] = '\\';
|
||||||
if (i + 1 >= sizeof(token->text))
|
if (i + 1 >= sizeof(token->text))
|
||||||
return -1;
|
return TOKEN_ERROR;
|
||||||
c = fgetc(fp);
|
c = fgetc(fp);
|
||||||
if (c == EOF)
|
if (c == EOF)
|
||||||
return -1;
|
return TOKEN_ERROR;
|
||||||
}
|
}
|
||||||
token->text[i++] = c;
|
token->text[i++] = c;
|
||||||
}
|
}
|
||||||
if (c != '"')
|
if (c != '"')
|
||||||
return -1;
|
return TOKEN_ERROR;
|
||||||
token->text[i] = '\0';
|
token->text[i] = '\0';
|
||||||
if (cstring_unescape(token->text, &tmplen) == NULL)
|
if (cstring_unescape(token->text, &tmplen) == NULL)
|
||||||
return -1;
|
return TOKEN_ERROR;
|
||||||
token->len = tmplen;
|
token->len = tmplen;
|
||||||
return TOKEN_STRING;
|
break;
|
||||||
} else {
|
default:
|
||||||
|
token->type = TOKEN_SYMBOL;
|
||||||
i = 0;
|
i = 0;
|
||||||
token->text[i++] = c;
|
token->text[i++] = c;
|
||||||
while ((c = fgetc(fp)) != EOF && (isalnum(c) || c == ',' || c == '-')) {
|
while ((c = fgetc(fp)) != EOF && (isalnum(c) || c == ',' || c == '-')) {
|
||||||
if (i + 1 >= sizeof(token->text))
|
if (i + 1 >= sizeof(token->text))
|
||||||
return -1;
|
return TOKEN_ERROR;
|
||||||
token->text[i++] = c;
|
token->text[i++] = c;
|
||||||
}
|
}
|
||||||
ungetc(c, fp);
|
ungetc(c, fp);
|
||||||
token->text[i] = '\0';
|
token->text[i] = '\0';
|
||||||
token->len = i;
|
token->len = i;
|
||||||
return TOKEN_SYMBOL;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return -1;
|
return token->type;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Loop over fp, reading tokens and adding payloads to the global payloads map
|
/* Loop over fp, reading tokens and adding payloads to the global payloads map
|
||||||
as they are completed. Returns -1 on error. */
|
as they are completed. Returns -1 on error. */
|
||||||
static int load_payloads_from_file(FILE *fp) {
|
static int load_payloads_from_file(FILE *fp) {
|
||||||
struct token token;
|
struct token token;
|
||||||
int type;
|
unsigned long firstline = 0;
|
||||||
|
|
||||||
line_no = 1;
|
line_no = 1;
|
||||||
type = next_token(fp, &token);
|
token_t type = next_token(fp, &token);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
unsigned short *ports;
|
unsigned short *ports;
|
||||||
int count;
|
int count;
|
||||||
|
bool duplicate = false;
|
||||||
|
|
||||||
while (type == TOKEN_NEWLINE)
|
/* Skip everything (unknown keywords from previous payload, unknown file
|
||||||
|
* keywords, etc.) until the next payload entry or EOF */
|
||||||
|
while (type != TOKEN_EOF && !(type == TOKEN_SYMBOL && strcmp(token.text, "udp") == 0))
|
||||||
type = next_token(fp, &token);
|
type = next_token(fp, &token);
|
||||||
if (type == TOKEN_EOF)
|
if (type == TOKEN_EOF)
|
||||||
break;
|
break;
|
||||||
if (type != TOKEN_SYMBOL || strcmp(token.text, "udp") != 0) {
|
|
||||||
fprintf(stderr, "Expected \"udp\" at line %lu of %s.\n", line_no, PAYLOAD_FILENAME);
|
firstline = line_no;
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
type = next_token(fp, &token);
|
type = next_token(fp, &token);
|
||||||
if (type != TOKEN_SYMBOL) {
|
if (type != TOKEN_SYMBOL) {
|
||||||
@@ -221,48 +240,100 @@ static int load_payloads_from_file(FILE *fp) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct payload *portPayload = new struct payload;
|
while(TOKEN_NEWLINE == (type = next_token(fp, &token)))
|
||||||
uniquePayloads.push_back(portPayload);
|
; // skip newlines
|
||||||
for (;;) {
|
|
||||||
type = next_token(fp, &token);
|
if (type != TOKEN_STRING) {
|
||||||
if (type == TOKEN_STRING)
|
log_write(LOG_STDERR, "Payload missing data at line %lu of %s.\n", line_no, PAYLOAD_FILENAME);
|
||||||
portPayload->data.append(token.text, token.len);
|
// Try a new payload
|
||||||
else if (type == TOKEN_NEWLINE)
|
free(ports);
|
||||||
; /* Nothing. */
|
continue;
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ignore keywords like "source" to the end of the line. */
|
struct payload *portPayload = NULL;
|
||||||
if (type == TOKEN_SYMBOL && strcmp(token.text, "udp") != 0) {
|
// Peek at the next significant token
|
||||||
while (type != -1 && type != TOKEN_EOF && type != TOKEN_NEWLINE)
|
struct token peek_token;
|
||||||
|
while (TOKEN_NEWLINE == (type = next_token(fp, &peek_token)))
|
||||||
|
; // skip newlines
|
||||||
|
|
||||||
|
// If it's a string continuation, see if we can squeeze it into the current token.
|
||||||
|
while (type == TOKEN_STRING) {
|
||||||
|
if (token.len + peek_token.len < sizeof(token.text)) {
|
||||||
|
// Next string fits in this one's buffer!
|
||||||
|
memcpy(token.text + token.len, peek_token.text, peek_token.len);
|
||||||
|
token.len += peek_token.len;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Token is full
|
||||||
|
if (portPayload == NULL) {
|
||||||
|
// Allocate new payload
|
||||||
|
portPayload = new struct payload (token.text, token.len);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// append token to current payload
|
||||||
|
portPayload->data.append(token.text, token.len);
|
||||||
|
}
|
||||||
|
// peek_token becomes the previous token
|
||||||
|
token = peek_token;
|
||||||
|
}
|
||||||
|
// Keep peeking forward
|
||||||
|
while (TOKEN_NEWLINE == (type = next_token(fp, &peek_token)))
|
||||||
|
; // skip newlines
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the string is still going, but we got an error, abandon this payload.
|
||||||
|
if (type == TOKEN_ERROR && peek_token.type == TOKEN_STRING) {
|
||||||
|
log_write(LOG_STDERR, "Error parsing payload data at line %lu of %s.\n", line_no, PAYLOAD_FILENAME);
|
||||||
|
if (portPayload)
|
||||||
|
delete portPayload;
|
||||||
|
// maybe we can pick up at the next payload.
|
||||||
type = next_token(fp, &token);
|
type = next_token(fp, &token);
|
||||||
|
free(ports);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, stash the last token in the payload and move on.
|
||||||
|
if (portPayload == NULL) {
|
||||||
|
// Allocate new payload
|
||||||
|
portPayload = new struct payload (token.text, token.len);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// append token to current payload
|
||||||
|
portPayload->data.append(token.text, token.len);
|
||||||
|
}
|
||||||
|
token = peek_token;
|
||||||
|
|
||||||
|
// Here we would parse additional keywords like "source" that we might care about.
|
||||||
|
|
||||||
|
// Make sure these payloads are actually unique!
|
||||||
|
for (std::vector<struct payload *>::const_iterator it = uniquePayloads.begin();
|
||||||
|
it != uniquePayloads.end(); ++it) {
|
||||||
|
if (**it == *portPayload) {
|
||||||
|
// Probably not what they intended.
|
||||||
|
log_write(LOG_STDERR, "Duplicate payload on line %lu of %s.\n", firstline, PAYLOAD_FILENAME);
|
||||||
|
// Since they're functionally equivalent, only keep one copy.
|
||||||
|
duplicate = true;
|
||||||
|
delete portPayload;
|
||||||
|
portPayload = *it;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!duplicate) {
|
||||||
|
uniquePayloads.push_back(portPayload);
|
||||||
|
duplicate = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int p = 0; p < count; p++) {
|
for (int p = 0; p < count; p++) {
|
||||||
std::vector<struct payload *>::const_iterator portPayloadVectorIterator;
|
|
||||||
const struct proto_dport key(IPPROTO_UDP, ports[p]);
|
const struct proto_dport key(IPPROTO_UDP, ports[p]);
|
||||||
bool duplicate = false;
|
|
||||||
|
|
||||||
std::vector<struct payload *> &portPayloadVector = portPayloads[key];
|
std::vector<struct payload *> &portPayloadVector = portPayloads[key];
|
||||||
|
|
||||||
for (portPayloadVectorIterator = portPayloadVector.begin();
|
// Ports are unique, and we ensured payloads are unique earlier, so no chance of duplicate here.
|
||||||
portPayloadVectorIterator != portPayloadVector.end();
|
|
||||||
portPayloadVectorIterator++) {
|
|
||||||
if (*portPayloadVectorIterator == portPayload) {
|
|
||||||
log_write(LOG_STDERR, "UDP port payload duplication found on port: %u\n", ports[p]);
|
|
||||||
duplicate = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!duplicate) {
|
|
||||||
portPayloadVector.push_back(portPayload);
|
portPayloadVector.push_back(portPayload);
|
||||||
if (portPayloadVector.size() > MAX_PAYLOADS_PER_PORT) {
|
if (portPayloadVector.size() > MAX_PAYLOADS_PER_PORT) {
|
||||||
fatal("Number of UDP payloads for port %u exceeds the limit of %u.\n", ports[p], MAX_PAYLOADS_PER_PORT);
|
fatal("Number of UDP payloads for port %u exceeds the limit of %u.\n", ports[p], MAX_PAYLOADS_PER_PORT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
free(ports);
|
free(ports);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user