From aaaed61c52959c89109c9d00c5053a7aa2d178ea Mon Sep 17 00:00:00 2001 From: david Date: Sun, 8 Feb 2009 05:28:38 +0000 Subject: [PATCH] =?UTF-8?q?Improve=20the=20efficiency=20of=20xml=5Fconvert?= =?UTF-8?q?.=20The=20old=20version=20was=20wasteful=20of=20space;=20it=20s?= =?UTF-8?q?tarted=20by=20allocating=20six=20times=20the=20size=20?= =?UTF-8?q?of=20the=20input=20string=20because=20in=20the=20worst=20case?= =?UTF-8?q?=20each=20byte=20can=20take=20up=20to=20six=20bytes=20when=20es?= =?UTF-8?q?caped=20(&#xXX;).=20It=20was=20wasteful=20of=20time=20because?= =?UTF-8?q?=20it=20built=20the=20string=20up=20with=20strncat,=20which=20p?= =?UTF-8?q?ads=20the=20entire=20destination=20buffer=20with=20null=20bytes?= =?UTF-8?q?=20every=20time=20it=20was=20called.=20This=20led=20to=20quadra?= =?UTF-8?q?tic=20time=20complexity,=20not=20linear=20as=20expected.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new version uses the usual strategy of doubling the size of the buffer whenever it runs out of space. It builds up the string using memcpy, checking each time that there is space for the new copy. --- output.cc | 111 ++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 57 deletions(-) diff --git a/output.cc b/output.cc index db641edc8..87b3be09f 100644 --- a/output.cc +++ b/output.cc @@ -887,66 +887,63 @@ log_write(LOG_PLAIN, "%d service%s unrecognized despite returning data. If you k } -/* Note that this escapes newlines, which is generally needed in - attributes to avoid parser normalization, but might not be needed - or desirable in XML content outside of attributes. So if we find - some cases where we don't want \r\n\t escaped, we'll have to add a - parameter to control this. 
*/ -char* xml_convert (const char* str) { - char *temp, ch=0, prevch = 0, *p; - int strl = strlen(str); - temp = (char *) safe_malloc(strl*6+1); - char *end = temp + strl * 6 + 1; - for (p = temp;(prevch = ch, ch = *str);str++) { - const char *a; - if ((unsigned char) ch > 0x7F) { - /* Escape anything outside of ASCII--we have to emit UTF-8 and an easy - way to do that is to emit ASCII. */ - char buf[32]; - Snprintf(buf, sizeof(buf), "&#x%02X;", (unsigned char) ch); - a = buf; +/* Escape a string for inclusion in XML. This gets <>&, "' for attribute values, + -- for inside comments, and characters with value > 0x7F. It also gets + control characters with value < 0x20 to avoid parser normalization of \r\n\t + in attribute values. If this is not desired in some cases, we'll have to add + a parameter to control this. */ +char *xml_convert(const char *str) { + /* result is the result buffer; n + 1 is the allocated size. Double the + allocation when space runs out. */ + char *result = NULL; + size_t n = 0, len; + const char *p; + int i; + + i = 0; + for (p = str; *p != '\0'; p++) { + const char *repl; + char buf[32]; + + if (*p == '<') + repl = "&lt;"; + else if (*p == '>') + repl = "&gt;"; + else if (*p == '&') + repl = "&amp;"; + else if (*p == '"') + repl = "&quot;"; + else if (*p == '\'') + repl = "&apos;"; + else if (*p == '-' && p > str && *(p - 1) == '-') { + /* Escape -- for comments. */ + repl = "&#45;"; + } else if (*p < 0x20 || (unsigned char) *p > 0x7F) { + /* Escape control characters and anything outside of ASCII. We have to + emit UTF-8 and an easy way to do that is to emit ASCII. 
*/ + Snprintf(buf, sizeof(buf), "&#x%x;", (unsigned char) *p); + repl = buf; } else { - switch (ch) { - case '\t': - a = "&#x9;"; - break; - case '\r': - a = "&#xd;"; - break; - case '\n': - a = "&#xa;"; - break; - case '<': - a = "&lt;"; - break; - case '>': - a = "&gt;"; - break; - case '&': - a = "&amp;"; - break; - case '"': - a = "&quot;"; - break; - case '\'': - a = "&apos;"; - break; - case '-': - if (prevch == '-') { /* Must escape -- for comments */ - a = "&#45;"; - break; - } - default: - *p++ = ch; - continue; - } + /* Unescaped character. */ + buf[0] = *p; + buf[1] = '\0'; + repl = buf; } - assert(end - p > 1); - Strncpy(p,a, end - p - 1); p += strlen(a); // SAFE + + len = strlen(repl); + /* Double the size of the result buffer if necessary. */ + if (i + len > n) { + n = (i + len) * 2; + result = (char *) safe_realloc(result, n + 1); + } + memcpy(result + i, repl, len); + i += len; } - *p = 0; - temp = (char *) safe_realloc(temp,strlen(temp)+1); - return temp; + /* Trim to length. (Also does initial allocation when str is empty.) */ + result = (char *) safe_realloc(result, i + 1); + result[i] = '\0'; + + return result; } char *logfilename(const char *str, struct tm *tm)