From 20853ec49fa1fdff684934fc74ac624ca9125aa9 Mon Sep 17 00:00:00 2001
From: david
Date: Wed, 10 Sep 2008 18:32:35 +0000
Subject: [PATCH] Make xml_convert escape any character > 0x7F, and use xml_convert to escape the value of the "args" attribute.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Windows, I created a user account with the name "Kurt Gödel". When I
ran a scan in Zenmap, Nmap created a temporary XML file that started
like

[XML excerpt not preserved in this copy]

Notice the ö characters in the file names. They were not in UTF-8 but
probably whatever the filesystem encoding is. Because Nmap's XML does
not declare an encoding, it defaults to UTF-8, meaning this particular
file was not even well-formed. In Zenmap it caused a crash like

CRASH REPORTED:
SYS.PLATFORM: win32
OS.NAME: nt
Zenmap Version: 4.75
TRACEBACK:
Traceback (most recent call last):
  File "C:\cygwin\home\david\nmap\zenmap\zenmapGUI\ScanNotebook.py", line 387, in verify_execution
    self.load_from_command(scan)
  File "C:\cygwin\home\david\nmap\zenmap\zenmapGUI\ScanNotebook.py", line 400, in load_from_command
    parsed = self._parse(command.get_xml_output_filename())
  File "C:\cygwin\home\david\nmap\zenmap\zenmapGUI\ScanNotebook.py", line 444, in _parse
    parsed.parse_file(file_to_parse)
  File "C:\cygwin\home\david\nmap\zenmap\zenmapCore\NmapParser.py", line 749, in parse_file
    self.parse(f)
  File "C:\cygwin\home\david\nmap\zenmap\zenmapCore\NmapParser.py", line 743, in parse
    self.parser.parse(f)
  File "c:\Python25\lib\xml\sax\expatreader.py", line 107, in parse
    xmlreader.IncrementalParser.parse(self, source)
  File "c:\Python25\lib\xml\sax\xmlreader.py", line 123, in parse
    self.feed(buffer)
  File "c:\Python25\lib\xml\sax\expatreader.py", line 211, in feed
    self._err_handler.fatalError(exc)
  File "c:\Python25\lib\xml\sax\handler.py", line 38, in fatalError
    raise exception
SAXParseException: c:\docume~1\kurtgö~1\locals~1\temp\zenmap-bcbuy6.xml:3:92: not well-formed (invalid token)

Plus Internet Explorer
wouldn't even open it.

This change escapes the XML so it looks like

[XML excerpt not preserved in this copy]

---
 nmap.cc   |  7 ++++--
 output.cc | 70 +++++++++++++++++++++++++++++++------------------------
 2 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/nmap.cc b/nmap.cc
index 98fb9717b..364cbe738 100644
--- a/nmap.cc
+++ b/nmap.cc
@@ -1496,8 +1496,11 @@ int nmap_main(int argc, char *argv[]) {
   log_write(LOG_NORMAL|LOG_MACHINE|LOG_XML,"\n");
   log_write(LOG_XML, "\n", (unsigned long) timep, mytime, NMAP_VERSION);
diff --git a/output.cc b/output.cc
index 12fe9191f..a1acb5b7b 100644
--- a/output.cc
+++ b/output.cc
@@ -890,39 +890,47 @@ char* xml_convert (const char* str) {
   char *end = temp + strl * 6 + 1;
   for (p = temp;(prevch = ch, ch = *str);str++) {
     const char *a;
-    switch (ch) {
-    case '\t':
-      a = "&#x9;";
-      break;
-    case '\r':
-      a = "&#xd;";
-      break;
-    case '\n':
-      a = "&#xa;";
-      break;
-    case '<':
-      a = "&lt;";
-      break;
-    case '>':
-      a = "&gt;";
-      break;
-    case '&':
-      a = "&amp;";
-      break;
-    case '"':
-      a = "&quot;";
-      break;
-    case '\'':
-      a = "&apos;";
-      break;
-    case '-':
-      if (prevch == '-') { /* Must escape -- for comments */
-        a = "&#45;";
+    if ((unsigned char) ch > 0x7F) {
+      /* Escape anything outside of ASCII--we have to emit UTF-8 and an easy
+         way to do that is to emit ASCII. */
+      char buf[32];
+      Snprintf(buf, sizeof(buf), "&#x%02X;", (unsigned char) ch);
+      a = buf;
+    } else {
+      switch (ch) {
+      case '\t':
+        a = "&#x9;";
         break;
+      case '\r':
+        a = "&#xd;";
+        break;
+      case '\n':
+        a = "&#xa;";
+        break;
+      case '<':
+        a = "&lt;";
+        break;
+      case '>':
+        a = "&gt;";
+        break;
+      case '&':
+        a = "&amp;";
+        break;
+      case '"':
+        a = "&quot;";
+        break;
+      case '\'':
+        a = "&apos;";
+        break;
+      case '-':
+        if (prevch == '-') { /* Must escape -- for comments */
+          a = "&#45;";
+          break;
+        }
+      default:
+        *p++ = ch;
+        continue;
       }
-    default:
-      *p++ = ch;
-      continue;
     }
     assert(end - p > 1);
     Strncpy(p,a, end - p - 1); p += strlen(a); // SAFE