diff --git a/nselib/slaxml.lua b/nselib/slaxml.lua
index 6847cc270..97340e4a9 100644
--- a/nselib/slaxml.lua
+++ b/nselib/slaxml.lua
@@ -1,10 +1,10 @@
---
-- This is the NSE implementation of SLAXML.
--- SLAXML is a pure-Lua SAX-like streaming XML parser. It is more robust
--- than many (simpler) pattern-based parsers that exist, properly supporting
--- code like <code><expr test="5 > 7" /></code>, CDATA nodes, comments,
+-- SLAXML is a pure-Lua SAX-like streaming XML parser. It is more robust
+-- than many (simpler) pattern-based parsers that exist, properly supporting
+-- code like <code><expr test="5 > 7" /></code>, CDATA nodes, comments,
-- namespaces, and processing instructions.
--- It is currently not a truly valid XML parser, however, as it allows certain XML that is
+-- It is currently not a truly valid XML parser, however, as it allows certain XML that is
-- syntactically-invalid (not well-formed) to be parsed without reporting an error.
-- The streaming parser does a simple pass through the input and reports what it sees along the way.
-- You can optionally ignore white-space only text nodes using the stripWhitespace option.
@@ -52,7 +52,7 @@
--
-- local value = someEl.attr['attribute-name'] : any namespace prefix of the attribute is not part of the name
--
--- local someAttr = someEl.attr[1] : an single attribute table (see below); useful for iterating all
+-- local someAttr = someEl.attr[1] : a single attribute table (see below); useful for iterating all
-- attributes of an element, or for disambiguating attributes with the same name in different namespaces
--
-- * someEl.kids : an array table of child elements, text nodes, comment nodes, and processing instructions
@@ -172,16 +172,22 @@ local DEFAULT_CALLBACKS = {
end,
}
+local entityMap = { ["lt"]="<", ["gt"]=">", ["amp"]="&", ["quot"]='"', ["apos"]="'" }
+-- gsub callback (orig = whole match, n = "#" or "", s = entity body): named entities use entityMap, numeric ones (&#65; / &#x41;) go through unicode.utf8_enc; unknown or malformed entities are returned unchanged.
+local entitySwap = function(orig,n,s) if n~="#" then return entityMap[s] or orig end local cp = tonumber('0'..s); return cp and unicode.utf8_enc(cp) or orig end
parser = {
new = function(self, callbacks)
- local o = {
+ local o = {
_call = callbacks or DEFAULT_CALLBACKS
}
setmetatable(o, self)
self.__index = self
return o
end,
+ -- Returns str with XML entities replaced via entitySwap; the outer parentheses keep only the string, so gsub's match count is not passed on to callers or callbacks.
+ unescape = function(str) return (string.gsub( str, '(&(#?)([%d%a]+);)', entitySwap )) end,
+
--- Parses the xml in sax like manner.
-- @self The parser object.
-- @param xml The xml body to be parsed.
@@ -202,11 +208,6 @@ parser = {
local nsStack = {}
local anyElement = false
-
- local entityMap = { ["lt"]="<", ["gt"]=">", ["amp"]="&", ["quot"]='"', ["apos"]="'" }
- local entitySwap = function(orig,n,s) return entityMap[s] or n=="#" and unicode.utf8_enc(tonumber('0'..s)) or orig end
- local function unescape(str) return gsub( str, '(&(#?)([%d%a]+);)', entitySwap ) end
-
local function finishText()
if first>textStart and self._call.text then
local text = sub(xml,textStart,first-1)
@@ -215,7 +216,7 @@ parser = {
text = gsub(text,'%s+$','')
if #text==0 then text=nil end
end
- if text then self._call.text(unescape(text)) end
+ if text then self._call.text(parser.unescape(text)) end
end
end
@@ -279,12 +280,12 @@ parser = {
first, last, match2 = find( xml, '^"([^<"]*)"', pos2 ) -- FIXME: disallow non-entity ampersands
if first then
pos = last+1
- match2 = unescape(match2)
+ match2 = parser.unescape(match2)
else
first, last, match2 = find( xml, "^'([^<']*)'", pos2 ) -- FIXME: disallow non-entity ampersands
if first then
pos = last+1
- match2 = unescape(match2)
+ match2 = parser.unescape(match2)
end
end
end