--- @alias xml_node_raw { tag: string, attribs: { [string]: string }, [integer]: xml_element }
--- @alias xml_element string | xml_node

--- A parsed XML element. The array part holds the children in document
--- order: plain strings for text and nested `xml_node`s for child elements.
--- @class xml_node
--- @field tag string
--- @field attribs { [string]: string }
--- @field [integer] xml_element
local xml_node = {};
xml_node.__index = xml_node;

--- Collects every direct child element whose tag equals `name`.
--- Text children are ignored; the search is not recursive.
--- @param name string
--- @return xml_node[]
function xml_node:get_all(name)
	--- @type xml_node[]
	local res = {};

	for _, el in ipairs(self) do
		if type(el) ~= "string" and el.tag == name then
			res[#res + 1] = el;
		end
	end

	return res;
end

--- Returns the single direct child element whose tag equals `name`.
--- Raises an error when there is no such child or more than one.
--- @param name string
--- @return xml_node
function xml_node:get(name)
	local res = self:get_all(name);

	if #res == 0 then
		error("node '" .. name .. "' not found");
	elseif #res > 1 then
		error("multiple nodes '" .. name .. "' exist");
	else
		return res[1];
	end
end

--- Returns the text content of a node that has no children ("") or exactly
--- one text child. Raises an error for any other node.
--- @return string
function xml_node:text()
	if #self == 0 then
		return "";
	elseif #self == 1 and type(self[1]) == "string" then
		return self[1];
	else
		error "not a text-only node";
	end
end

--- Builds an `xml_node` from a plain table, copying the tag, the attributes
--- (defaulting to an empty table) and the array part (the children).
--- @param raw xml_node_raw
--- @return xml_node
function xml_node.new(raw)
	local res = setmetatable({
		tag = raw.tag,
		attribs = raw.attribs or {},
	}, xml_node);

	table.move(raw, 1, #raw, 1, res);

	return res;
end

-- Characters accepted in tag and attribute names. Besides alphanumerics and
-- '-', this allows '_', ':' and '.', so namespaced names such as "xmlns:x"
-- are accepted.
local NAME_PATTERN = "^[%w_:%.%-]+";

-- key = "value" or key = 'value', anchored at the search position.
-- Captures: key, quote character, value. %2 forces the closing quote to be
-- the same character as the opening one. Trailing whitespace is consumed.
local ATTRIB_PATTERN = "^([%w_:%.%-]+)%s*=%s*(['\"])(.-)%2%s*";

--- Returns the index of the first non-whitespace character at or after `i`
--- (`#raw + 1` when only whitespace remains).
--- @param raw string
--- @param i integer
--- @return integer
local function skip_spaces(raw, i)
	if i > #raw then return i end

	-- "^%s*" always matches (possibly the empty string) when i <= #raw.
	local _, last = raw:find("^%s*", i);
	return last + 1;
end

--- Skips whitespace and any run of `<!-- ... -->` comments starting at `i`.
--- An unterminated comment is treated as extending to the end of the input.
--- @param raw string
--- @param i integer
--- @return integer
local function skip_comments(raw, i)
	i = skip_spaces(raw, i);

	while raw:sub(i, i + 3) == "<!--" do
		local _, close = raw:find("-->", i + 4, true);
		if not close then return #raw + 1 end

		-- `close` is the index of the '>' in "-->"; resume right after it.
		i = skip_spaces(raw, close + 1);
	end

	return i;
end

--- Parses a tag name followed by zero or more attributes, starting at `i`
--- (just after the opening `<` or `<?`).
--- @param raw string
--- @param i integer
--- @return { tag: string, attribs: { [string]: string } } tag
--- @return integer next_i index of the first character after the attributes
local function parse_tag(raw, i)
	i = skip_spaces(raw, i);

	local tag = raw:match(NAME_PATTERN, i);
	if tag == nil then
		error("expected tag name near '" .. raw:sub(i, i + 25) .. "'");
	end
	i = i + #tag;

	local attribs = {};

	while true do
		i = skip_spaces(raw, i);

		local _, last, key, _quote, val = raw:find(ATTRIB_PATTERN, i);
		if not last then break end

		attribs[key] = val;
		i = last + 1;
	end

	return { tag = tag, attribs = attribs }, i;
end

--- Reads the next lexical part of the document starting at `i`.
---
--- The returned table has a `type` field, one of:
---  * "eof"     - nothing but whitespace/comments is left
---  * "version" - a `<?name attr="..."?>` declaration (only if `allow_version`)
---  * "begin"   - an opening tag `<name ...>`
---  * "small"   - a self-closing tag `<name .../>`
---  * "end"     - a closing tag `</name>` (`tag` is the name string)
---  * "text"    - trimmed character data; pieces separated by comments are
---                joined with a single space
--- @param raw string
--- @param i integer
--- @param allow_version boolean
--- @return table part
--- @return integer next_i
local function parse_part(raw, i, allow_version)
	i = skip_comments(raw, i);

	if i > #raw then
		return { type = "eof" }, i;
	end

	local two = raw:sub(i, i + 1);

	if allow_version and two == "<?" then
		local tag;
		tag, i = parse_tag(raw, i + 2);

		if raw:sub(i, i + 1) ~= "?>" then
			error("malformed XML near '" .. raw:sub(i, i + 25) .. "'");
		end

		return { type = "version", tag = tag.tag, attribs = tag.attribs }, i + 2;
	elseif two == "</" then
		i = skip_spaces(raw, i + 2);

		local tag = raw:match(NAME_PATTERN, i);
		if tag == nil then
			error("expected tag name near '" .. raw:sub(i, i + 25) .. "'");
		end
		i = skip_spaces(raw, i + #tag);

		if raw:sub(i, i) == ">" then
			return { type = "end", tag = tag }, i + 1;
		else
			error("malformed closing tag near '" .. raw:sub(i, i + 25) .. "'");
		end
	elseif raw:sub(i, i) == "<" then
		local tag;
		tag, i = parse_tag(raw, i + 1);

		if raw:sub(i, i + 1) == "/>" then
			return { type = "small", tag = tag.tag, attribs = tag.attribs }, i + 2;
		elseif raw:sub(i, i) == ">" then
			return { type = "begin", tag = tag.tag, attribs = tag.attribs }, i + 1;
		else
			error("malformed opening tag near '" .. raw:sub(i, i + 25) .. "'");
		end
	else
		local text_parts = {};

		while i <= #raw do
			local text_end = raw:find("<", i, true);
			local text_part = raw:sub(i, text_end and text_end - 1 or #raw):match "^%s*(.-)%s*$";

			if text_part ~= "" then
				text_parts[#text_parts + 1] = text_part;
			end

			if not text_end then
				-- The text runs to the end of the input. Consume it, otherwise
				-- the caller would be handed the same text part forever.
				i = #raw + 1;
				break;
			end

			i = text_end;

			-- A comment does not end the text run; anything else starting
			-- with '<' does and is left for the next call.
			if raw:sub(i, i + 3) ~= "<!--" then break end
			i = skip_comments(raw, i);
		end

		if #text_parts > 0 then
			return { type = "text", text = table.concat(text_parts, " ") }, i;
		elseif i > #raw then
			return { type = "eof" }, i;
		else
			error("malformed XML near '" .. raw:sub(i, i + 25) .. "'");
		end
	end
end

--- Parses an XML document into a tree.
---
--- The result is a synthetic root node with tag "document" whose children are
--- the top-level elements. If the input starts with a `<?name version="..."?>`
--- declaration, its name and version are stored in the root's `attribs.type`
--- and `attribs.version`.
---
--- Raises an error on malformed markup, on text outside of any element, on
--- mismatched or stray closing tags and on elements that are never closed.
--- @param raw string
--- @return xml_element
local function parse(raw)
	--- @type xml_node
	local curr_node = xml_node.new { tag = "document", attribs = {} };
	local stack = { curr_node };
	local i = 1;
	-- The declaration is only recognised as the very first part.
	local first = true;

	while true do
		local part;
		part, i = parse_part(raw, i, first);

		if part.type == "eof" then break end
		first = false;

		if part.type == "text" then
			if #stack == 1 then
				error "text may not appear outside a tag";
			else
				curr_node[#curr_node + 1] = part.text;
			end
		elseif part.type == "version" then
			curr_node.attribs.type = part.tag;
			curr_node.attribs.version = part.attribs.version;
		elseif part.type == "begin" then
			local new_node = xml_node.new { tag = part.tag, attribs = part.attribs };
			curr_node[#curr_node + 1] = new_node;
			curr_node = new_node;
			stack[#stack + 1] = new_node;
		elseif part.type == "end" then
			if #stack == 1 then
				-- Only the synthetic root is open; never pop it, even if the
				-- stray closing tag happens to be named "document".
				error("closing tag '" .. part.tag .. "' has no matching opening tag");
			elseif part.tag ~= curr_node.tag then
				error("closing tag '" .. part.tag .. "' doesn't match most recent opening tag '" .. curr_node.tag .. "'");
			else
				table.remove(stack);
				curr_node = stack[#stack];
			end
		elseif part.type == "small" then
			curr_node[#curr_node + 1] = xml_node.new { tag = part.tag, attribs = part.attribs };
		else
			error "wtf";
		end
	end

	if #stack > 1 then
		error("tag '" .. curr_node.tag .. "' was left open");
	end

	return curr_node;
end

return {
	parse = parse,
	node = xml_node.new,
};