slimpack/src/formats/xml.lua
2025-03-08 23:22:10 +02:00

252 lines
5.4 KiB
Lua

--- @alias xml_node_raw { tag: string, attribs: { [string]: string }, [integer]: xml_element }
--- @alias xml_element string | xml_node

--- An XML element. The array part holds its children in document order:
--- plain strings for text, `xml_node`s for nested elements.
--- @class xml_node
--- @field tag string
--- @field attribs { [string]: string }
--- @field [integer] xml_element
local xml_node = {};
xml_node.__index = xml_node;

--- Collects every direct child element whose tag equals `name`.
--- Text children are never returned.
--- @param name string
--- @return xml_node[]
function xml_node:get_all(name)
    --- @type xml_node[]
    local matches = {};
    for _, child in ipairs(self) do
        local is_text = type(child) == "string";
        if not is_text and child.tag == name then
            table.insert(matches, child);
        end
    end
    return matches;
end

--- Returns the one direct child element tagged `name`.
--- Raises an error when there is no such child or more than one.
--- @param name string
--- @return xml_node
function xml_node:get(name)
    local found = self:get_all(name);
    local count = #found;
    if count == 1 then
        return found[1];
    end
    if count == 0 then
        error("node '" .. name .. "' not found");
    end
    error("multiple nodes '" .. name .. "' exist");
end

--- Returns the text content of a node that has no child elements.
--- A node without children yields the empty string; anything other than
--- a single text child raises an error.
--- @return string
function xml_node:text()
    local count = #self;
    if count == 0 then
        return "";
    end
    local only = self[1];
    if count == 1 and type(only) == "string" then
        return only;
    end
    error "not a text-only node";
end

--- Builds an `xml_node` from a plain table. `tag` is copied, `attribs`
--- defaults to a fresh empty table, and the array part becomes the children.
--- @param raw xml_node_raw
--- @return xml_node
function xml_node.new(raw)
    local node = setmetatable({
        tag = raw.tag,
        attribs = raw.attribs or {},
    }, xml_node);
    -- table.move hands back its destination table, i.e. `node`.
    return table.move(raw, 1, #raw, 1, node);
end
--- Advances past whitespace starting at index `i`.
--- Returns the index of the first non-whitespace character at or after `i`;
--- when only whitespace remains, the index just past that whitespace; and
--- `i` itself when nothing at all remains.
--- @param raw string
--- @param i integer
--- @return integer
local function skip_spaces(raw, i)
    local first_solid = raw:find("%S", i);
    if first_solid then
        return first_solid;
    end
    -- No non-space character is left: step over the trailing whitespace, if any.
    local trailing = raw:match("^%s+", i);
    if trailing then
        return i + #trailing;
    end
    return i;
end
--- Parses a tag name followed by zero or more `key="value"` attributes,
--- starting at index `i` (leading whitespace is skipped).
---
--- Names may contain letters, digits, `-`, `_`, `:` and `.`. Attribute values
--- may be quoted with either `'` or `"`; the value runs up to the next
--- occurrence of the same quote character. A later duplicate attribute
--- overwrites an earlier one.
---
--- Raises an error when no tag name is found at `i`.
--- @param raw string
--- @param i integer
--- @return { tag: string, attribs: { [string]: string } } parsed
--- @return integer next_i index of the first character that is not part of the tag name or its attributes
local function parse_tag(raw, i)
    i = skip_spaces(raw, i);
    local tag = raw:match("^[%w_:%.%-]+", i);
    if tag == nil then error("expected tag name near '" .. raw:sub(i, i + 25) .. "'") end
    i = i + #tag;

    local attribs = {};
    while true do
        i = skip_spaces(raw, i);
        -- The key uses `+` so that an attribute with an empty name
        -- (e.g. `="x"`) is rejected rather than stored under "".
        -- Captures: whole attribute, key, quote character, value.
        local all, key, _, val = raw:match("^(([%w_:%.%-]+)%s*=%s*(['\"])(.-)%3%s*)", i);
        if not all then break end
        attribs[key] = val;
        i = i + #all;
    end

    return { tag = tag, attribs = attribs }, i;
end
--- Skips one `<!-- ... -->` comment that starts exactly at index `i`.
--- @param raw string
--- @param i integer
--- @return integer? next_i index just past the comment, or nil when no comment starts at `i`
local function skip_comment(raw, i)
    if raw:sub(i, i + 3) ~= "<!--" then return nil end
    -- Plain search: read as a Lua pattern, "-->" means "a lazy run of '-'
    -- followed by '>'", which would stop at the first '>' inside the comment.
    local _, comment_end = raw:find("-->", i + 4, true);
    -- An unterminated comment swallows the rest of the input.
    return (comment_end or #raw) + 1;
end

--- Reads the next syntactic part of the document starting at index `i`.
---
--- Leading whitespace and comments are skipped. The returned part is a table
--- whose `type` field is one of:
---   * "eof"     - nothing left to read;
---   * "version" - a `<?name attrs?>` declaration (only when `allow_version` is truthy);
---   * "begin"   - an opening tag `<name attrs>`;
---   * "small"   - a self-closing tag `<name attrs/>`;
---   * "end"     - a closing tag `</name>`;
---   * "text"    - character data, trimmed; pieces separated by comments are joined with a single space.
--- "version", "begin" and "small" carry `tag` and `attribs`; "end" carries
--- `tag`; "text" carries `text`.
---
--- Raises an error on malformed markup.
--- @param raw string
--- @param i integer
--- @param allow_version boolean?
--- @return table part
--- @return integer next_i index at which the following part starts
local function parse_part(raw, i, allow_version)
    i = skip_spaces(raw, i);

    -- Drop any run of comments (and the whitespace between them).
    local after_comment = skip_comment(raw, i);
    while after_comment do
        i = skip_spaces(raw, after_comment);
        after_comment = skip_comment(raw, i);
    end

    if i > #raw then
        return { type = "eof" }, i;
    elseif allow_version and raw:sub(i, i + 1) == "<?" then
        local tag;
        tag, i = parse_tag(raw, i + 2);
        if raw:sub(i, i + 1) ~= "?>" then
            error("malformed XML near '" .. raw:sub(i, i + 25) .. "'");
        end
        return { type = "version", tag = tag.tag, attribs = tag.attribs }, i + 2;
    elseif raw:sub(i, i + 1) == "</" then
        i = skip_spaces(raw, i + 2);
        -- Anchored, so the name must start right here rather than anywhere later in the input.
        local tag = raw:match("^[%w_:%.%-]+", i);
        if tag == nil then error("expected closing tag name near '" .. raw:sub(i, i + 25) .. "'") end
        -- Whitespace is permitted between the name and '>'.
        i = skip_spaces(raw, i + #tag);
        if raw:sub(i, i) ~= ">" then
            error("malformed closing tag near '" .. raw:sub(i, i + 25) .. "'");
        end
        return { type = "end", tag = tag }, i + 1;
    elseif raw:sub(i, i) == "<" then
        local tag;
        tag, i = parse_tag(raw, i + 1);
        if raw:sub(i, i + 1) == "/>" then
            return { type = "small", tag = tag.tag, attribs = tag.attribs }, i + 2;
        elseif raw:sub(i, i) == ">" then
            return { type = "begin", tag = tag.tag, attribs = tag.attribs }, i + 1;
        else
            error("malformed opening tag near '" .. raw:sub(i, i + 25) .. "'");
        end
    else
        local text_parts = {};
        while i <= #raw do
            local text_end = raw:find("<", i, true);
            local text_part = raw:sub(i, (text_end or #raw + 1) - 1):match("^%s*(.-)%s*$");
            if text_part ~= "" then
                text_parts[#text_parts + 1] = text_part;
            end
            if not text_end then
                -- The text ran to the end of the input: mark it consumed so
                -- the caller does not receive the same text again.
                i = #raw + 1;
                break;
            end
            local after = skip_comment(raw, text_end);
            if not after then
                -- A real tag follows; leave it for the next call.
                i = text_end;
                break;
            end
            i = skip_spaces(raw, after);
        end

        if #text_parts > 0 then
            return { type = "text", text = table.concat(text_parts, " ") }, i;
        elseif i > #raw then
            return { type = "eof" }, i;
        else
            error("malformed XML near '" .. raw:sub(i, i + 25) .. "'");
        end
    end
end
--- Parses an XML document into a tree of nodes.
---
--- The result is a synthetic root node tagged "document" whose children are
--- the top-level elements of the input. When the input starts with a
--- `<?name ...?>` declaration, the root's `attribs.type` is set to the
--- declaration name and `attribs.version` to its `version` attribute.
---
--- Raises an error when text appears outside any element, when a closing tag
--- does not match the innermost open element (or there is no open element at
--- all), or when an element is still open at the end of the input.
--- @param raw string
--- @return xml_node
local function parse(raw)
    --- @type xml_node
    local root = xml_node.new { tag = "document", attribs = {} };
    local stack = { root };
    local curr_node = root;
    local i = 1;
    -- A `<?...?>` declaration is only recognised as the very first part.
    local first = true;

    while true do
        local part;
        part, i = parse_part(raw, i, first);
        if part.type == "eof" then break end
        first = false;

        if part.type == "text" then
            if #stack == 1 then
                error "text may not appear outside a tag";
            end
            curr_node[#curr_node + 1] = part.text;
        elseif part.type == "version" then
            curr_node.attribs.type = part.tag;
            curr_node.attribs.version = part.attribs.version;
        elseif part.type == "begin" then
            local new_node = xml_node.new { tag = part.tag, attribs = part.attribs };
            curr_node[#curr_node + 1] = new_node;
            curr_node = new_node;
            stack[#stack + 1] = new_node;
        elseif part.type == "end" then
            if #stack == 1 then
                -- Only the synthetic root is left; it must never be popped,
                -- even by a literal `</document>`.
                error("closing tag '" .. part.tag .. "' has no matching opening tag");
            elseif part.tag ~= curr_node.tag then
                error("closing tag '" .. part.tag .. "' doesn't match most recent opening tag '" .. curr_node.tag .. "'");
            end
            stack[#stack] = nil;
            curr_node = stack[#stack];
        elseif part.type == "small" then
            curr_node[#curr_node + 1] = xml_node.new { tag = part.tag, attribs = part.attribs };
        else
            error "wtf";
        end
    end

    if #stack > 1 then
        error("tag '" .. curr_node.tag .. "' was left open");
    end
    return root;
end
-- Public interface of the module: `parse` turns an XML string into a node
-- tree, `node` constructs an `xml_node` from a plain table.
local exports = {};
exports.parse = parse;
exports.node = xml_node.new;
return exports;