252 lines
5.4 KiB
Lua
252 lines
5.4 KiB
Lua
--- Plain-table description of a node, as accepted by `xml_node.new`.
--- @alias xml_node_raw { tag: string, attribs: { [string]: string }, [integer]: xml_element }

--- A child of a node: either a piece of text or a nested node.
--- @alias xml_element string | xml_node

--- An XML element. Children live in the array part of the table, in order.
--- @class xml_node
--- @field tag string
--- @field attribs { [string]: string }
--- @field [integer] xml_element
local xml_node = {};
xml_node.__index = xml_node;

--- Collects the direct child nodes whose tag equals `name`, in child order.
--- Text children are ignored.
--- @param name string
--- @return xml_node[]
function xml_node:get_all(name)
    --- @type xml_node[]
    local matches = {};

    for _, child in ipairs(self) do
        local is_node = type(child) ~= "string";
        if is_node and child.tag == name then
            table.insert(matches, child);
        end
    end

    return matches;
end

--- Returns the single direct child node tagged `name`.
--- Raises an error when there is no such child or more than one.
--- @param name string
--- @return xml_node
function xml_node:get(name)
    local matches = self:get_all(name);
    local count = #matches;

    if count == 1 then
        return matches[1];
    end

    if count == 0 then
        error("node '" .. name .. "' not found");
    end

    error("multiple nodes '" .. name .. "' exist");
end

--- Returns the text content of a node that holds nothing but text:
--- "" when the node has no children, the string itself when its only child
--- is a string. Raises an error for any other shape.
--- @return string
function xml_node:text()
    local count = #self;
    if count == 0 then
        return "";
    end

    local only = self[1];
    if count == 1 and type(only) == "string" then
        return only;
    end

    error "not a text-only node";
end

--- Builds a node from a plain table: copies `tag`, reuses `attribs` (or an
--- empty table when absent) and copies the array part as the children.
--- @param raw xml_node_raw
--- @return xml_node
function xml_node.new(raw)
    local node = {
        tag = raw.tag,
        attribs = raw.attribs or {},
    };

    for idx = 1, #raw do
        node[idx] = raw[idx];
    end

    return setmetatable(node, xml_node);
end
|
|
|
|
--- Advances past whitespace. Returns the index of the first non-whitespace
--- character at or after `i`. When only whitespace remains, returns the index
--- just past it; when `i` is not on whitespace at all, returns `i` unchanged.
local function skip_spaces(raw, i)
    local first_solid = raw:find("[^%s]", i);
    if first_solid ~= nil then
        return first_solid;
    end

    -- From here on nothing but whitespace (or nothing at all) is left.
    if not raw:find("^%s", i) then
        return i;
    end

    local blanks = raw:match("^%s*", i);
    if blanks then
        return i + #blanks;
    end
end
|
|
|
|
--- Parses a tag name followed by zero or more `name="value"` attributes,
--- starting at byte index `i` of `raw` (leading whitespace is skipped).
---
--- Names (of the tag and of attributes) are runs of letters, digits, `-`,
--- `_`, `:` and `.`, so namespaced names such as `xmlns:xlink` are accepted.
--- Attribute values may be quoted with either `'` or `"`; the value runs up
--- to the next occurrence of the same quote. A repeated attribute name keeps
--- the last value.
---
--- Raises an error when no tag name is found at `i`.
--- @param raw string
--- @param i integer
--- @return { tag: string, attribs: { [string]: string } } tag
--- @return integer next_i index of the first character after the last attribute and any whitespace following it
local function parse_tag(raw, i)
    i = skip_spaces(raw, i);

    local tag = raw:match("^[%w_:%.%-]+", i);
    if tag == nil then error("expected tag name near '" .. raw:sub(i, i + 25) .. "'") end

    i = i + #tag;

    local attribs = {};

    while true do
        i = skip_spaces(raw, i);

        -- Captures: the whole attribute including trailing whitespace, its
        -- name, the opening quote (matched again by %3) and the value.
        -- The name uses `+`, not a lazy `-`, so an empty name never matches.
        local all, key, _, val = raw:match("^(([%w_:%.%-]+)%s*=%s*(['\"])(.-)%3%s*)", i);
        if not all then break end

        attribs[key] = val;
        i = i + #all;
    end

    return { tag = tag, attribs = attribs }, i;
end
|
|
|
|
--- Returns the index just past the comment whose `<!--` starts at `i`.
--- An unterminated comment is treated as running to the end of `raw`, in
--- which case the result is `#raw + 1`.
local function skip_comment(raw, i)
    -- Plain-text search: read as a Lua pattern, "-->" means "zero or more
    -- dashes, then `>`", which would end the comment at the first bare `>`.
    local _, comment_end = raw:find("-->", i + 4, true);
    return comment_end and comment_end + 1 or #raw + 1;
end

--- Reads the next lexical part of `raw`, starting at byte index `i`.
---
--- Leading whitespace and comments are skipped first. The returned table has
--- a `type` field, one of:
---   * "eof"     - nothing but whitespace and comments was left
---   * "version" - `<?name attr="..."?>`, only recognised when `allow_version` is truthy
---   * "begin"   - `<name attr="...">`
---   * "small"   - `<name attr="..."/>`
---   * "end"     - `</name>` (whitespace is allowed around the name)
---   * "text"    - character data up to the next tag; comments inside it are
---                 dropped and the trimmed pieces around them are joined
---                 with single spaces
--- "version", "begin" and "small" also carry `tag` and `attribs`, "end"
--- carries `tag`, and "text" carries `text`.
---
--- Raises an error on malformed input.
--- @param raw string
--- @param i integer
--- @param allow_version boolean whether a `<?...?>` declaration is accepted here
--- @return table part
--- @return integer next_i index at which the following part starts
local function parse_part(raw, i, allow_version)
    i = skip_spaces(raw, i);

    -- Drop any run of comments in front of the part.
    while raw:sub(i, i + 3) == "<!--" do
        i = skip_spaces(raw, skip_comment(raw, i));
    end

    if i > #raw then
        return { type = "eof" }, i;
    elseif allow_version and raw:sub(i, i + 1) == "<?" then
        i = i + 2;

        local tag;
        tag, i = parse_tag(raw, i);

        if raw:sub(i, i + 1) ~= "?>" then
            error("malformed XML near '" .. raw:sub(i, i + 25) .. "'");
        end
        i = i + 2;

        return { type = "version", tag = tag.tag, attribs = tag.attribs }, i;
    elseif raw:sub(i, i + 1) == "</" then
        i = skip_spaces(raw, i + 2);

        -- Anchored, so the name must start exactly at `i` rather than
        -- anywhere later in the document.
        local tag = raw:match("^[%w_:%.%-]+", i);
        if tag == nil then error("expected closing tag name near '" .. raw:sub(i, i + 25) .. "'") end

        -- Whitespace between the name and `>` is allowed, as in `</a >`.
        i = skip_spaces(raw, i + #tag);

        if raw:sub(i, i) == ">" then
            return { type = "end", tag = tag }, i + 1;
        else
            error("malformed closing tag near '" .. raw:sub(i, i + 25) .. "'");
        end
    elseif raw:sub(i, i) == "<" then
        i = i + 1;

        local tag;
        tag, i = parse_tag(raw, i);

        if raw:sub(i, i + 1) == "/>" then
            return { type = "small", tag = tag.tag, attribs = tag.attribs }, i + 2;
        elseif raw:sub(i, i) == ">" then
            return { type = "begin", tag = tag.tag, attribs = tag.attribs }, i + 1;
        else
            error("malformed opening tag near '" .. raw:sub(i, i + 25) .. "'");
        end
    else
        local text_parts = {};

        while i <= #raw do
            local text_end = raw:find("<", i, true);

            local text_part = raw:sub(i, text_end and text_end - 1 or #raw):match "^%s*(.-)%s*$";
            if text_part ~= "" then
                text_parts[#text_parts + 1] = text_part;
            end

            if not text_end then
                -- The text ran to the end of the input. Mark it as consumed,
                -- otherwise the caller would be handed the same text forever.
                i = #raw + 1;
                break
            end

            i = text_end;

            -- Only a comment lets the text continue; any other `<` ends it.
            if raw:sub(i, i + 3) ~= "<!--" then break end

            i = skip_spaces(raw, skip_comment(raw, i));
        end

        if #text_parts > 0 then
            return { type = "text", text = table.concat(text_parts, " ") }, i;
        elseif i > #raw then
            return { type = "eof" }, i;
        else
            error("malformed XML near '" .. raw:sub(i, i + 25) .. "'");
        end
    end
end
|
|
|
|
--- Parses an XML document into a tree of nodes.
---
--- The result is a synthetic root node tagged "document" whose children are
--- the top-level elements. When the input starts with a `<?name ...?>`
--- declaration, its name is stored in the root's `attribs.type` and its
--- `version` attribute in `attribs.version`.
---
--- Raises an error when text appears outside any element, when a closing
--- tag has no opening tag or does not match the innermost open one, or when
--- an element is still open at the end of the input.
--- @param raw string
--- @return xml_node
local function parse(raw)
    --- @type xml_node
    local curr_node = xml_node.new { tag = "document", attribs = {} };
    -- Open elements, innermost last; stack[1] is always the document root.
    local stack = { curr_node };
    local i = 1;
    -- The declaration is only accepted as the very first part.
    local first = true;

    while true do
        local part;
        part, i = parse_part(raw, i, first);
        if part.type == "eof" then break end

        first = false;

        if part.type == "text" then
            if #stack == 1 then
                error "text may not appear outside a tag";
            else
                curr_node[#curr_node + 1] = part.text;
            end
        elseif part.type == "version" then
            curr_node.attribs.type = part.tag;
            curr_node.attribs.version = part.attribs.version;
        elseif part.type == "begin" then
            local new_node = xml_node.new { tag = part.tag, attribs = part.attribs };
            curr_node[#curr_node + 1] = new_node;
            curr_node = new_node;
            stack[#stack + 1] = new_node;
        elseif part.type == "end" then
            if #stack == 1 then
                -- Only the synthetic root is open. Without this check a stray
                -- `</document>` would match it and pop the root off the stack.
                error("closing tag '" .. part.tag .. "' has no matching opening tag");
            elseif part.tag ~= curr_node.tag then
                error("closing tag '" .. part.tag .. "' doesn't match most recent opening tag '" .. curr_node.tag .. "'");
            end

            table.remove(stack);
            curr_node = stack[#stack];
        elseif part.type == "small" then
            curr_node[#curr_node + 1] = xml_node.new { tag = part.tag, attribs = part.attribs };
        else
            error("internal error: unexpected part type '" .. tostring(part.type) .. "'");
        end
    end

    if #stack > 1 then
        error("tag '" .. curr_node.tag .. "' was left open");
    end

    return curr_node;
end
|
|
|
|
-- Public interface of the module: the document parser and the node constructor.
local exports = {};
exports.parse = parse;
exports.node = xml_node.new;

return exports;
|