From 18c60988510d735bcd15145309da5c04977fa021 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 19 Oct 2022 14:21:05 +0300 Subject: [PATCH] feat: add basic expression parsing --- include/compiler/treeifier/ast.hh | 2 + include/compiler/treeifier/ast/helper.hh | 2 +- include/lang/common.hh | 16 ++ include/utils/data.hh | 2 +- include/utils/location.hh | 6 + src/compiler/treeifier/ast/conv.cc | 2 +- src/compiler/treeifier/ast/parsers/exp.cc | 242 ++++++++++++++++++ src/compiler/treeifier/ast/parsers/field.cc | 2 +- src/compiler/treeifier/ast/parsers/glob.cc | 11 +- .../treeifier/ast/parsers/identifier.cc | 2 +- src/compiler/treeifier/ast/parsers/nmsp.cc | 2 +- src/compiler/treeifier/ast/parsers/type.cc | 4 +- src/compiler/treeifier/ast/parsers/var.cc | 19 ++ src/compiler/treeifier/lexer.cc | 2 +- src/utils/location.cc | 22 ++ 15 files changed, 323 insertions(+), 13 deletions(-) create mode 100644 src/compiler/treeifier/ast/parsers/exp.cc create mode 100644 src/compiler/treeifier/ast/parsers/var.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 68d0aea..4a340e5 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -26,6 +26,7 @@ namespace ppc::comp::tree::ast { extern const parser_adder_t type_adder; extern const parser_adder_t exp_adder; extern const parser_adder_t field_adder; + extern const parser_adder_t var_adder; struct ast_ctx_t { private: @@ -70,6 +71,7 @@ namespace ppc::comp::tree::ast { add_parser(glob_adder); add_parser(type_adder); add_parser(exp_adder); + add_parser(var_adder); add_parser(field_adder); return *this; diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index a9a418d..1181ce2 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -107,7 +107,7 @@ namespace ppc::comp::tree::ast { bool push_parse(const std::string &name, 
data::array_t &out) { data::map_t res; if (parse(name, res)) { - out.push(res); + out.push_back(res); return true; } else return false; diff --git a/include/lang/common.hh b/include/lang/common.hh index 2bb6186..b846bf8 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -12,6 +12,28 @@ namespace ppc::lang { located_t(const T &val): T(val), location(location_t::NONE) { } located_t() { } };
+    /**
+     * Pairs a value with a source location. Unlike located_t, the value is
+     * held in the `value` member rather than as a base class.
+     */
+    template <class T>
+    struct slocated_t {
+        T value;
+        location_t location;
+
+        // Equal only when both the value and the location are equal.
+        bool operator ==(const slocated_t &other) const {
+            return value == other.value && location == other.location;
+        }
+        bool operator !=(const slocated_t &other) const {
+            return !(*this == other);
+        }
+
+        slocated_t(location_t loc, const T &val): value(val), location(loc) { }
+        // A value given without a location is stored with location_t::NONE.
+        slocated_t(const T &val): value(val), location(location_t::NONE) { }
+        slocated_t() { }
+    };
 struct namespace_name_t : public std::vector { using base = std::vector; diff --git a/include/utils/data.hh b/include/utils/data.hh index 4e13f36..9dd385b 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -85,7 +85,7 @@ namespace ppc::data { } return res->second; } - const value_t &operator[](std::string name) const { + const value_t &operator [](std::string name) const { auto res = values.find(name); if (res == values.end()) throw "The map doesn't contain a key '" + name + "'."; return res->second; diff --git a/include/utils/location.hh b/include/utils/location.hh index 65efeff..b9728db 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -12,6 +12,12 @@ namespace ppc { std::size_t code_start; const std::string &filename; + location_t &operator=(const location_t &other); + bool operator==(const location_t &other) const; + bool operator !=(const location_t &other) const { + return !(*this == other); + } + operator std::string() const { return to_string(); } std::string to_string() const; location_t intersect(location_t other) const; diff --git a/src/compiler/treeifier/ast/conv.cc
b/src/compiler/treeifier/ast/conv.cc index 2bf6c73..23019b4 100644 --- a/src/compiler/treeifier/ast/conv.cc +++ b/src/compiler/treeifier/ast/conv.cc @@ -54,7 +54,7 @@ namespace ppc::comp::tree::ast::conv { auto arr = res["content"].array({}); for (const auto &segment : nmsp) { - arr.push({ + arr.push_back({ { "location", loc_to_map(segment.location) }, { "content", segment }, { "$_name", "$_nmsp" }, diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc new file mode 100644 index 0000000..de39658 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -0,0 +1,269 @@
+#include "compiler/treeifier/ast/helper.hh"
+#include <map>
+#include <algorithm>
+
+// Operator precedence levels; a smaller value binds tighter. PAREN and
+// CALL_START are markers kept on the operator stack, not real operators.
+enum precedence_t {
+    NONE,
+    POSTFIX,
+    PREFIX,
+    MULT,
+    ADD,
+    SHIFT,
+    COMP,
+    EQU,
+    BIN_AND,
+    BIN_XOR,
+    BIN_OR,
+    BOOL_AND,
+    BOOL_OR,
+    TERNARY,
+    ASSIGN,
+    PAREN,
+    CALL_START,
+};
+
+// Static description of an operator.
+struct op_data_t {
+    precedence_t precedence;
+    size_t op_n; // number of operands taken from the value stack
+    std::string name; // stored in the "op" field of the produced node
+    bool assoc; // when true, stacked operators of equal precedence are not folded first
+};
+
+op_data_t sizeof_data { precedence_t::PREFIX, 1, "sizeof", true };
+
+// Operators accepted where a value is expected.
+std::map<operator_t, op_data_t> pre_ops {
+    { operator_t::INCREASE, { precedence_t::PREFIX, 1, "inc_pre" } },
+    { operator_t::DECREASE, { precedence_t::PREFIX, 1, "dec_pre" } },
+    { operator_t::ADD, { precedence_t::PREFIX, 1, "positive" } },
+    { operator_t::SUBTRACT, { precedence_t::PREFIX, 1, "negative" } },
+    { operator_t::BITWISE_NEGATIVE, { precedence_t::PREFIX, 1, "flip" } },
+    { operator_t::MULTIPLY, { precedence_t::PREFIX, 1, "dereference" } },
+    { operator_t::AND, { precedence_t::PREFIX, 1, "reference" } },
+};
+// Operators accepted right after a value (binary and postfix).
+std::map<operator_t, op_data_t> bin_ops {
+    { operator_t::ADD, { precedence_t::ADD, 2, "add" } },
+    { operator_t::SUBTRACT, { precedence_t::ADD, 2, "subtract" } },
+    { operator_t::MULTIPLY, { precedence_t::MULT, 2, "multiply" } },
+    { operator_t::DIVIDE, { precedence_t::MULT, 2, "divide" } },
+    { operator_t::MODULO, { precedence_t::MULT, 2, "modulo" } },
+    { operator_t::INCREASE, { precedence_t::POSTFIX, 1, "inc_post" } },
+    { operator_t::DECREASE, { precedence_t::POSTFIX, 1, "dec_post" } },
+    { (operator_t)-1, sizeof_data },
+};
+
+// Parser for "$_exp": values are collected in `res`, pending operators in
+// `op_stack`, and operators are folded into "$_operator" / "$_call" nodes.
+class exp_parser_t : public parser_t {
+    // Creates a "$_operator" node with no operands yet.
+    map_t op_to_map(located_t<op_data_t> op) const {
+        return {
+            { "$_name", "$_operator" },
+            { "ops", array_t() },
+            { "location", conv::loc_to_map(op.location) },
+            { "op", op.name },
+        };
+    }
+
+    // Folds the top operator: its op_n operands are moved from `res` into the
+    // node, which is pushed back onto `res`. Returns false when it cannot.
+    bool pop(std::vector<located_t<op_data_t>> &op_stack, array_t &res) const {
+        if (op_stack.empty()) return false;
+
+        auto map = op_to_map(op_stack.back());
+        auto op_n = op_stack.back().op_n;
+        op_stack.pop_back();
+
+        if (res.size() < op_n) return false;
+
+        auto &ops = map["ops"].array();
+
+        for (size_t i = 0; i < op_n; i++) {
+            ops.push_back(res.back());
+            res.pop_back();
+        }
+
+        std::reverse(ops.begin(), ops.end());
+        res.push_back(map);
+
+        return true;
+    }
+    // Folds operators down to the nearest PAREN marker and removes the marker.
+    // Returns false when the stack holds no PAREN marker.
+    bool pop_paren(std::vector<located_t<op_data_t>> &op_stack, array_t &res) const {
+        bool has_paren = false;
+        for (const auto &op : op_stack) {
+            if (op.precedence == precedence_t::PAREN) {
+                has_paren = true;
+                break;
+            }
+        }
+        if (!has_paren) return false;
+
+        while (true) {
+            if (op_stack.back().precedence == precedence_t::PAREN) break;
+            if (!pop(op_stack, res)) return false;
+        }
+
+        op_stack.pop_back();
+        return true;
+    }
+    // Folds the callee and its n + 1 arguments from `res` into a "$_call" node
+    // and removes the top of `op_stack`. Returns false on too few values.
+    bool pop_call(size_t n, std::vector<located_t<op_data_t>> &op_stack, array_t &res) const {
+        // n + 1 arguments plus the callee must be available.
+        if (op_stack.empty() || res.size() < n + 2) return false;
+
+        map_t call = {
+            { "$_name", "$_call" },
+        };
+
+        array_t &args = call["args"].array({});
+
+        for (size_t i = 0; i <= n; i++) {
+            args.push_back(res.back());
+            res.pop_back();
+        }
+
+        std::reverse(args.begin(), args.end());
+
+        call["func"] = res.back();
+        res.pop_back();
+        res.push_back(call);
+
+        op_stack.pop_back();
+        return true;
+    }
+    // Folds the stacked operators that have to be applied before `data`: those
+    // with a smaller precedence value, and equal ones unless data.assoc is set.
+    bool pop_until(const op_data_t &data, tree_helper_t &h, std::vector<located_t<op_data_t>> &op_stack, array_t &res) const {
+        while (!op_stack.empty()) {
+            auto &back_data = op_stack.back();
+            if (data.assoc ?
+                back_data.precedence >= data.precedence :
+                back_data.precedence > data.precedence
+            ) break;
+
+            if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator.");
+        }
+        return true;
+    }
+
+    // Reads values and operators until a token that cannot continue the
+    // expression, then folds what is left and stores the first node in `out`.
+    bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const {
+        tree_helper_t h(ctx, res_i);
+        // Pseudo-operator used to fold everything above a CALL_START marker.
+        const op_data_t call_start { precedence_t::CALL_START, 0, "", true };
+
+        bool last_val = false;
+        std::vector<located_t<op_data_t>> op_stack;
+        std::vector<size_t> call_args_n;
+        auto res = array_t();
+
+
+        while (true) {
+            if (h.ended()) break;
+
+            if (!last_val && h.curr().is_identifier("sizeof")) {
+                op_stack.push_back({ h.loc(), sizeof_data });
+                h.advance("Expected a value on the right side of the operator.");
+                continue;
+            }
+            if (h.curr().is_operator()) {
+                auto op = h.curr()._operator();
+                if (last_val) {
+                    if (op == operator_t::PAREN_OPEN) {
+                        call_args_n.push_back(0);
+                        op_stack.push_back({ h.loc(), { precedence_t::CALL_START } });
+                        h.advance("Expected an argument.");
+                        last_val = false;
+                    }
+                    else if (op == operator_t::COMMA) {
+                        if (call_args_n.size() == 0) return h.err("Unexpected comma here.");
+
+                        if (!pop_until(call_start, h, op_stack, res)) return false;
+                        h.advance("Expected an argument.");
+                        call_args_n.back()++;
+                        last_val = false;
+                    }
+                    else if (op == operator_t::PAREN_CLOSE) {
+                        bool is_call = false, is_paren = false;
+
+                        for (auto i = op_stack.rbegin(); i != op_stack.rend(); i++) {
+                            if (i->precedence == precedence_t::PAREN) {
+                                is_paren = true;
+                                break;
+                            }
+                            else if (i->precedence == precedence_t::CALL_START) {
+                                is_call = true;
+                                break;
+                            }
+                        }
+
+                        if (is_call) {
+                            // The last argument may still have pending operators.
+                            if (!pop_until(call_start, h, op_stack, res)) return false;
+                            if (!pop_call(call_args_n.back(), op_stack, res)) return h.err("Expected an argument.");
+                            call_args_n.pop_back();
+                        }
+                        else if (is_paren) pop_paren(op_stack, res);
+                        else break;
+
+                        if (!h.try_advance()) break;
+                    }
+                    else if (auto bin_it = bin_ops.find(op); bin_it != bin_ops.end()) {
+                        const auto &data = bin_it->second;
+                        if (!pop_until(data, h, op_stack, res)) return false;
+                        op_stack.push_back({ h.loc(), data });
+
+                        if (data.op_n == 1) {
+                            last_val = true;
+                            if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator.");
+                            if (!h.try_advance()) break; // same check as after a closing paren
+                        }
+                        else {
+                            last_val = false;
+                            h.advance("Expected a value on the right side of the operator.");
+                        }
+                    }
+                    else break;
+                }
+                else {
+                    if (op == operator_t::PAREN_OPEN) {
+                        op_stack.push_back({ h.loc(), { precedence_t::PAREN } });
+                        h.advance("Expected a value.");
+                        last_val = false;
+                    }
+                    else if (auto pre_it = pre_ops.find(op); pre_it != pre_ops.end()) {
+                        op_stack.push_back({ h.loc(), pre_it->second });
+                        h.advance("Expected a value on the right side of the operator.");
+                    }
+                    else break;
+                }
+                continue;
+            }
+            if (!last_val && h.push_parse("$_exp_val", res)) last_val = true;
+            else break;
+        }
+
+        if (res.size() == 0) return false;
+
+        while (!op_stack.empty()) {
+            if (op_stack.back().precedence == precedence_t::PAREN) throw message_t::error("Unclosed paren.", op_stack.back().location);
+            if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator.");
+        }
+
+        out = res.front().map();
+
+        return h.submit(false);
+    }
+
+    public: exp_parser_t(): parser_t("$_exp") { }
+};
+
+const parser_adder_t ppc::comp::tree::ast::exp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new exp_parser_t()); };
diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc index 79d1c0f..072e186 100644 --- a/src/compiler/treeifier/ast/parsers/field.cc +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -41,4 +41,4 @@ class field_parser_t : public parser_t { public: field_parser_t(): parser_t("$_field") { } }; -parser_adder_t ppc::comp::tree::ast::field_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new field_parser_t(), "$_def"); }; +const parser_adder_t ppc::comp::tree::ast::field_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new field_parser_t(), "$_def"); }; diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index 427b718..ead5048 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -45,6
+45,9 @@ auto nmsp_def_parser = nmsp_def_parser_t(); class glob_parser_t : public parser_t { bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { tree_helper_t h(ctx, res_i); + + return h.parse("$_exp", out); + if (h.ended()) return true; if (nmsp_def_parser(ctx, h.i, (out["namespace"] = map_t()).map())) { ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); @@ -56,7 +59,7 @@ class glob_parser_t : public parser_t { while (true) { map_t map; if (!import_parser(ctx, h.i, map)) break; - imports.push(map); + imports.push_back(map); auto nmsp = conv::map_to_nmsp(map); if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); @@ -79,8 +82,8 @@ public: glob_parser_t(): parser_t("$_glob") { } }; -parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { - ctx.add_parser(new group_parser_t("$_def")); - ctx.add_parser(new group_parser_t("$_expr_val")); +const parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { + ctx.add_group("$_def"); + ctx.add_group("$_exp_val"); ctx.add_parser(new glob_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc index 3b3d2fc..e44eaed 100644 --- a/src/compiler/treeifier/ast/parsers/identifier.cc +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -18,4 +18,4 @@ class identifier_parser_t : public parser_t { public: identifier_parser_t(): parser_t("$_identifier") { } }; -parser_adder_t ppc::comp::tree::ast::identifier_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new identifier_parser_t()); }; +const parser_adder_t ppc::comp::tree::ast::identifier_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new identifier_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc index b1bd4b4..576aec1 100644 --- a/src/compiler/treeifier/ast/parsers/nmsp.cc +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -23,4 +23,4 @@ class 
nmsp_parser_t : public parser_t { public: nmsp_parser_t(): parser_t("$_nmsp") { } }; -parser_adder_t ppc::comp::tree::ast::nmsp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new nmsp_parser_t()); }; +const parser_adder_t ppc::comp::tree::ast::nmsp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new nmsp_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 0425a4d..cf0801f 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -27,7 +27,7 @@ class type_parser_t : public parser_t { out["location"] = conv::loc_to_map(h.res_loc()); out["name"] = nmsp_content[nmsp_content.size() - 1]; out["ptr_n"] = (float)ptr_n; - nmsp_content.pop(); + nmsp_content.pop_back(); if (nmsp_content.size() == 0) { auto loc = h.res_loc(); @@ -47,4 +47,4 @@ class type_parser_t : public parser_t { public: type_parser_t(): parser_t("$_type") { } }; -parser_adder_t ppc::comp::tree::ast::type_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new type_parser_t()); }; +const parser_adder_t ppc::comp::tree::ast::type_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new type_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/var.cc b/src/compiler/treeifier/ast/parsers/var.cc new file mode 100644 index 0000000..4d3a8e0 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/var.cc @@ -0,0 +1,20 @@
+#include "compiler/treeifier/ast/helper.hh"
+
+// Parser for "$_var": turns a single identifier token into a node.
+class var_parser_t : public parser_t {
+    bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const {
+        tree_helper_t h(ctx, res_i);
+
+        // Anything other than an identifier is not handled by this parser.
+        if (!h.curr().is_identifier()) return false;
+
+        out["content"] = h.curr().identifier();
+        out["location"] = conv::loc_to_map(h.loc());
+        return h.submit(true);
+    }
+
+    public: var_parser_t(): parser_t("$_var") { }
+};
+
+// Adds the parser to the context under "$_exp_val", the name exp_parser_t parses values with.
+const parser_adder_t ppc::comp::tree::ast::var_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new var_parser_t(), "$_exp_val"); };
diff --git a/src/compiler/treeifier/lexer.cc
b/src/compiler/treeifier/lexer.cc index 2f89eea..4d6e1e3 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -34,7 +34,7 @@ static inline bool is_any(char c, std::string chars) { return res; } static inline bool is_operator(char c) { - return is_any(c, "=!<>+-*/%&|^?:,.(){}[];"); + return is_any(c, "=!<>+-*/%&|^?:,.(){}[];~"); } static res_t lexlet_default(char c, std::vector &tok); diff --git a/src/utils/location.cc b/src/utils/location.cc index bfc2481..4138fc1 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -2,6 +2,7 @@ #include using namespace ppc; +using namespace std::string_literals; std::string location_t::to_string() const { std::stringstream res; @@ -55,6 +56,30 @@ location_t location_t::intersect(location_t other) const { return a; }
+// Copies the position fields of `other`. The filename member is a reference and
+// is left untouched, so assigning a location with another filename throws.
+location_t &location_t::operator=(const location_t &other) {
+    if (filename != other.filename) throw "Can't assign to location with different filename."s;
+
+    line = other.line;
+    start = other.start;
+    length = other.length;
+    code_start = other.code_start;
+
+    return *this;
+}
+
+// Two locations are equal when the filename and every position field match.
+bool location_t::operator==(const location_t &other) const {
+    return filename == other.filename
+        && line == other.line
+        && start == other.start
+        && length == other.length
+        && code_start == other.code_start;
+}
+
+
+
 std::string empty = ""; location_t::location_t():