From 537884fbb8ce7c429b3031261bc1e6809e71160a Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 27 Sep 2022 13:56:05 +0300 Subject: [PATCH 01/74] feat: add iterable slice --- include/utils/slice.hh | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/utils/slice.hh diff --git a/include/utils/slice.hh b/include/utils/slice.hh new file mode 100644 index 0000000..ee080e6 --- /dev/null +++ b/include/utils/slice.hh @@ -0,0 +1,52 @@ +#include + +namespace ppc { + template + class slice_t { + private: + T *iterable; + std::size_t start; + std::size_t n; + public: + auto begin() const { return iterable->begin() + start; } + auto end() const { return iterable->end() + start + n; } + + auto size() const { return n; } + auto &operator[](std::size_t i) { return iterable[start + i]; } + const auto &operator[](std::size_t i) const { return iterable[start + i]; } + + slice_t(T &iterable, std::size_t start, std::size_t n) { + this->iterable = &iterable; + this->start = start; + this->n = n; + if (n == -1u) this->n = iterable.size() - start; + } + }; + + + template + inline slice_t slice(slice_t &sl) { + return slice_t(sl.iterable, sl.start, sl.n); + } + template + inline slice_t slice(slice_t &sl, std::size_t start) { + return slice_t(sl.iterable, sl.start + start, sl.n); + } + template + inline slice_t slice(slice_t &sl, std::size_t start, std::size_t n) { + return slice_t(sl.iterable, sl.start + start, n); + } + + template + inline slice_t slice(T &vec) { + return slice_t(vec, 0, vec.size()); + } + template + inline slice_t slice(T &vec, std::size_t start) { + return slice_t(vec, start, vec.size()); + } + template + inline slice_t slice(T &vec, std::size_t start, std::size_t n) { + return slice_t(vec, start, n); + } +} \ No newline at end of file From f758eae53e593f767395f02faf488e74dcdedf3c Mon Sep 17 00:00:00 2001 From: TopchetoEU 
<36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 27 Sep 2022 13:57:48 +0300 Subject: [PATCH 02/74] chore: add vec_slice_t type --- include/utils/slice.hh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/utils/slice.hh b/include/utils/slice.hh index ee080e6..7b22669 100644 --- a/include/utils/slice.hh +++ b/include/utils/slice.hh @@ -23,6 +23,9 @@ namespace ppc { } }; + template + using vec_slice_t = slice_t>; + template inline slice_t slice(slice_t &sl) { From bd6a837ecb146ca8715ba811075d0176a6b27cb6 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 27 Sep 2022 14:09:02 +0300 Subject: [PATCH 03/74] feat: add construct and group parser --- include/compiler/treeifier/ast.hh | 41 +++++++++++++++++++++++++++++++ src/compiler/treeifier/ast.cc | 1 + 2 files changed, 42 insertions(+) create mode 100644 include/compiler/treeifier/ast.hh create mode 100644 src/compiler/treeifier/ast.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh new file mode 100644 index 0000000..76a8c1d --- /dev/null +++ b/include/compiler/treeifier/ast.hh @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include +#include +#include "compiler/treeifier/tokenizer.hh" +#include "utils/data.hh" +#include "utils/slice.hh" +#include "lang/common.hh" + +using namespace std::string_literals; +using namespace ppc; + +namespace ppc::comp::tree::ast { + class constr_parser_t { + private: + std::string name; + public: + const std::string &name() { return name; } + virtual bool parse(messages::msg_stack_t &messages, vec_slice_t &tokens, data::map_t &out) = 0; + }; + + class group_parser_t : constr_parser_t { + private: + struct named_parser { + constr_parser_t *parser; + std::string name; + }; + std::list parsers; + std::unordered_map insertion_points; + public: + void add_insertion_point(constr_parser_t &parser, const std::string &name); + void add(constr_parser_t &parser); + void add(const std::string 
&ins_point, constr_parser_t &parser); + + bool parse(messages::msg_stack_t &messages, data::map_t &out); + + group_parser_t(); + }; +} \ No newline at end of file diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc new file mode 100644 index 0000000..04e224d --- /dev/null +++ b/src/compiler/treeifier/ast.cc @@ -0,0 +1 @@ +#include "compiler/treeifier/ast.hh" From 189ec9e4c66bacdc47b874e8a5aec7395f5d3465 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 27 Sep 2022 14:09:24 +0300 Subject: [PATCH 04/74] chore: make slice's functions only const --- include/utils/slice.hh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/utils/slice.hh b/include/utils/slice.hh index 7b22669..7a9fa33 100644 --- a/include/utils/slice.hh +++ b/include/utils/slice.hh @@ -12,8 +12,7 @@ namespace ppc { auto end() const { return iterable->end() + start + n; } auto size() const { return n; } - auto &operator[](std::size_t i) { return iterable[start + i]; } - const auto &operator[](std::size_t i) const { return iterable[start + i]; } + auto &operator[](std::size_t i) const { return (iterable*)[start + i]; } slice_t(T &iterable, std::size_t start, std::size_t n) { this->iterable = &iterable; From 4f22f27dc754f2f98a41256959f4e5a180784fba Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 27 Sep 2022 14:09:52 +0300 Subject: [PATCH 05/74] feat: add glob parser --- include/compiler/treeifier/ast.hh | 2 ++ src/compiler/treeifier/parsers/glob.cc | 11 +++++++++++ 2 files changed, 13 insertions(+) create mode 100644 src/compiler/treeifier/parsers/glob.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 76a8c1d..ba6eb11 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -38,4 +38,6 @@ namespace ppc::comp::tree::ast { group_parser_t(); }; + + extern const constr_parser_t 
&glob_parser; } \ No newline at end of file diff --git a/src/compiler/treeifier/parsers/glob.cc b/src/compiler/treeifier/parsers/glob.cc new file mode 100644 index 0000000..bd30023 --- /dev/null +++ b/src/compiler/treeifier/parsers/glob.cc @@ -0,0 +1,11 @@ +#include "compiler/treeifier/ast.hh" + +namespace ppc::comp::tree::ast { + class glob_parser_t : public constr_parser_t { + bool parse(messages::msg_stack_t &messages, vec_slice_t &tokens, data::map_t &out) { + + } + }; + + const constr_parser_t &glob_parser = glob_parser_t(); +} From df0e17d450bbacb883caf134c158e65a3f7eacea Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 3 Oct 2022 14:19:22 +0300 Subject: [PATCH 06/74] refactor: replace contrstructors with paren syntax --- include/lang/version.hh | 6 +++--- include/utils/data.hh | 2 +- include/utils/message.hh | 6 ++++++ src/compiler/treeifier/lexer.cc | 8 ++++---- src/compiler/treeifier/tokenizer.cc | 26 +++++++++++++------------- src/utils/data.cc | 12 ++++++------ src/utils/location.cc | 2 ++ 7 files changed, 35 insertions(+), 27 deletions(-) diff --git a/include/lang/version.hh b/include/lang/version.hh index 6fd01fc..89b3fc3 100644 --- a/include/lang/version.hh +++ b/include/lang/version.hh @@ -33,8 +33,8 @@ namespace ppc { bool operator ==(version_t other) const; inline bool operator !=(version_t other) const { return !(*this == other); } - version_t(uint16_t major, uint16_t minor, uint32_t revision) : major { major }, minor { minor }, revision { revision } { } - version_t(uint16_t major, uint16_t minor) : version_t { major, minor, -1u } { } - version_t(uint16_t major) : version_t { major, -1u, -1u } { } + version_t(uint16_t major, uint16_t minor, uint32_t revision) : major(major), minor(minor), revision(revision) { } + version_t(uint16_t major, uint16_t minor) : version_t(major, minor, -1u) { } + version_t(uint16_t major) : version_t(major, -1u, -1u) { } }; } diff --git a/include/utils/data.hh 
b/include/utils/data.hh index 2b4201b..9f5b1cd 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -70,7 +70,7 @@ namespace ppc::data { public: value_t &operator [](std::string name) { if (values.find(name) == values.end()) { - values.emplace(name, value_t { }); + values.emplace(name, value_t()); } return values[name]; diff --git a/include/utils/message.hh b/include/utils/message.hh index 90439bd..60f7657 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -17,6 +17,12 @@ namespace ppc::messages { location_t location; std::string content; + message_t(level_t level, std::string content, location_t loc = location_t::NONE) : + level(level), + content(content), + location(loc) { } + message_t() : message_t(DEBUG, "") { } + std::string to_string() const; bool is_severe() const; }; diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 9783e90..7083922 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -231,13 +231,13 @@ static process_res_t hex_process(char curr) { static process_res_t bin_process(char curr) { if (curr == '0' || curr == '1') return lexer_none(); else if (is_digit(curr)) - throw messages::message_t { messages::message_t::ERROR, NO_LOCATION, "A binary literal may only contain zeroes and ones." }; + throw messages::message_t(messages::message_t::ERROR, "A binary literal may only contain zeroes and ones.", location_t::NONE); else return lexer_end(); } static process_res_t oct_process(char curr) { if (is_oct(curr)) return lexer_none(); else if (is_digit(curr)) - throw messages::message_t { messages::message_t::ERROR, NO_LOCATION, "An octal literal may only contain octal digits." 
}; + throw messages::message_t(messages::message_t::ERROR, "An octal literal may only contain octal digits.", location_t::NONE); else return lexer_end(); } @@ -378,13 +378,13 @@ std::vector token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, } } catch (const messages::message_t &msg) { - throw messages::message_t { msg.level, { filename, line, start, i - length, length }, msg.content }; + throw messages::message_t(msg.level, msg.content, location_t(filename, line, start, i - length, length)); } } location_t loc = { filename, line, start, i - length, length }; if (curr.type) { - tokens.push_back(token_t { + tokens.push_back({ curr.type, std::string { curr_token.begin(), curr_token.end() }, { filename, line, start, i - length, length } }); diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 6230af2..c91614f 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -12,7 +12,7 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: bool escaping = false; - std::vector res { }; + std::vector res; location_t curr_char_loc = token.location; curr_char_loc.length = 1; curr_char_loc.start++; @@ -34,7 +34,7 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: // TODO: Add support for oct, hex and utf8 literals else if (c == literal_char || c == '\\') new_c = c; else { - throw message_t { message_t::ERROR, curr_char_loc, "Unescapable character." }; + throw message_t(message_t::ERROR, "Unescapable character.", curr_char_loc); } res.push_back(new_c); escaping = false; @@ -49,8 +49,8 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: if (c == '\n') break; } - if (is_char) throw message_t { message_t::ERROR, token.location, "Unterminated char literal." }; - else throw message_t { message_t::ERROR, token.location, "Unterminated string literal." 
}; + if (is_char) throw message_t(message_t::ERROR, "Unterminated char literal.", token.location); + else throw message_t(message_t::ERROR, "Unterminated string literal.", token.location); } static tok::token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { enum radix_t { @@ -98,7 +98,7 @@ static tok::token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { case OCTAL: digit = c - '0'; if (digit < 0 || digit > 7) { - throw message_t { message_t::ERROR, token.location, "Octal literals may contain numbers between 0 and 7." }; + throw message_t(message_t::ERROR, "Octal literals may contain numbers between 0 and 7.", token.location); } res <<= 3; res |= digit; @@ -112,14 +112,14 @@ static tok::token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { if (c >= 'a' && c <= 'f') digit = c - 'a' + 9; else if (c >= 'A' && c <= 'F') digit = c - 'A' + 9; else if (c >= '0' && c <= '9') digit = c - '0'; - else throw message_t { message_t::ERROR, token.location, "Invalid character '"s + c + "' in hex literal." 
}; + else throw message_t(message_t::ERROR, "Invalid character '"s + c + "' in hex literal.", token.location); res <<= 4; res |= digit; break; } } - return tok::token_t { res, token.location }; + return tok::token_t(res, token.location); } static tok::token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { double whole = 0, fract = 0; @@ -143,20 +143,20 @@ static tok::token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { } } - return tok::token_t { whole + fract, token.location }; + return tok::token_t(whole + fract, token.location); } tok::token_t tok::token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { switch (in.type) { case lex::token_t::IDENTIFIER: - return tok::token_t { in.data, in.location }; + return tok::token_t(in.data, in.location); case lex::token_t::OPERATOR: try { auto op = tok::operator_find(in.data); - return token_t { op, in.location }; + return token_t(op, in.location); } catch (std::string &err) { - throw message_t { message_t::ERROR, in.location, "Operator not recognised."s }; + throw message_t(message_t::ERROR, "Operator not recognised."s, in.location); } case lex::token_t::BIN_LITERAL: case lex::token_t::OCT_LITERAL: @@ -169,11 +169,11 @@ tok::token_t tok::token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t return { parse_string(msg_stack, false, in) }; case lex::token_t::CHAR_LITERAL: { auto str = parse_string(msg_stack, true, in); - if (str.size() != 1) throw message_t { message_t::ERROR, in.location, "Char literal must consist of just one character." }; + if (str.size() != 1) throw message_t(message_t::ERROR, "Char literal must consist of just one character.", in.location); return str.front(); } default: - throw message_t { message_t::ERROR, in.location, "Token type not recognised." 
}; + throw message_t(message_t::ERROR, "Token type not recognised.", in.location); } } std::vector tok::token_t::parse_many(messages::msg_stack_t &msg_stack, std::vector tokens) { diff --git a/src/utils/data.cc b/src/utils/data.cc index e2801f4..65b849b 100644 --- a/src/utils/data.cc +++ b/src/utils/data.cc @@ -81,15 +81,15 @@ ppc::data::value_t::value_t() { } ppc::data::value_t::value_t(const ppc::data::array_t &val) { this->type = type_t::Arr; - this->val.arr = new array_t { val }; + this->val.arr = new array_t(val); } ppc::data::value_t::value_t(const ppc::data::map_t &val) { this->type = type_t::Map; - this->val.map = new map_t { val }; + this->val.map = new map_t(val); } ppc::data::value_t::value_t(const ppc::data::string_t &val) { this->type = type_t::Str; - this->val.str = new string_t { val }; + this->val.str = new string_t(val); } ppc::data::value_t::value_t(ppc::data::bool_t val) { this->type = type_t::Bool; @@ -103,13 +103,13 @@ ppc::data::value_t::value_t(const ppc::data::value_t &other) { type = other.type; switch (other.type) { case type_t::Map: - val.map = new map_t { *other.val.map }; + val.map = new map_t(*other.val.map); break; case type_t::Arr: - val.arr = new array_t { *other.val.arr }; + val.arr = new array_t(*other.val.arr); break; case type_t::Str: - val.str = new string_t { *other.val.str }; + val.str = new string_t(*other.val.str); break; default: val = other.val; diff --git a/src/utils/location.cc b/src/utils/location.cc index b5e29aa..be6e344 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -115,3 +115,5 @@ location_t::location_t(std::string filename, std::size_t line, std::size_t start this->code_start = code_start; this->filename = filename; } + +const location_t location_t::NONE = { }; From 044ad200e5dcc4233e63ce6437a2532eb1795352 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 3 Oct 2022 15:48:39 +0300 Subject: [PATCH 07/74] chore: move NO_LOCATION const to 
location_t --- include/utils/location.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/utils/location.hh b/include/utils/location.hh index e7266aa..0257414 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -4,6 +4,8 @@ namespace ppc { struct location_t { + static const location_t NONE; + std::size_t line; std::size_t start; std::size_t length; @@ -22,6 +24,4 @@ namespace ppc { location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); }; - - static const location_t NO_LOCATION = { }; } \ No newline at end of file From e4efdbd5d5715959e15e551b3e1db25bd804a9db Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 3 Oct 2022 17:22:21 +0300 Subject: [PATCH 08/74] chore: compile files one by one --- scripts/common.mak | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scripts/common.mak b/scripts/common.mak index 91e3562..72ae2df 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -38,19 +38,18 @@ flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_ build: $(binary) .SECONDEXPANSION: -$(binary): $$(call frdeps,$(mainmodule)) $$(call sources,$$*) +$(binary): $$(call frdeps,$(mainmodule)) $$(call binaries,$(mainmodule)) $(call mkdir,$(dir $@)) echo Compiling executable '$(notdir $(binary))'... - $(CXX) $(flags) $(call sources,$(mainmodule)) -o $@ $(ldflags) $(call ldeps,$(mainmodule)) -L$(bin) "-I$(inc)" - $(call rmdir,$(bin)/lsproj$(exe)) + $(CXX) $(flags) $(call binaries,$(mainmodule)) -o $@ $(ldflags) $(call ldeps,$(mainmodule)) -L$(bin) "-I$(inc)" .SECONDEXPANSION: -$(bin)/lib$(lib)%$(so): $$(call sources,$$*) $(headers) +$(bin)/lib$(lib)%$(so): $$(call frdeps,$$*) $$(call binaries,$$*) $(call mkdir,$(bin)) echo Compiling library '$(notdir $@)'... 
- $(CXX) -shared -fPIC $(flags) $(call sources,$*) -o $@ $(ldflags) $(call ldeps,$*) -L$(bin) "-I$(inc)" + $(CXX) -shared -fPIC $(flags) $(call binaries,$*) -o $@ $(ldflags) $(call ldeps,$*) -L$(bin) "-I$(inc)" -# $(bin)/%.o: $(src)/%.cc $(headers) -# echo - Compiling '$*.cc'... -# $(call mkdir,$(dir $@)) -# $(CXX) -fPIC -c $(flags) $< -o $@ +$(bin)/%.o: $(src)/%.cc $(headers) + echo - Compiling '$*.cc'... + $(call mkdir,$(dir $@)) + $(CXX) -fPIC -c $(flags) $< -o $@ From 7a4d81f5f890f8c7b0142f830e91ed992a73eb5f Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 3 Oct 2022 17:23:29 +0300 Subject: [PATCH 09/74] fix: replace NO_LOCATION with location_t::NONE --- include/compiler/treeifier/tokenizer.hh | 12 ++++++------ src/main/main.cc | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index f889d05..5bfb367 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -128,32 +128,32 @@ namespace ppc::comp::tree::tok { bool is_identifier(std::string &&val) { return is_identifier() && identifier() == val; } token_t() { kind = NONE; } - token_t(const std::string &identifier, location_t loc = NO_LOCATION) { + token_t(const std::string &identifier, location_t loc = location_t::NONE) { kind = IDENTIFIER; data.identifier = new std::string { identifier }; location = loc; } - token_t(operator_t op, location_t loc = NO_LOCATION) { + token_t(operator_t op, location_t loc = location_t::NONE) { kind = OPERATOR; data._operator = op; location = loc; } - token_t(std::uint64_t val, location_t loc = NO_LOCATION) { + token_t(std::uint64_t val, location_t loc = location_t::NONE) { kind = INT; data.int_literal = val; location = loc; } - token_t(double val, location_t loc = NO_LOCATION) { + token_t(double val, location_t loc = location_t::NONE) { kind = FLOAT; data.float_literal = val; location = 
loc; } - token_t(char c, location_t loc = NO_LOCATION) { + token_t(char c, location_t loc = location_t::NONE) { kind = CHAR; data.char_literal = c; location = loc; } - token_t(const std::vector &val, location_t loc = NO_LOCATION) { + token_t(const std::vector &val, location_t loc = location_t::NONE) { kind = STRING; data.string_literal = new std::vector { val }; location = loc; diff --git a/src/main/main.cc b/src/main/main.cc index 871054b..fbc052e 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -120,7 +120,7 @@ void add_flags(options::parser_t &parser) { .description = "Prints a 'what?' type of message (you'll see)", .match_type = options::MATCH_PREFIX, .execute = [](options::parser_t &parser, const std::string &option, ppc::messages::msg_stack_t &global_stack) { - global_stack.push({ (messages::message_t::level_t)69, NO_LOCATION, "IDK LOL." }); + global_stack.push(messages::message_t((messages::message_t::level_t)69, "IDK LOL.")); } }); } @@ -172,4 +172,4 @@ int main(int argc, const char *argv[]) { msg_stack.print(std::cout, messages::message_t::DEBUG, true); return 0; -} \ No newline at end of file +} From 90461448f080f814e94d74ca113af26adb4e605f Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 16:00:18 +0300 Subject: [PATCH 10/74] chore: Rework AST innerworkings --- include/compiler/treeifier/ast.hh | 83 ++++++++++++++++++------ include/compiler/treeifier/ast/helper.hh | 75 +++++++++++++++++++++ include/lang/common.hh | 24 ++++++- include/utils/message.hh | 2 +- include/utils/slice.hh | 54 --------------- src/compiler/treeifier/ast.cc | 26 ++++++++ src/compiler/treeifier/parsers/glob.cc | 8 +-- src/compiler/treeifier/parsers/group.cc | 20 ++++++ src/lang/common.cc | 27 ++++++++ 9 files changed, 237 insertions(+), 82 deletions(-) create mode 100644 include/compiler/treeifier/ast/helper.hh delete mode 100644 include/utils/slice.hh create mode 100644 src/compiler/treeifier/parsers/group.cc 
create mode 100644 src/lang/common.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index ba6eb11..f779f38 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -1,43 +1,84 @@ #pragma once #include -#include +#include #include #include #include "compiler/treeifier/tokenizer.hh" #include "utils/data.hh" -#include "utils/slice.hh" #include "lang/common.hh" using namespace std::string_literals; using namespace ppc; +using namespace ppc::messages; namespace ppc::comp::tree::ast { - class constr_parser_t { - private: - std::string name; - public: - const std::string &name() { return name; } - virtual bool parse(messages::msg_stack_t &messages, vec_slice_t &tokens, data::map_t &out) = 0; - }; + class parser_t; + class group_parser_t; - class group_parser_t : constr_parser_t { + struct ast_ctx_t { private: - struct named_parser { - constr_parser_t *parser; - std::string name; + using named_parser_t = std::pair; + + struct parser_proxy_t { + private: + ast_ctx_t &parent; + public: + parser_t &operator[](const std::string &name) const; + parser_proxy_t(ast_ctx_t &parent): parent(parent) { } }; - std::list parsers; - std::unordered_map insertion_points; + + struct group_proxy_t { + private: + ast_ctx_t &parent; + public: + group_parser_t &operator[](const std::string &name) const; + group_proxy_t(ast_ctx_t &parent): parent(parent) { } + }; + + std::unordered_map parsers; + std::set groups; + public: - void add_insertion_point(constr_parser_t &parser, const std::string &name); - void add(constr_parser_t &parser); - void add(const std::string &ins_point, constr_parser_t &parser); + msg_stack_t &messages; + std::vector &tokens; - bool parse(messages::msg_stack_t &messages, data::map_t &out); + void add_parser(std::string name, parser_t &parser); + void add_parser(std::string name, group_parser_t &parser); - group_parser_t(); + const parser_proxy_t parser; + const group_proxy_t group; + + 
ast_ctx_t(msg_stack_t &messages, std::vector tokens): + messages(messages), + tokens(tokens), + parser(*this), + group(*this) { } }; - extern const constr_parser_t &glob_parser; + class parser_t { + private: + std::string _name; + public: + const std::string &name() { return _name; } + virtual bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const = 0; + bool operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { + return parse(ctx, i, out); + } + }; + + class group_parser_t : public parser_t { + private: + std::vector> named_parsers; + std::vector parsers; + public: + group_parser_t &add(parser_t &parser); + group_parser_t &add(parser_t &parser, const lang::namespace_name_t &name); + + bool parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; + }; + + extern const parser_t &glob_parser; + + const group_parser_t &get_group(std::string name); } \ No newline at end of file diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh new file mode 100644 index 0000000..9d64d7e --- /dev/null +++ b/include/compiler/treeifier/ast/helper.hh @@ -0,0 +1,75 @@ +#include "compiler/treeifier/ast.hh" + +namespace ppc::comp::tree::ast { + struct tree_helper_t { + private: + ast_ctx_t &ctx; + size_t &res_i; + size_t i; + public: + void submit() { + res_i = i; + } + + bool ended() { + return i == ctx.tokens.size(); + } + + tok::token_t &curr() { return ctx.tokens[i]; } + + location_t next_loc(size_t n = 1) { + location_t res = loc(); + res.start += res.length; + res.code_start += res.length; + res.length = n; + return res; + } + location_t loc() { + if (ended()) { + if (i == 0) return location_t::NONE; + + location_t loc = ctx.tokens[i - 1].location; + + loc.start += loc.length; + loc.code_start += loc.length; + loc.length = 1; + + return loc; + } + else return curr().location; + } + + bool try_parse(const parser_t &parser, data::map_t &out, messages::msg_stack_t &messages) { + try { + return parser(ctx, i, out); + } + 
catch (messages::message_t msg) { + messages.push(msg); + return false; + } + } + bool try_parse(const parser_t &parser, data::map_t &out) { + try { + return parser(ctx, i, out); + } + catch (messages::message_t msg) { + return false; + } + } + + bool try_advance() { + if (ended()) return false; + i++; + return !ended(); + } + bool advance() { + if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); + i++; + if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); + } + + tree_helper_t(ast_ctx_t &ctx, size_t &i): ctx(ctx), res_i(i) { + this->i = i; + } + }; +} \ No newline at end of file diff --git a/include/lang/common.hh b/include/lang/common.hh index 5e2fba2..75d7801 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -4,11 +4,31 @@ #include "utils/location.hh" namespace ppc::lang { + template + struct located_t : T { + location_t location; + + template + located_t(location_t loc, Args ...args): T(args...), location(loc) { } + template + located_t(Args ...args): T(args...), location(location_t::NONE) { } + }; + struct namespace_name_t { std::vector segments; - ppc::location_t location; - bool operator ==(const namespace_name_t &other); + bool is_empty() const { return segments.empty(); } + + auto begin() { return segments.begin(); } + auto end() { return segments.end(); } + + bool operator ==(const namespace_name_t &other) const; + const std::string &operator[](size_t i) const { return segments[i]; } + + std::string to_string() const; + + namespace_name_t() { } + namespace_name_t(std::initializer_list segments): segments(segments.begin(), segments.end()) { } }; bool is_identifier_valid(messages::msg_stack_t &msg_stack, ppc::location_t location, const std::string &name); diff --git a/include/utils/message.hh b/include/utils/message.hh index 60f7657..4223d3a 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -14,8 +14,8 @@ namespace ppc::messages { WARNING, ERROR, } level; 
- location_t location; std::string content; + location_t location; message_t(level_t level, std::string content, location_t loc = location_t::NONE) : level(level), diff --git a/include/utils/slice.hh b/include/utils/slice.hh deleted file mode 100644 index 7a9fa33..0000000 --- a/include/utils/slice.hh +++ /dev/null @@ -1,54 +0,0 @@ -#include - -namespace ppc { - template - class slice_t { - private: - T *iterable; - std::size_t start; - std::size_t n; - public: - auto begin() const { return iterable->begin() + start; } - auto end() const { return iterable->end() + start + n; } - - auto size() const { return n; } - auto &operator[](std::size_t i) const { return (iterable*)[start + i]; } - - slice_t(T &iterable, std::size_t start, std::size_t n) { - this->iterable = &iterable; - this->start = start; - this->n = n; - if (n == -1u) this->n = iterable.size() - start; - } - }; - - template - using vec_slice_t = slice_t>; - - - template - inline slice_t slice(slice_t &sl) { - return slice_t(sl.iterable, sl.start, sl.n); - } - template - inline slice_t slice(slice_t &sl, std::size_t start) { - return slice_t(sl.iterable, sl.start + start, sl.n); - } - template - inline slice_t slice(slice_t &sl, std::size_t start, std::size_t n) { - return slice_t(sl.iterable, sl.start + start, n); - } - - template - inline slice_t slice(T &vec) { - return slice_t(vec, 0, vec.size()); - } - template - inline slice_t slice(T &vec, std::size_t start) { - return slice_t(vec, start, vec.size()); - } - template - inline slice_t slice(T &vec, std::size_t start, std::size_t n) { - return slice_t(vec, start, n); - } -} \ No newline at end of file diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index 04e224d..1805400 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -1 +1,27 @@ #include "compiler/treeifier/ast.hh" + +namespace ppc::comp::tree::ast { + std::unordered_map parsers; + + parser_t &ast_ctx_t::parser_proxy_t::operator[](const 
std::string &name) const { + auto it = parent.parsers.find(name); + if (it == parent.parsers.end()) throw "The parser '" + name + "' doesn't exist."; + return *it->second; + } + group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { + parser_t *p = &parent.parser[name]; + if (parent.groups.find(p) == parent.groups.end()) throw "A parser '" + name + "' exists, but isn't a group."; + return *(group_parser_t*)p; + } + + void ast_ctx_t::add_parser(std::string name, parser_t &parser) { + if (parsers.find(name) != parsers.end()) throw "The parser '" + name + "' already exists."; + parsers[name] = &parser; + } + void ast_ctx_t::add_parser(std::string name, group_parser_t &parser) { + if (parsers.find(name) != parsers.end()) throw "The parser '" + name + "' already exists."; + parsers[name] = &parser; + groups.emplace(&parser); + } +} + diff --git a/src/compiler/treeifier/parsers/glob.cc b/src/compiler/treeifier/parsers/glob.cc index bd30023..a4627b8 100644 --- a/src/compiler/treeifier/parsers/glob.cc +++ b/src/compiler/treeifier/parsers/glob.cc @@ -1,11 +1,11 @@ #include "compiler/treeifier/ast.hh" namespace ppc::comp::tree::ast { - class glob_parser_t : public constr_parser_t { - bool parse(messages::msg_stack_t &messages, vec_slice_t &tokens, data::map_t &out) { - + class glob_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { + return false; } }; - const constr_parser_t &glob_parser = glob_parser_t(); + const parser_t &glob_parser = glob_parser_t(); } diff --git a/src/compiler/treeifier/parsers/group.cc b/src/compiler/treeifier/parsers/group.cc new file mode 100644 index 0000000..3471c78 --- /dev/null +++ b/src/compiler/treeifier/parsers/group.cc @@ -0,0 +1,20 @@ +#include "compiler/treeifier/ast.hh" +#include + +using namespace ppc::comp::tree::ast; +using namespace std::string_literals; + +group_parser_t &group_parser_t::add(parser_t &parser) { + parsers.push_back(&parser); + return *this; 
+} +group_parser_t &group_parser_t::add(parser_t &parser, const lang::namespace_name_t &name) { + if (name.is_empty()) throw "Name can't be empty."s; + if (std::find(parsers.begin(), parsers.end(), &parser) != parsers.end()) { + throw "Parser '" + name.to_string() + "' already in group."; + } + + named_parsers.push_back({ name, &parser }); + + return *this; +} diff --git a/src/lang/common.cc b/src/lang/common.cc new file mode 100644 index 0000000..7b6c9f5 --- /dev/null +++ b/src/lang/common.cc @@ -0,0 +1,27 @@ + +#include +#include "lang/common.hh" + +namespace ppc::lang { + std::string namespace_name_t::to_string() const { + std::stringstream res; + + for (size_t i = 0; i < segments.size(); i++) { + if (i != 0) res << "::"; + res << segments[i]; + } + + return res.str(); + } + + bool namespace_name_t::operator==(const namespace_name_t &other) const { + if (other.segments.size() != segments.size()) return false; + + for (size_t i = 0; i < segments.size(); i++) { + if (other[i] != segments[i]) return false; + } + + return true; + } +} + From ded15374d58b30bc4e74934437b6a8e91706b38f Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:33:30 +0300 Subject: [PATCH 11/74] fix: replace -1u with std::string::npos or -1 --- include/lang/version.hh | 4 ++-- src/lsproj.cc | 12 ++++++------ src/main/options.cc | 2 +- src/utils/strings.cc | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/lang/version.hh b/include/lang/version.hh index 89b3fc3..efa577b 100644 --- a/include/lang/version.hh +++ b/include/lang/version.hh @@ -34,7 +34,7 @@ namespace ppc { inline bool operator !=(version_t other) const { return !(*this == other); } version_t(uint16_t major, uint16_t minor, uint32_t revision) : major(major), minor(minor), revision(revision) { } - version_t(uint16_t major, uint16_t minor) : version_t(major, minor, -1u) { } - version_t(uint16_t major) : version_t(major, -1u, -1u) { } + 
version_t(uint16_t major, uint16_t minor) : version_t(major, minor, -1) { } + version_t(uint16_t major) : version_t(major, -1, -1) { } }; } diff --git a/src/lsproj.cc b/src/lsproj.cc index 8df6e4e..934dd33 100644 --- a/src/lsproj.cc +++ b/src/lsproj.cc @@ -15,11 +15,11 @@ std::string read_str(std::istream &f, const std::string &skip_chars, const std:: while (true) { c = f.get(); auto a = end_chars.find(c); - if (c == -1 || a != -1ull) { + if (c == -1 || a != std::string::npos) { end_char = c; return ""; } - if ((a = skip_chars.find(c)) == -1ull) { + if ((a = skip_chars.find(c)) == std::string::npos) { f.unget(); break; } @@ -27,7 +27,7 @@ std::string read_str(std::istream &f, const std::string &skip_chars, const std:: while (true) { c = f.get(); - if (c == -1 || end_chars.find(c) != -1ull) { + if (c == -1 || end_chars.find(c) != std::string::npos) { end_char = c; break; } @@ -35,7 +35,7 @@ std::string read_str(std::istream &f, const std::string &skip_chars, const std:: res.push_back(c); } while (true) { - if (skip_chars.find(res.back()) != -1ull) res.pop_back(); + if (skip_chars.find(res.back()) != std::string::npos) res.pop_back(); else break; } @@ -63,14 +63,14 @@ project_t read_project(std::istream &f) { }; } - if (name.find(',') != -1ull || name.find(' ') != -1ull) { + if (name.find(',') != std::string::npos || name.find(' ') != std::string::npos) { throw (std::string)"The name of a project may not contain spaces or commas."; } while (true) { std::string dep = read_str(f, " \v\t\r\n", ",\n", end_ch); - if (dep.find(' ') != -1ull) { + if (dep.find(' ') != std::string::npos) { throw (std::string)"The name of a dependency may not contain spaces."; } diff --git a/src/main/options.cc b/src/main/options.cc index d5b46c2..9328a1c 100644 --- a/src/main/options.cc +++ b/src/main/options.cc @@ -6,7 +6,7 @@ using namespace ppc; bool check_shorthand(std::string &option, const options::flag_t &flag) { if (option.size() < 2 || option[0] != '-') return false; - if 
(option.size() == 2 && std::string { flag.shorthands }.find(option[1]) != -1u) { + if (option.size() == 2 && std::string { flag.shorthands }.find(option[1]) != std::string::npos) { option = ""; return true; } diff --git a/src/utils/strings.cc b/src/utils/strings.cc index 4982718..1482a81 100644 --- a/src/utils/strings.cc +++ b/src/utils/strings.cc @@ -8,7 +8,7 @@ std::vector str::split(const std::string &splittable, std::initiali std::vector res; for (char c : splittable) { - if (std::string { splitters }.find(c) == -1u) { + if (std::string { splitters }.find(c) == std::string::npos) { buff << c; } else { @@ -29,10 +29,10 @@ std::vector str::split(const std::string &splittable, std::initiali std::string str::trim(std::string splittable, std::initializer_list splitters) { auto split = std::string { splitters }; - while (!splittable.empty() && split.find(splittable[0]) != -1u) { + while (!splittable.empty() && split.find(splittable[0]) != std::string::npos) { splittable = splittable.substr(1); } - while (!splittable.empty() && split.find(splittable[splittable.length() - 1]) != -1u) { + while (!splittable.empty() && split.find(splittable[splittable.length() - 1]) != std::string::npos) { splittable = splittable.substr(0, splittable.length() - 1); } From b3a0b39d06af1bcc46c4568a456a14597ed2b303 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:34:27 +0300 Subject: [PATCH 12/74] fix: add binaries to .PRECIOUS rule --- scripts/common.mak | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/common.mak b/scripts/common.mak index 72ae2df..aa84cb6 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -34,6 +34,7 @@ binaries = $(patsubst $(src)/%.cc,$(bin)/%.o,$(call sources,$1)) flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) .PHONY: build +.PRECIOUS: $(bin)/%.o build: $(binary) From 
a0ff612dd29382fc59aa5d9d1ae92e826635ba33 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:34:44 +0300 Subject: [PATCH 13/74] fix: update lang dependencies --- src/compiler/proj.txt | 2 +- src/lang/proj.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/proj.txt b/src/compiler/proj.txt index c0def46..b181367 100644 --- a/src/compiler/proj.txt +++ b/src/compiler/proj.txt @@ -1,2 +1,2 @@ compiler -utils \ No newline at end of file +utils, lang \ No newline at end of file diff --git a/src/lang/proj.txt b/src/lang/proj.txt index e45037f..3151a2d 100644 --- a/src/lang/proj.txt +++ b/src/lang/proj.txt @@ -1,2 +1,2 @@ lang -core \ No newline at end of file +utils \ No newline at end of file From 96232c88c95d69860563605b5c55af2fa978243e Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:35:19 +0300 Subject: [PATCH 14/74] fix&refactor: clean up lex and fix some bugs with npos --- src/compiler/treeifier/lexer.cc | 431 ++++++++++++++------------------ 1 file changed, 186 insertions(+), 245 deletions(-) diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 7083922..e13b2a0 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -6,7 +6,6 @@ using namespace ppc; using namespace comp::tree::lex; struct lexlet_t { - bool(*is_valid)(char curr); struct process_res_t { bool ended; bool repeat; @@ -15,26 +14,14 @@ struct lexlet_t { bool has_message; messages::message_t msg; }; + + bool(*is_valid)(char curr); process_res_t (*process)(char curr); token_t::kind_t type; }; using process_res_t = lexlet_t::process_res_t; -extern const lexlet_t LEXLET_DEFAULT; -extern const lexlet_t LEXLET_IDENTIFIER; -extern const lexlet_t LEXLET_OPERATOR; -extern const lexlet_t LEXLET_ZERO; -extern const lexlet_t LEXLET_FLOAT; -extern const lexlet_t LEXLET_BIN; -extern const lexlet_t 
LEXLET_OCT; -extern const lexlet_t LEXLET_DEC; -extern const lexlet_t LEXLET_HEX; -extern const lexlet_t LEXLET_STRING_LITERAL; -extern const lexlet_t LEXLET_CHAR_LITERAL; -extern const lexlet_t LEXLET_COMMENT; -extern const lexlet_t LEXLET_MULTICOMMENT; - static bool is_digit(char c) { return c >= '0' && c <= '9'; } @@ -57,7 +44,8 @@ static bool is_alphanumeric(char c) { return is_letter(c) || is_digit(c); } static bool is_any(char c, std::string chars) { - return chars.find(c) != -1u; + auto res = chars.find(c) != std::string::npos; + return res; } static process_res_t lexer_switch(const lexlet_t *lexlet) { @@ -88,245 +76,199 @@ static process_res_t lexer_none() { .new_parselet = nullptr, }; } -static process_res_t default_process(char curr) { - if (LEXLET_STRING_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_STRING_LITERAL); - if (LEXLET_CHAR_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_CHAR_LITERAL); - if (LEXLET_OPERATOR.is_valid(curr)) return lexer_switch(&LEXLET_OPERATOR); - if (LEXLET_ZERO.is_valid(curr)) return lexer_switch(&LEXLET_ZERO); - if (LEXLET_DEC.is_valid(curr)) return lexer_switch(&LEXLET_DEC); - if (LEXLET_FLOAT.is_valid(curr)) return lexer_switch(&LEXLET_FLOAT); - if (LEXLET_IDENTIFIER.is_valid(curr)) return lexer_switch(&LEXLET_IDENTIFIER); - else return (process_res_t) { - .ended = true, - .repeat = false, - .new_parselet = nullptr, - }; -} - -static bool identifier_is_valid(char curr) { - return is_letter(curr) || curr == '_' || curr == '@' || curr == '$'; -} -static process_res_t identifier_process(char curr) { - bool valid = (is_alphanumeric(curr) || curr == '_' || curr == '@' || curr == '$'); - return (process_res_t) { - .ended = !valid, - .repeat = !valid, - .new_parselet = &LEXLET_IDENTIFIER, - }; -} static bool last_escape = false; static bool literal_ended = false; - -static bool string_is_valid(char curr) { - last_escape = false; - literal_ended = false; - return curr == '"'; -} -static process_res_t string_process(char 
curr) { - if (last_escape) { - last_escape = false; - return lexer_none(); - } - - if (curr == '\\') { - last_escape = true; - } - else if (curr == '"') { - literal_ended = true; - } - else if (literal_ended) return lexer_end(); - return lexer_none(); -} - -static bool char_is_valid(char curr) { - last_escape = false; - literal_ended = false; - return curr == '\''; -} -static process_res_t char_process(char curr) { - if (last_escape) { - last_escape = false; - return lexer_none(); - } - - if (curr == '\\') { - last_escape = true; - } - else if (curr == '\'') { - literal_ended = true; - } - else if (literal_ended) return lexer_end(); - return lexer_none(); -} - static char first_op; static int op_i = 0; - -static bool operator_is_valid(char curr) { - if (is_any(curr, "=!<>+-*/%&|^?:,.(){}[];")) { - first_op = curr; - op_i = 1; - return true; - } - else return false; -} -static process_res_t operator_process(char curr) { - bool failed = true; - if (first_op == curr && op_i == 1 && is_any(curr, "+-&|?<>")) failed = false; - if (curr == '=') { - if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; - if (op_i == 2 && is_any(first_op, "<>?")) failed = false; - } - if (first_op == '-' && curr == '>' && op_i == 1) failed = false; - - if (first_op == '/' && op_i == 1) { - if (curr == '/') return lexer_switch(&LEXLET_COMMENT); - else if (curr == '*') return lexer_switch(&LEXLET_MULTICOMMENT); - } - - op_i++; - - if (failed) return lexer_end(); - else return lexer_none(); -} - -static bool zero_is_valid(char curr) { - return curr == '0'; -} -static process_res_t zero_process(char curr) { - if (curr == '.') return lexer_switch(&LEXLET_FLOAT); - else if (curr == 'b') return lexer_switch(&LEXLET_BIN); - else if (curr == 'x') return lexer_switch(&LEXLET_HEX); - else if (is_digit(curr)) return lexer_repeat_switch(&LEXLET_OCT); - else return lexer_end(); -} - -static bool dec_is_valid(char curr) { - return is_digit(curr); -} -static process_res_t dec_process(char curr) { 
- if (is_digit(curr)) return lexer_none(); - else if (curr == '.') return lexer_switch(&LEXLET_FLOAT); - else return lexer_end(); -} - static bool only_dot = false; - -static bool float_is_valid(char curr) { - return only_dot = curr == '.'; -} -static process_res_t float_process(char curr) { - if (is_digit(curr)) { - only_dot = false; - return lexer_none(); - } - else return lexer_end(); -} - -static process_res_t hex_process(char curr) { - if (is_hex(curr)) return lexer_none(); - else return lexer_end(); -} -static process_res_t bin_process(char curr) { - if (curr == '0' || curr == '1') return lexer_none(); - else if (is_digit(curr)) - throw messages::message_t(messages::message_t::ERROR, "A binary literal may only contain zeroes and ones.", location_t::NONE); - else return lexer_end(); -} -static process_res_t oct_process(char curr) { - if (is_oct(curr)) return lexer_none(); - else if (is_digit(curr)) - throw messages::message_t(messages::message_t::ERROR, "An octal literal may only contain octal digits.", location_t::NONE); - else return lexer_end(); -} - -static process_res_t comment_process(char curr) { - if (curr == '\n') return lexer_end(); - else return (process_res_t) { - .ended = false, - .dont_add = true, - }; -} - static bool last_star = false; -static process_res_t multicomment_process(char curr) { - if (curr == '/' && last_star) { - last_star = false; - return { +const lexlet_t LEXLET_IDENTIFIER = (lexlet_t) { + .is_valid = [] (char curr) { return is_letter(curr) || curr == '_' || curr == '@' || curr == '$'; }, + .process = [] (char curr) { + bool valid = (is_alphanumeric(curr) || curr == '_' || curr == '@' || curr == '$'); + return (process_res_t) { + .ended = !valid, + .repeat = !valid, + .new_parselet = &LEXLET_IDENTIFIER, + }; + }, + .type = token_t::IDENTIFIER, +}; +const lexlet_t LEXLET_HEX = (lexlet_t) { + .process = [] (char curr) { + if (is_hex(curr)) return lexer_none(); + else return lexer_end(); + }, + .type = token_t::HEX_LITERAL, +}; 
+const lexlet_t LEXLET_BIN = (lexlet_t) { + .process = [] (char curr) { + if (curr == '0' || curr == '1') return lexer_none(); + else if (is_digit(curr)) + throw messages::message_t(messages::message_t::ERROR, "A binary literal may only contain zeroes and ones.", location_t::NONE); + else return lexer_end(); + }, + .type = token_t::BIN_LITERAL, +}; +const lexlet_t LEXLET_OCT = (lexlet_t) { + .process = [] (char curr) { + if (is_oct(curr)) return lexer_none(); + else if (is_digit(curr)) + throw messages::message_t(messages::message_t::ERROR, "An octal literal may only contain octal digits.", location_t::NONE); + else return lexer_end(); + }, + .type = token_t::OCT_LITERAL, +}; +const lexlet_t LEXLET_FLOAT = (lexlet_t) { + .is_valid = [] (char curr) { return only_dot = curr == '.'; }, + .process = [] (char curr) { + if (is_digit(curr)) { + only_dot = false; + return lexer_none(); + } + else return lexer_end(); + }, + .type = token_t::FLOAT_LITERAL, +}; +const lexlet_t LEXLET_DEC = (lexlet_t) { + .is_valid = [] (char curr) { return is_digit(curr); }, + .process = [] (char curr) { + if (is_digit(curr)) return lexer_none(); + else if (curr == '.') return lexer_switch(&LEXLET_FLOAT); + else return lexer_end(); + }, + .type = token_t::DEC_LITERAL, +}; +const lexlet_t LEXLET_ZERO = (lexlet_t) { + .is_valid = [] (char curr) { return curr == '0'; }, + .process = [] (char curr) { + if (curr == '.') return lexer_switch(&LEXLET_FLOAT); + else if (curr == 'b') return lexer_switch(&LEXLET_BIN); + else if (curr == 'x') return lexer_switch(&LEXLET_HEX); + else if (is_digit(curr)) return lexer_repeat_switch(&LEXLET_OCT); + else return lexer_end(); + }, + .type = token_t::DEC_LITERAL, +}; +const lexlet_t LEXLET_COMMENT = { + .process = [] (char curr) { + if (curr == '\n') return lexer_end(); + else return (process_res_t) { + .ended = false, + .dont_add = true, + }; + }, + .type = token_t::NONE, +}; +const lexlet_t LEXLET_MULTICOMMENT = { + .process = [] (char curr) { + if (curr == 
'/' && last_star) { + last_star = false; + return (process_res_t) { + .ended = true, + }; + } + if (curr == '*') last_star = true; + + return (process_res_t) { + .dont_add = true, + }; + }, + .type = token_t::NONE, +}; +const lexlet_t LEXLET_OPERATOR = (lexlet_t) { + .is_valid = [] (char curr) { + if (is_any(curr, "=!<>+-*/%&|^?:,.(){}[];")) { + first_op = curr; + op_i = 1; + return true; + } + else return false; + }, + .process = [] (char curr) { + bool failed = true; + if (first_op == curr && op_i == 1 && is_any(curr, "+-&|?<>")) failed = false; + if (curr == '=') { + if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; + if (op_i == 2 && is_any(first_op, "<>?")) failed = false; + } + if (first_op == '-' && curr == '>' && op_i == 1) failed = false; + + if (first_op == '/' && op_i == 1) { + if (curr == '/') return lexer_switch(&LEXLET_COMMENT); + else if (curr == '*') return lexer_switch(&LEXLET_MULTICOMMENT); + } + + op_i++; + + if (failed) return lexer_end(); + else return lexer_none(); + }, + .type = token_t::OPERATOR, +}; +const lexlet_t LEXLET_STRING_LITERAL = (lexlet_t) { + .is_valid = [] (char curr) { + last_escape = false; + literal_ended = false; + return curr == '"'; + }, + .process = [] (char curr) { + if (last_escape) { + last_escape = false; + return lexer_none(); + } + + if (curr == '\\') { + last_escape = true; + } + else if (curr == '"') { + literal_ended = true; + } + else if (literal_ended) return lexer_end(); + return lexer_none(); + }, + .type = token_t::STRING_LITERAL, +}; +const lexlet_t LEXLET_CHAR_LITERAL = (lexlet_t) { + .is_valid = [] (char curr) { + last_escape = false; + literal_ended = false; + return curr == '\''; + }, + .process = [] (char curr) { + if (last_escape) { + last_escape = false; + return lexer_none(); + } + + if (curr == '\\') { + last_escape = true; + } + else if (curr == '\'') { + literal_ended = true; + } + else if (literal_ended) return lexer_end(); + return lexer_none(); + }, + .type = 
token_t::CHAR_LITERAL, +}; +const lexlet_t LEXLET_DEFAULT = (lexlet_t) { + .process = [] (char curr) { + if (LEXLET_STRING_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_STRING_LITERAL); + if (LEXLET_CHAR_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_CHAR_LITERAL); + if (LEXLET_OPERATOR.is_valid(curr)) return lexer_switch(&LEXLET_OPERATOR); + if (LEXLET_ZERO.is_valid(curr)) return lexer_switch(&LEXLET_ZERO); + if (LEXLET_DEC.is_valid(curr)) return lexer_switch(&LEXLET_DEC); + if (LEXLET_FLOAT.is_valid(curr)) return lexer_switch(&LEXLET_FLOAT); + if (LEXLET_IDENTIFIER.is_valid(curr)) return lexer_switch(&LEXLET_IDENTIFIER); + else return (process_res_t) { .ended = true, .repeat = false, .new_parselet = nullptr, }; - } - if (curr == '*') last_star = true; - - return { - .ended = false, - .dont_add = true, - }; -} - -const lexlet_t LEXLET_DEFAULT = (lexlet_t) { - .process = default_process, - .type = token_t::NONE, -}; -const lexlet_t LEXLET_IDENTIFIER = (lexlet_t) { - .is_valid = identifier_is_valid, - .process = identifier_process, - .type = token_t::IDENTIFIER, -}; -const lexlet_t LEXLET_ZERO = (lexlet_t) { - .is_valid = zero_is_valid, - .process = zero_process, - .type = token_t::DEC_LITERAL, -}; -const lexlet_t LEXLET_DEC = (lexlet_t) { - .is_valid = dec_is_valid, - .process = dec_process, - .type = token_t::DEC_LITERAL, -}; -const lexlet_t LEXLET_HEX = (lexlet_t) { - .process = hex_process, - .type = token_t::HEX_LITERAL, -}; -const lexlet_t LEXLET_BIN = (lexlet_t) { - .process = bin_process, - .type = token_t::BIN_LITERAL, -}; -const lexlet_t LEXLET_OCT = (lexlet_t) { - .process = oct_process, - .type = token_t::OCT_LITERAL, -}; -const lexlet_t LEXLET_FLOAT = (lexlet_t) { - .is_valid = float_is_valid, - .process = float_process, - .type = token_t::FLOAT_LITERAL, -}; -const lexlet_t LEXLET_OPERATOR = (lexlet_t) { - .is_valid = operator_is_valid, - .process = operator_process, - .type = token_t::OPERATOR, -}; -const lexlet_t LEXLET_STRING_LITERAL = 
(lexlet_t) { - .is_valid = string_is_valid, - .process = string_process, - .type = token_t::STRING_LITERAL, -}; -const lexlet_t LEXLET_CHAR_LITERAL = (lexlet_t) { - .is_valid = char_is_valid, - .process = char_process, - .type = token_t::CHAR_LITERAL, -}; -const lexlet_t LEXLET_COMMENT = { - .is_valid = nullptr, - .process = comment_process, - .type = token_t::NONE, -}; -const lexlet_t LEXLET_MULTICOMMENT = { - .is_valid = nullptr, - .process = multicomment_process, + }, .type = token_t::NONE, }; @@ -347,11 +289,10 @@ std::vector token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, if (curr.type) { location_t loc = { filename, line, start, i - length, length }; tokens.push_back({ curr.type, { curr_token.begin(), curr_token.end() }, loc }); - curr_token.clear(); } + curr_token.clear(); length = 0; - curr = LEXLET_DEFAULT; } else { From 9f17ac64713d6888c5c57aaff8f29d2e55c5ff29 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:35:40 +0300 Subject: [PATCH 15/74] chore: add placeholder parse function to group_parser_t --- src/compiler/treeifier/parsers/group.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/compiler/treeifier/parsers/group.cc b/src/compiler/treeifier/parsers/group.cc index 3471c78..0a6e26e 100644 --- a/src/compiler/treeifier/parsers/group.cc +++ b/src/compiler/treeifier/parsers/group.cc @@ -4,6 +4,11 @@ using namespace ppc::comp::tree::ast; using namespace std::string_literals; +bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { + return false; +} + + group_parser_t &group_parser_t::add(parser_t &parser) { parsers.push_back(&parser); return *this; From 75af4cd77f021bdca69b8e3508ac2e488836140a Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:58:17 +0300 Subject: [PATCH 16/74] fix: add DOUBLE_COLON --- include/compiler/treeifier/tokenizer.hh | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index 5bfb367..f252f6d 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -4,7 +4,7 @@ #include "utils/message.hh" #include "compiler/treeifier/lexer.hh" -namespace ppc::comp::tree::tok { +namespace ppc::comp::tree { enum operator_t { LESS_THAN, GREATER_THAN, @@ -55,6 +55,7 @@ namespace ppc::comp::tree::tok { COMMA, SEMICOLON, COLON, + DOUBLE_COLON, LAMBDA, @@ -180,7 +181,7 @@ namespace ppc::comp::tree::tok { } } - static tok::token_t parse(messages::msg_stack_t &msg_stack, lex::token_t token); + static token_t parse(messages::msg_stack_t &msg_stack, lex::token_t token); static std::vector parse_many(messages::msg_stack_t &msg_stack, std::vector tokens); }; From 3c08cd13db078bded84ed0246a2c58041413b05a Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:59:06 +0300 Subject: [PATCH 17/74] chore: move tokenizer.hh to ppc::comp::tree --- include/compiler/treeifier/ast.hh | 4 ++-- include/compiler/treeifier/ast/helper.hh | 20 ++++++++++++++++---- src/compiler/treeifier/operators.cc | 12 ++++++------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index f779f38..f49b59f 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -41,7 +41,7 @@ namespace ppc::comp::tree::ast { public: msg_stack_t &messages; - std::vector &tokens; + std::vector &tokens; void add_parser(std::string name, parser_t &parser); void add_parser(std::string name, group_parser_t &parser); @@ -49,7 +49,7 @@ namespace ppc::comp::tree::ast { const parser_proxy_t parser; const group_proxy_t group; - ast_ctx_t(msg_stack_t &messages, std::vector tokens): + ast_ctx_t(msg_stack_t &messages, std::vector tokens): messages(messages), tokens(tokens), 
parser(*this), diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index 9d64d7e..3fe8348 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -6,6 +6,10 @@ namespace ppc::comp::tree::ast { ast_ctx_t &ctx; size_t &res_i; size_t i; + + void throw_ended() { + if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); + } public: void submit() { res_i = i; @@ -15,7 +19,10 @@ namespace ppc::comp::tree::ast { return i == ctx.tokens.size(); } - tok::token_t &curr() { return ctx.tokens[i]; } + token_t &curr() { + throw_ended(); + return ctx.tokens[i]; + } location_t next_loc(size_t n = 1) { location_t res = loc(); @@ -39,6 +46,11 @@ namespace ppc::comp::tree::ast { else return curr().location; } + location_t res_loc() { + if (res_i >= ctx.tokens.size()) return loc(); + else return ctx.tokens[res_i].location.intersect(loc()); + } + bool try_parse(const parser_t &parser, data::map_t &out, messages::msg_stack_t &messages) { try { return parser(ctx, i, out); @@ -62,10 +74,10 @@ namespace ppc::comp::tree::ast { i++; return !ended(); } - bool advance() { - if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); + void advance() { + throw_ended(); i++; - if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); + throw_ended(); } tree_helper_t(ast_ctx_t &ctx, size_t &i): ctx(ctx), res_i(i) { diff --git a/src/compiler/treeifier/operators.cc b/src/compiler/treeifier/operators.cc index 4d560c4..06df236 100644 --- a/src/compiler/treeifier/operators.cc +++ b/src/compiler/treeifier/operators.cc @@ -2,6 +2,7 @@ #include "compiler/treeifier/tokenizer.hh" using namespace ppc::comp::tree; +using namespace ppc::comp; using namespace std::string_literals; @@ -17,15 +18,14 @@ std::vector operators = { "[", "]", "{", "}", "(", ")" }; - -const std::string &tok::operator_stringify(tok::operator_t kw) { - if (kw < 0 || kw 
>= operators.size()) throw "Invalid operator ID given."s; - return operators[kw]; +const std::string &tree::operator_stringify(operator_t op) { + if (op < 0 || op >= operators.size()) throw "Invalid operator ID given."s; + return operators[op]; } -tok::operator_t tok::operator_find(const std::string &raw) { +operator_t tree::operator_find(const std::string &raw) { std::size_t i = 0; for (const auto &op : operators) { - if (op == raw) return (tok::operator_t)i; + if (op == raw) return (operator_t)i; i++; } throw "Invalid operator '"s + raw + "' given."; From 1c552c2c5ce2c0a397d3abbd9e95202f45e3ad8a Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 20:03:46 +0300 Subject: [PATCH 18/74] fix: replace namespace_name_t implementation with vector inheritence --- include/lang/common.hh | 13 ++++--------- src/lang/common.cc | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/lang/common.hh b/include/lang/common.hh index 75d7801..00670ad 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -14,21 +14,16 @@ namespace ppc::lang { located_t(Args ...args): T(args...), location(location_t::NONE) { } }; - struct namespace_name_t { - std::vector segments; - - bool is_empty() const { return segments.empty(); } - - auto begin() { return segments.begin(); } - auto end() { return segments.end(); } + struct namespace_name_t : public std::vector { + using base = std::vector; bool operator ==(const namespace_name_t &other) const; - const std::string &operator[](size_t i) const { return segments[i]; } + bool operator !=(const namespace_name_t &other) const; std::string to_string() const; namespace_name_t() { } - namespace_name_t(std::initializer_list segments): segments(segments.begin(), segments.end()) { } + namespace_name_t(std::initializer_list segments): base(segments.begin(), segments.end()) { } }; bool is_identifier_valid(messages::msg_stack_t &msg_stack, 
ppc::location_t location, const std::string &name); diff --git a/src/lang/common.cc b/src/lang/common.cc index 7b6c9f5..99aa799 100644 --- a/src/lang/common.cc +++ b/src/lang/common.cc @@ -6,19 +6,28 @@ namespace ppc::lang { std::string namespace_name_t::to_string() const { std::stringstream res; - for (size_t i = 0; i < segments.size(); i++) { + for (size_t i = 0; i < size(); i++) { if (i != 0) res << "::"; - res << segments[i]; + res << (*this)[i]; } return res.str(); } bool namespace_name_t::operator==(const namespace_name_t &other) const { - if (other.segments.size() != segments.size()) return false; + if (other.size() != size()) return false; - for (size_t i = 0; i < segments.size(); i++) { - if (other[i] != segments[i]) return false; + for (size_t i = 0; i < size(); i++) { + if (other[i] != (*this)[i]) return false; + } + + return true; + } + bool namespace_name_t::operator!=(const namespace_name_t &other) const { + if (other.size() != size()) return true; + + for (size_t i = 0; i < size(); i++) { + if (other[i] == (*this)[i]) return false; } return true; From 63f79a28e1d7635a3dd26f4d52e3979d55c7f1dc Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 20:23:39 +0300 Subject: [PATCH 19/74] fix: improve try_parse and parse functions in helper.hh --- include/compiler/treeifier/ast/helper.hh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index 3fe8348..bf9e53a 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -51,20 +51,24 @@ namespace ppc::comp::tree::ast { else return ctx.tokens[res_i].location.intersect(loc()); } - bool try_parse(const parser_t &parser, data::map_t &out, messages::msg_stack_t &messages) { + bool parse(const parser_t &parser, data::map_t &out) { + return parser(ctx, i, out); + } + bool try_parse(const parser_t &parser, 
data::map_t &out, bool silent = true) { try { return parser(ctx, i, out); } catch (messages::message_t msg) { - messages.push(msg); + if (!silent) ctx.messages.push(msg); return false; } } - bool try_parse(const parser_t &parser, data::map_t &out) { + bool try_parse(const parser_t &parser, data::map_t &out, message_t &err) { try { return parser(ctx, i, out); } catch (messages::message_t msg) { + err = msg; return false; } } From 452884a74c2e281412e460da97ea85a6a3988c48 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 20:36:30 +0300 Subject: [PATCH 20/74] feat: implement group parser --- src/compiler/treeifier/parsers/group.cc | 43 +++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/compiler/treeifier/parsers/group.cc b/src/compiler/treeifier/parsers/group.cc index 0a6e26e..979c49d 100644 --- a/src/compiler/treeifier/parsers/group.cc +++ b/src/compiler/treeifier/parsers/group.cc @@ -1,11 +1,50 @@ #include "compiler/treeifier/ast.hh" +#include "compiler/treeifier/tokenizer.hh" +#include "compiler/treeifier/ast/helper.hh" #include +using namespace ppc::comp::tree; using namespace ppc::comp::tree::ast; using namespace std::string_literals; +static bool read_nmsp(ast_ctx_t &ctx, size_t &i, const lang::namespace_name_t &name) { + tree_helper_t h(ctx, i); + + size_t equal_i = 0; + + while (true) { + if (h.ended()) break; + if (equal_i >= name.size()) return false; + auto &curr = h.curr(); + if (!curr.is_identifier()) return false; + + if (name[equal_i] != curr.identifier()) return false; + + if (h.try_advance() && h.curr().is_operator(operator_t::DOUBLE_COLON)) { + equal_i++; + } + else break; + } + + return equal_i != name.size(); +} + bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { - return false; + tree_helper_t h(ctx, i); + + for (auto &pair : named_parsers) { + if (!read_nmsp(ctx, i, pair.first)) continue; + auto &parser = 
*pair.second; + return h.parse(parser, out); + } + for (auto parser : parsers) { + try { + return h.parse(*parser, out); + } + catch (std::string) { + return false; + } + } } @@ -14,7 +53,7 @@ group_parser_t &group_parser_t::add(parser_t &parser) { return *this; } group_parser_t &group_parser_t::add(parser_t &parser, const lang::namespace_name_t &name) { - if (name.is_empty()) throw "Name can't be empty."s; + if (name.empty()) throw "Name can't be empty."s; if (std::find(parsers.begin(), parsers.end(), &parser) != parsers.end()) { throw "Parser '" + name.to_string() + "' already in group."; } From 9890630b79f9d664da7eb5c2a86ea964273cc0b8 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 23:02:18 +0300 Subject: [PATCH 21/74] chore: improve lsproj compilation --- scripts/common.mak | 13 ++++++++----- scripts/lsproj.mak | 3 +++ 2 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 scripts/lsproj.mak diff --git a/scripts/common.mak b/scripts/common.mak index aa84cb6..3b3e241 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -1,12 +1,12 @@ -$(shell $(call mkdir,$(bin))) -$(shell $(CXX) $(src)/lsproj.cc -o $(bin)/lsproj$(exe)) +export lsproj = $(bin)/lsproj$(exe) +export flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) rwildcard=$(foreach d,$(wildcard $(1:=/*)),$(call rwildcard,$d,$2) $(filter $(subst *,%,$2),$d)) uniq=$(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) -modoutput=$(shell ./$(bin)/lsproj$(exe) $(src) $1 output) +modoutput=$(shell ./$(lsproj) $(src) $1 output) deps=$(strip \ - $(foreach dep, $(shell ./$(bin)/lsproj$(exe) $(src) $1 deps),\ + $(foreach dep, $(shell ./$(lsproj) $(src) $1 deps),\ $(if $(wildcard src/$(dep)),\ $(dep),\ $(error The module '$(dep)' (dependency of '$1') doesn't exist)\ @@ -31,7 +31,10 @@ sources = $(call rwildcard,$(src)/$1,*.cc) headers 
= $(call rwildcard,$(inc),*.h) binaries = $(patsubst $(src)/%.cc,$(bin)/%.o,$(call sources,$1)) -flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) +ifneq ($(nolsproj),yes) +$(shell make -f scripts/lsproj.mak $(lsproj)) +endif + .PHONY: build .PRECIOUS: $(bin)/%.o diff --git a/scripts/lsproj.mak b/scripts/lsproj.mak new file mode 100644 index 0000000..6d49d5a --- /dev/null +++ b/scripts/lsproj.mak @@ -0,0 +1,3 @@ +$(lsproj): $(src)/lsproj.cc + $(call mkdir,$(dir $@)) + $(CXX) $^ -o $@ \ No newline at end of file From 0a68529c3b715e4b98917018a47b68869b607e8e Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 4 Oct 2022 23:45:08 +0300 Subject: [PATCH 22/74] a lot. --- .gitignore | 1 + Makefile | 4 +- include/compiler/treeifier/ast.hh | 6 +-- include/compiler/treeifier/ast/helper.hh | 14 ++++++- include/lang/common.hh | 7 ++-- include/utils/message.hh | 4 +- scripts/common.mak | 17 ++++---- src/compiler/treeifier/parsers/glob.cc | 52 +++++++++++++++++++++++- src/compiler/treeifier/parsers/group.cc | 6 ++- src/compiler/treeifier/tokenizer.cc | 20 ++++----- src/lang/version.cc | 6 +-- src/main/main.cc | 4 +- 12 files changed, 103 insertions(+), 38 deletions(-) diff --git a/.gitignore b/.gitignore index 7e4fcb7..f241371 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ !scripts !scripts/common.mak +!scripts/lsproj.mak !scripts/install.bat !scripts/uninstall.bat diff --git a/Makefile b/Makefile index c5fc586..f244a48 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -export MAKEFLAGS += --silent -r +export MAKEFLAGS += --silent -r -j export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow export ldflags=-L$(bin)/$(profile) export lib=ppc$(version-major)- @@ -51,7 +51,7 @@ build: version make -f scripts/common.mak if exist "$(subst /,\,$(bin)\$(output).exe)" del "$(subst 
/,\,$(bin)\$(output).exe)" mklink /H "$(subst /,\,$(bin)\$(output).exe)" "$(subst /,\,$(binary))" > NUL - + echo Done! clear: if exist $(subst /,\,$(oldbin)) rmdir /s /q $(subst /,\,$(oldbin)) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index f49b59f..38f3b96 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -10,6 +10,7 @@ using namespace std::string_literals; using namespace ppc; +using namespace ppc::lang; using namespace ppc::messages; namespace ppc::comp::tree::ast { @@ -42,6 +43,8 @@ namespace ppc::comp::tree::ast { public: msg_stack_t &messages; std::vector &tokens; + std::set imports; + located_t nmsp; void add_parser(std::string name, parser_t &parser); void add_parser(std::string name, group_parser_t &parser); @@ -57,10 +60,7 @@ namespace ppc::comp::tree::ast { }; class parser_t { - private: - std::string _name; public: - const std::string &name() { return _name; } virtual bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const = 0; bool operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { return parse(ctx, i, out); diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index bf9e53a..e4c0193 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -5,14 +5,19 @@ namespace ppc::comp::tree::ast { private: ast_ctx_t &ctx; size_t &res_i; - size_t i; void throw_ended() { if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); } + void throw_ended(const std::string &reason) { + if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end: " + reason, loc()); + } public: - void submit() { + size_t i; + + bool submit() { res_i = i; + return true; } bool ended() { @@ -83,6 +88,11 @@ namespace ppc::comp::tree::ast { i++; throw_ended(); } + void advance(const std::string &reason) { + throw_ended(reason); + i++; + throw_ended(reason); + } 
tree_helper_t(ast_ctx_t &ctx, size_t &i): ctx(ctx), res_i(i) { this->i = i; diff --git a/include/lang/common.hh b/include/lang/common.hh index 00670ad..69a2137 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -8,10 +8,9 @@ namespace ppc::lang { struct located_t : T { location_t location; - template - located_t(location_t loc, Args ...args): T(args...), location(loc) { } - template - located_t(Args ...args): T(args...), location(location_t::NONE) { } + located_t(location_t loc, const T &val): T(val), location(loc) { } + located_t(const T &val): T(val), location(location_t::NONE) { } + located_t() { } }; struct namespace_name_t : public std::vector { diff --git a/include/utils/message.hh b/include/utils/message.hh index 4223d3a..cf3ac52 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -17,7 +17,7 @@ namespace ppc::messages { std::string content; location_t location; - message_t(level_t level, std::string content, location_t loc = location_t::NONE) : + message_t(level_t level, const std::string &content, location_t loc = location_t::NONE) : level(level), content(content), location(loc) { } @@ -25,6 +25,8 @@ namespace ppc::messages { std::string to_string() const; bool is_severe() const; + + static message_t error(const std::string &message, location_t loc = location_t::NONE) { return message_t(ERROR, message, loc); } }; struct msg_stack_t { diff --git a/scripts/common.mak b/scripts/common.mak index 3b3e241..608e459 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -1,14 +1,16 @@ export lsproj = $(bin)/lsproj$(exe) export flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) -rwildcard=$(foreach d,$(wildcard $(1:=/*)),$(call rwildcard,$d,$2) $(filter $(subst *,%,$2),$d)) +rwildcard=$(foreach d, $(wildcard $(1:=/*)),\ + $(call rwildcard,$d,$2)\ + $(filter $(subst *,%,$2),$d)\ +) uniq=$(if $1,$(firstword $1) $(call uniq,$(filter-out 
$(firstword $1),$1))) modoutput=$(shell ./$(lsproj) $(src) $1 output) deps=$(strip \ $(foreach dep, $(shell ./$(lsproj) $(src) $1 deps),\ - $(if $(wildcard src/$(dep)),\ - $(dep),\ + $(if $(wildcard src/$(dep)), $(dep),\ $(error The module '$(dep)' (dependency of '$1') doesn't exist)\ )\ )\ @@ -29,15 +31,14 @@ lrdeps=$(foreach dep,$(call rdeps,$1),-l$(lib)$(call modoutput,$(dep))) modules = $(patsubst $(src)/%/,$(bin)/lib$(lib)%$(so),$(filter-out $(src)/$(mainmodule)/,$(wildcard $(src)/*/))) sources = $(call rwildcard,$(src)/$1,*.cc) headers = $(call rwildcard,$(inc),*.h) -binaries = $(patsubst $(src)/%.cc,$(bin)/%.o,$(call sources,$1)) +binaries = $(patsubst $(src)/%.cc,$(bin)/tmp/%.o,$(call sources,$1)) ifneq ($(nolsproj),yes) -$(shell make -f scripts/lsproj.mak $(lsproj)) +$(shell make -f scripts/lsproj.mak lsproj=$(lsproj) src=$(src) $(lsproj)) endif - .PHONY: build -.PRECIOUS: $(bin)/%.o +.PRECIOUS: $(bin)/tmp/%.o build: $(binary) @@ -53,7 +54,7 @@ $(bin)/lib$(lib)%$(so): $$(call frdeps,$$*) $$(call binaries,$$*) echo Compiling library '$(notdir $@)'... $(CXX) -shared -fPIC $(flags) $(call binaries,$*) -o $@ $(ldflags) $(call ldeps,$*) -L$(bin) "-I$(inc)" -$(bin)/%.o: $(src)/%.cc $(headers) +$(bin)/tmp/%.o: $(src)/%.cc $(headers) echo - Compiling '$*.cc'... 
$(call mkdir,$(dir $@)) $(CXX) -fPIC -c $(flags) $< -o $@ diff --git a/src/compiler/treeifier/parsers/glob.cc b/src/compiler/treeifier/parsers/glob.cc index a4627b8..e574e3e 100644 --- a/src/compiler/treeifier/parsers/glob.cc +++ b/src/compiler/treeifier/parsers/glob.cc @@ -1,9 +1,59 @@ #include "compiler/treeifier/ast.hh" +#include "compiler/treeifier/ast/helper.hh" namespace ppc::comp::tree::ast { class glob_parser_t : public parser_t { + bool parse_nmsp(ast_ctx_t &ctx, size_t &res_i, located_t &out) const { + tree_helper_t h(ctx, res_i); + located_t res; + + while (true) { + auto &curr = h.curr(); + + if (h.ended() || !curr.is_identifier()) return false; + else res.push_back(curr.identifier()); + + if (!h.try_advance() || !h.curr().is_operator(operator_t::DOUBLE_COLON)) { + out = res; + return h.submit(); + } + } + } + bool parse_nmsp_def(ast_ctx_t &ctx, size_t &res_i) const { + tree_helper_t h(ctx, res_i); + if (h.ended()) return true; + + if (h.curr().is_identifier("namespace")) { + h.advance("Expected a namespace name."); + if (!parse_nmsp(ctx, h.i, ctx.nmsp)) throw message_t::error("Expected a namespace name.", h.loc()); + return h.submit(); + } + else return false; + } + bool parse_import(ast_ctx_t &ctx, size_t &res_i) const { + tree_helper_t h(ctx, res_i); + if (h.ended()) return true; + + if (h.curr().is_identifier("import")) { + h.advance("Expected a namespace name."); + located_t name; + if (!parse_nmsp(ctx, h.i, name)) throw message_t::error("Expected a namespace name.", h.loc()); + if (!ctx.imports.emplace(name).second) { + throw message_t::error("The namespace '" + name.to_string() + "' is already imported.", h.loc()); + } + return h.submit(); + } + else return false; + } + bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { - return false; + tree_helper_t h(ctx, res_i); + if (h.ended()) return true; + parse_nmsp_def(ctx, h.i); + + while (parse_import(ctx, h.i)); + + return true; } }; diff --git 
a/src/compiler/treeifier/parsers/group.cc b/src/compiler/treeifier/parsers/group.cc index 979c49d..2c4d17c 100644 --- a/src/compiler/treeifier/parsers/group.cc +++ b/src/compiler/treeifier/parsers/group.cc @@ -41,12 +41,14 @@ bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { try { return h.parse(*parser, out); } - catch (std::string) { + catch (const message_t &err) { + ctx.messages.push(err); return false; } } -} + return false; +} group_parser_t &group_parser_t::add(parser_t &parser) { parsers.push_back(&parser); diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index c91614f..2063afe 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -52,7 +52,7 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: if (is_char) throw message_t(message_t::ERROR, "Unterminated char literal.", token.location); else throw message_t(message_t::ERROR, "Unterminated string literal.", token.location); } -static tok::token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { +static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { enum radix_t { BINARY, OCTAL, @@ -119,9 +119,9 @@ static tok::token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { } } - return tok::token_t(res, token.location); + return token_t(res, token.location); } -static tok::token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { +static token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { double whole = 0, fract = 0; char c; @@ -143,16 +143,16 @@ static tok::token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { } } - return tok::token_t(whole + fract, token.location); + return token_t(whole + fract, token.location); } -tok::token_t tok::token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { +token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { switch (in.type) { case 
lex::token_t::IDENTIFIER: - return tok::token_t(in.data, in.location); + return token_t(in.data, in.location); case lex::token_t::OPERATOR: try { - auto op = tok::operator_find(in.data); + auto op = operator_find(in.data); return token_t(op, in.location); } catch (std::string &err) { @@ -176,11 +176,11 @@ tok::token_t tok::token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t throw message_t(message_t::ERROR, "Token type not recognised.", in.location); } } -std::vector tok::token_t::parse_many(messages::msg_stack_t &msg_stack, std::vector tokens) { - std::vector res; +std::vector token_t::parse_many(messages::msg_stack_t &msg_stack, std::vector tokens) { + std::vector res; for (auto &tok : tokens) { - res.push_back(tok::token_t::parse(msg_stack, tok)); + res.push_back(token_t::parse(msg_stack, tok)); } return res; diff --git a/src/lang/version.cc b/src/lang/version.cc index 21c2918..a7f0642 100644 --- a/src/lang/version.cc +++ b/src/lang/version.cc @@ -4,14 +4,14 @@ using namespace ppc; bool version_t::operator==(version_t other) const { bool major_same = major == other.major; - bool minor_same = minor == -1 || other.minor == -1 || minor == other.minor; - bool revision_same = revision == -1 || other.revision == -1 || revision == other.revision; + bool minor_same = minor == -1u || other.minor == -1 || minor == other.minor; + bool revision_same = revision == -1u || other.revision == -1u || revision == other.revision; return major_same && minor_same && revision_same; } bool version_t::is_compliant(version_t other) const { bool major_compliant = major == other.major; - bool minor_compliant = minor == -1 || other.minor == -1 || minor <= other.minor; + bool minor_compliant = minor == -1u || other.minor == -1u || minor <= other.minor; return major_compliant && minor_compliant; } diff --git a/src/main/main.cc b/src/main/main.cc index fbc052e..bb637ce 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -152,11 +152,11 @@ int main(int argc, const char *argv[]) { 
for (const auto &file : files) { std::ifstream f { file, std::ios_base::in }; try { - auto res = tok::token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); + auto res = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); for (auto tok : res) { if (tok.is_identifier()) std::cout << "Identifier: \t" << tok.identifier(); - if (tok.is_operator()) std::cout << "Operator: \t" << tok::operator_stringify(tok._operator()); + if (tok.is_operator()) std::cout << "Operator: \t" << operator_stringify(tok._operator()); if (tok.is_float_lit()) std::cout << "Float: \t" << tok.float_lit(); if (tok.is_int_lit()) std::cout << "Int: \t" << tok.int_lit(); if (tok.is_char_lit()) std::cout << "Char: \t" << tok.char_lit(); From ee6c29bb7dbc14b9eb66faf52f248b80930a1455 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Sun, 9 Oct 2022 14:18:38 +0300 Subject: [PATCH 23/74] feat: add namespace and import parsing --- .gitignore | 2 +- include/compiler/treeifier/ast.hh | 64 ++-- include/compiler/treeifier/ast/helper.hh | 94 ++++-- include/lang/common.hh | 18 +- include/utils/data.hh | 45 ++- include/utils/location.hh | 1 + include/utils/message.hh | 3 + scripts/common.mak | 8 +- src/{compiler/proj.txt => compiler.proj} | 2 +- src/compiler/treeifier/ast.cc | 45 ++- src/compiler/treeifier/ast/conv.cc | 80 +++++ src/compiler/treeifier/ast/parsers/glob.cc | 65 +++++ .../treeifier/{ => ast}/parsers/group.cc | 0 .../treeifier/ast/parsers/identifier.cc | 21 ++ src/compiler/treeifier/ast/parsers/nmsp.cc | 26 ++ src/compiler/treeifier/lexer.cc | 2 +- src/compiler/treeifier/parsers/glob.cc | 61 ---- src/{lang/proj.txt => lang.proj} | 0 src/lang/common.cc | 40 +++ src/lsproj.cc | 2 +- src/{main/proj.txt => main.proj} | 2 +- src/main/main.cc | 8 +- src/{utils/proj.txt => utils.proj} | 0 src/utils/data.cc | 276 ++++++++++-------- src/utils/message.cc | 124 ++++---- 25 files changed, 655 insertions(+), 334 
deletions(-) rename src/{compiler/proj.txt => compiler.proj} (95%) create mode 100644 src/compiler/treeifier/ast/conv.cc create mode 100644 src/compiler/treeifier/ast/parsers/glob.cc rename src/compiler/treeifier/{ => ast}/parsers/group.cc (100%) create mode 100644 src/compiler/treeifier/ast/parsers/identifier.cc create mode 100644 src/compiler/treeifier/ast/parsers/nmsp.cc delete mode 100644 src/compiler/treeifier/parsers/glob.cc rename src/{lang/proj.txt => lang.proj} (100%) rename src/{main/proj.txt => main.proj} (95%) rename src/{utils/proj.txt => utils.proj} (100%) diff --git a/.gitignore b/.gitignore index f241371..df9f664 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,7 @@ !src/*/**/*.cc !src/*/**/*.h !src/*/**/*.hh -!src/*/proj.txt +!src/*.proj !src/lsproj.cc !scripts diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 38f3b96..4eccdf1 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -17,54 +17,71 @@ namespace ppc::comp::tree::ast { class parser_t; class group_parser_t; + extern const parser_t &glob_parser; + extern const parser_t &identifier_parser; + extern const parser_t &nmsp_parser; + struct ast_ctx_t { private: - using named_parser_t = std::pair; - struct parser_proxy_t { private: - ast_ctx_t &parent; + ast_ctx_t *parent; public: - parser_t &operator[](const std::string &name) const; - parser_proxy_t(ast_ctx_t &parent): parent(parent) { } + const parser_t &operator[](const std::string &name) const; + parser_proxy_t(ast_ctx_t *parent): parent(parent) { } }; struct group_proxy_t { private: - ast_ctx_t &parent; + ast_ctx_t *parent; public: - group_parser_t &operator[](const std::string &name) const; - group_proxy_t(ast_ctx_t &parent): parent(parent) { } + const group_parser_t &operator[](const std::string &name) const; + group_proxy_t(ast_ctx_t *parent): parent(parent) { } }; - std::unordered_map parsers; - std::set groups; + std::unordered_map parsers; + std::set 
groups; public: msg_stack_t &messages; std::vector &tokens; - std::set imports; - located_t nmsp; + std::set imports; + loc_namespace_name_t nmsp; - void add_parser(std::string name, parser_t &parser); - void add_parser(std::string name, group_parser_t &parser); + void add_parser(const parser_t &parser); + void add_parser(const group_parser_t &parser); const parser_proxy_t parser; const group_proxy_t group; - ast_ctx_t(msg_stack_t &messages, std::vector tokens): + ast_ctx_t &init() { + add_parser(glob_parser); + return *this; + } + + static bool parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out); + + ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messages(messages), tokens(tokens), - parser(*this), - group(*this) { } + parser(this), + group(this) { } }; class parser_t { - public: + private: + std::string _name; + protected: virtual bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const = 0; + public: + const std::string &name() const { return _name; } bool operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { + data::map_t res; + out["$_name"] = _name; return parse(ctx, i, out); } + + parser_t(const std::string &name): _name(name) { } }; class group_parser_t : public parser_t { @@ -78,7 +95,14 @@ namespace ppc::comp::tree::ast { bool parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; }; - extern const parser_t &glob_parser; + namespace conv { + data::map_t identifier_to_map(const located_t &loc); + located_t map_to_identifier(const data::map_t &map); - const group_parser_t &get_group(std::string name); + data::map_t loc_to_map(const location_t &loc); + location_t map_to_loc(const data::map_t &map); + + data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp); + loc_namespace_name_t map_to_nmsp(const data::map_t &map); + } } \ No newline at end of file diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index e4c0193..08defde 100644 --- 
a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -1,5 +1,11 @@ #include "compiler/treeifier/ast.hh" +using namespace ppc; +using namespace ppc::lang; +using namespace ppc::data; +using namespace ppc::comp::tree; +using namespace ppc::comp::tree::ast; + namespace ppc::comp::tree::ast { struct tree_helper_t { private: @@ -15,20 +21,6 @@ namespace ppc::comp::tree::ast { public: size_t i; - bool submit() { - res_i = i; - return true; - } - - bool ended() { - return i == ctx.tokens.size(); - } - - token_t &curr() { - throw_ended(); - return ctx.tokens[i]; - } - location_t next_loc(size_t n = 1) { location_t res = loc(); res.start += res.length; @@ -56,26 +48,22 @@ namespace ppc::comp::tree::ast { else return ctx.tokens[res_i].location.intersect(loc()); } - bool parse(const parser_t &parser, data::map_t &out) { - return parser(ctx, i, out); + void err(std::string message) { + throw message_t::error(message, loc()); } - bool try_parse(const parser_t &parser, data::map_t &out, bool silent = true) { - try { - return parser(ctx, i, out); - } - catch (messages::message_t msg) { - if (!silent) ctx.messages.push(msg); - return false; - } + + bool submit(bool inc_i = true) { + res_i = (i += inc_i); + return true; } - bool try_parse(const parser_t &parser, data::map_t &out, message_t &err) { - try { - return parser(ctx, i, out); - } - catch (messages::message_t msg) { - err = msg; - return false; - } + + bool ended() { + return i == ctx.tokens.size(); + } + + token_t &curr() { + throw_ended(); + return ctx.tokens[i]; } bool try_advance() { @@ -94,6 +82,48 @@ namespace ppc::comp::tree::ast { throw_ended(reason); } + bool push_parse(const parser_t &parser, data::array_t &out) { + data::map_t res; + if (parser(ctx, i, res)) { + out.push(res); + return true; + } + else return false; + } + + bool parse(const parser_t &parser, data::map_t &out) { + return parser(ctx, i, out); + } + + void force_push_parse(const parser_t &parser, std::string 
message, data::array_t &out) { + advance(message); + bool success; + + try { + success = push_parse(parser, out); + } + catch (const message_t &msg) { + ctx.messages.push(msg); + success = false; + } + + if (!success) err(message); + } + void force_parse(const parser_t &parser, std::string message, data::map_t &out) { + advance(message); + bool success; + + try { + success = parse(parser, out); + } + catch (const message_t &msg) { + ctx.messages.push(msg); + success = false; + } + + if (!success) err(message); + } + tree_helper_t(ast_ctx_t &ctx, size_t &i): ctx(ctx), res_i(i) { this->i = i; } diff --git a/include/lang/common.hh b/include/lang/common.hh index 69a2137..2bb6186 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -19,15 +19,31 @@ namespace ppc::lang { bool operator ==(const namespace_name_t &other) const; bool operator !=(const namespace_name_t &other) const; + operator std::string() const { return to_string(); } std::string to_string() const; namespace_name_t() { } namespace_name_t(std::initializer_list segments): base(segments.begin(), segments.end()) { } }; + struct loc_namespace_name_t : public std::vector> { + using base = std::vector>; + + bool operator ==(const loc_namespace_name_t &other) const; + bool operator !=(const loc_namespace_name_t &other) const; + + namespace_name_t strip_location(); + + operator std::string() const { return to_string(); } + std::string to_string() const; + + loc_namespace_name_t() { } + loc_namespace_name_t(std::initializer_list> segments): base(segments.begin(), segments.end()) { } + }; + bool is_identifier_valid(messages::msg_stack_t &msg_stack, ppc::location_t location, const std::string &name); inline bool is_identifier_valid(const std::string &name) { - messages::msg_stack_t ms { }; + messages::msg_stack_t ms; return is_identifier_valid(ms, { }, name); } } diff --git a/include/utils/data.hh b/include/utils/data.hh index 9f5b1cd..5ea2fc3 100644 --- a/include/utils/data.hh +++ 
b/include/utils/data.hh @@ -45,22 +45,26 @@ namespace ppc::data { bool string(string_t &out) const; bool boolean(bool_t &out) const; - const array_t &array() const; - const map_t &map() const; + array_t &array() const; + map_t &map() const; number_t number() const; - const string_t &string() const; + string_t &string() const; bool_t boolean() const; - // value_t &operator=(const value_t &other); + value_t &operator=(const value_t &other); ~value_t(); value_t(); value_t(const array_t &val); value_t(const map_t &val); + value_t(std::initializer_list> map); value_t(number_t val); value_t(const string_t &val); value_t(bool_t val); value_t(const value_t &other); + + static value_t mk_arr(); + static value_t mk_map(); }; @@ -68,18 +72,34 @@ namespace ppc::data { private: std::unordered_map values; public: - value_t &operator [](std::string name) { - if (values.find(name) == values.end()) { - values.emplace(name, value_t()); + value_t &operator [](std::string name){ + auto res = values.find(name); + if (res == values.end()) { + res = values.emplace(name, value_t()).first; } + return res->second; + } + const value_t &operator [](std::string name) const { + auto res = values.find(name); + if (res == values.end()) throw "The map doesn't contain a key '" + name + "'."; + return res->second; + } - return values[name]; + bool has(std::string key) const { + return values.find(key) != values.end(); } std::size_t size() const { return values.size(); } auto begin() const { return values.begin(); } auto end() const { return values.end(); } + + map_t() { } + map_t(std::initializer_list> vals) { + for (const auto &pair : vals) { + values.insert(pair); + } + } }; class array_t { @@ -87,9 +107,10 @@ namespace ppc::data { std::vector values; public: value_t &operator [](std::size_t i) { return values[i]; } + const value_t &operator [](std::size_t i) const { return values[i]; } - auto begin() { return values.begin(); } - auto end() { return values.end(); } + auto begin() const { return 
values.begin(); } + auto end() const { return values.end(); } void push(const value_t &val) { values.push_back(val); } void insert(const value_t &val, std::size_t i = 0) { values.insert(begin() + i, val); } @@ -97,5 +118,9 @@ namespace ppc::data { void remove(std::size_t i = 0) { values.erase(begin() + i); } std::size_t size() const { return values.size(); } + + array_t() { } + array_t(const std::vector &val): values(val) { } + array_t(std::initializer_list val): values(val) { } }; } \ No newline at end of file diff --git a/include/utils/location.hh b/include/utils/location.hh index 0257414..0ab706f 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -12,6 +12,7 @@ namespace ppc { std::size_t code_start; std::string filename; + operator std::string() const { return to_string(); } std::string to_string() const; location_t intersect(location_t other) const; diff --git a/include/utils/message.hh b/include/utils/message.hh index cf3ac52..63a73b8 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -23,6 +23,8 @@ namespace ppc::messages { location(loc) { } message_t() : message_t(DEBUG, "") { } + operator std::string() const { return to_string(); } + std::string to_string() const; bool is_severe() const; @@ -37,6 +39,7 @@ namespace ppc::messages { inline auto end() { return messages.end(); } void push(const message_t &msg) { messages.push_back(msg); } + const message_t &peek() { return messages.back(); } void clear() { messages.clear(); } bool is_failed() const; diff --git a/scripts/common.mak b/scripts/common.mak index 608e459..626748c 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -1,6 +1,8 @@ export lsproj = $(bin)/lsproj$(exe) export flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) +$(shell make -f scripts/lsproj.mak lsproj=$(lsproj) src=$(src) $(lsproj)) + rwildcard=$(foreach d, $(wildcard $(1:=/*)),\ $(call 
rwildcard,$d,$2)\ $(filter $(subst *,%,$2),$d)\ @@ -30,13 +32,9 @@ lrdeps=$(foreach dep,$(call rdeps,$1),-l$(lib)$(call modoutput,$(dep))) modules = $(patsubst $(src)/%/,$(bin)/lib$(lib)%$(so),$(filter-out $(src)/$(mainmodule)/,$(wildcard $(src)/*/))) sources = $(call rwildcard,$(src)/$1,*.cc) -headers = $(call rwildcard,$(inc),*.h) +headers = $(call rwildcard,$(inc),*.hh) binaries = $(patsubst $(src)/%.cc,$(bin)/tmp/%.o,$(call sources,$1)) -ifneq ($(nolsproj),yes) -$(shell make -f scripts/lsproj.mak lsproj=$(lsproj) src=$(src) $(lsproj)) -endif - .PHONY: build .PRECIOUS: $(bin)/tmp/%.o diff --git a/src/compiler/proj.txt b/src/compiler.proj similarity index 95% rename from src/compiler/proj.txt rename to src/compiler.proj index b181367..6fa4f42 100644 --- a/src/compiler/proj.txt +++ b/src/compiler.proj @@ -1,2 +1,2 @@ -compiler +compiler utils, lang \ No newline at end of file diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index 1805400..fadbca8 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -3,25 +3,44 @@ namespace ppc::comp::tree::ast { std::unordered_map parsers; - parser_t &ast_ctx_t::parser_proxy_t::operator[](const std::string &name) const { - auto it = parent.parsers.find(name); - if (it == parent.parsers.end()) throw "The parser '" + name + "' doesn't exist."; + const parser_t &ast_ctx_t::parser_proxy_t::operator[](const std::string &name) const { + auto it = parent->parsers.find(name); + if (it == parent->parsers.end()) throw "The parser '" + name + "' doesn't exist."; return *it->second; } - group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { - parser_t *p = &parent.parser[name]; - if (parent.groups.find(p) == parent.groups.end()) throw "A parser '" + name + "' exists, but isn't a group."; - return *(group_parser_t*)p; + const group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { + auto p = &parent->parser[name]; + if 
(parent->groups.find(p) == parent->groups.end()) throw "A parser '" + name + "' exists, but isn't a group."; + return *(const group_parser_t*)p; } - void ast_ctx_t::add_parser(std::string name, parser_t &parser) { - if (parsers.find(name) != parsers.end()) throw "The parser '" + name + "' already exists."; - parsers[name] = &parser; + void ast_ctx_t::add_parser(const parser_t &parser) { + if (parsers.find(parser.name()) != parsers.end()) throw "The parser '" + parser.name() + "' already exists."; + parsers[parser.name()] = &parser; } - void ast_ctx_t::add_parser(std::string name, group_parser_t &parser) { - if (parsers.find(name) != parsers.end()) throw "The parser '" + name + "' already exists."; - parsers[name] = &parser; + void ast_ctx_t::add_parser(const group_parser_t &parser) { + if (parsers.find(parser.name()) != parsers.end()) throw "The parser '" + parser.name() + "' already exists."; + parsers[parser.name()] = &parser; groups.emplace(&parser); } + + bool ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out) { + ast_ctx_t ctx(messages, tokens); + ctx.init(); + data::map_t res; + size_t i = 0; + + try { + if (glob_parser(ctx, i, out)) { + out = res; + return true; + } + else return false; + } + catch (const message_t &msg) { + messages.push(msg); + return false; + } + } } diff --git a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc new file mode 100644 index 0000000..f323793 --- /dev/null +++ b/src/compiler/treeifier/ast/conv.cc @@ -0,0 +1,80 @@ +#include "compiler/treeifier/ast.hh" + +namespace ppc::comp::tree::ast::conv { + data::map_t identifier_to_map(const located_t &loc) { + return { + { "location", conv::loc_to_map(loc.location) }, + { "content", loc }, + { "$_name", identifier_parser.name() }, + }; + } + located_t map_to_identifier(const data::map_t &map) { + return { conv::map_to_loc(map["location"].map()), map["content"].string() }; + } + + data::map_t loc_to_map(const location_t &loc) { + 
data::map_t res = { + { "$_name", "$_loc" }, + }; + + if (loc.filename != "") res["filename"] = loc.filename; + if (loc.start != -1u) res["start"] = (float)loc.start; + if (loc.code_start != -1u) res["code_start"] = (float)loc.code_start; + if (loc.length != -1u) res["length"] = (float)loc.length; + + return res; + } + location_t map_to_loc(const data::map_t &map) { + location_t res; + + if (map.has("filename")) { + if (map["filename"].is_string()) res.filename = map["filename"].string(); + else throw "Expected key 'filename' to be a string."; + } + if (map.has("start")) { + if (map["start"].is_number()) res.start = (size_t)map["start"].number(); + else throw "Expected key 'start' to be a number."; + } + if (map.has("length")) { + if (map["length"].is_number()) res.length = (size_t)map["length"].number(); + else throw "Expected key 'length' to be a number."; + } + if (map.has("code_start")) { + if (map["code_start"].is_number()) res.code_start = (size_t)map["code_start"].number(); + else throw "Expected key 'code_start' to be a number."; + } + + return res; + } + + data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp) { + data::map_t res; + + auto arr = (res["content"] = data::array_t()).array(); + + for (const auto &segment : nmsp) { + arr.push({ + { "location", loc_to_map(segment.location) }, + { "content", segment }, + { "$_name", nmsp_parser.name() }, + }); + } + + return res; + } + loc_namespace_name_t map_to_nmsp(const data::map_t &map) { + loc_namespace_name_t res; + + for (const auto &segment : map["content"].array()) { + try { + auto val = map_to_identifier(segment.map()); + res.push_back(val); + } + catch (const message_t &) { + throw "'content' of a namespace map must contain only identifiers."; + } + } + + return res; + } +} diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc new file mode 100644 index 0000000..332ecea --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -0,0 +1,65 @@ 
+#include "compiler/treeifier/ast.hh" +#include "compiler/treeifier/ast/helper.hh" + +using namespace ppc::comp::tree::ast; + +class nmsp_def_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) const { + tree_helper_t h(ctx, res_i); + if (h.ended()) return false; + + if (!h.curr().is_identifier("namespace")) return false; + h.force_parse(nmsp_parser, "Expected a namespace.", res); + if (!h.curr().is_operator(operator_t::SEMICOLON)) h.err("Expected a semicolon."); + + return h.submit(true); + } + + public: nmsp_def_parser_t(): parser_t("$_nmsp_def") { } +}; +class import_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) const { + tree_helper_t h(ctx, res_i); + if (h.ended()) return false; + + if (!h.curr().is_identifier("import")) return false; + h.force_parse(nmsp_parser, "Expected a namespace.", res); + if (!h.curr().is_operator(operator_t::SEMICOLON)) h.err("Expected a semicolon."); + + return h.submit(true); + } + + public: import_parser_t(): parser_t("$_import") { } +}; + +const parser_t &import_parser = import_parser_t(); +const parser_t &nmsp_def_parser = nmsp_def_parser_t(); + +class glob_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { + tree_helper_t h(ctx, res_i); + if (h.ended()) return true; + h.parse(nmsp_def_parser, (out["namespace"] = map_t()).map()); + ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); + + auto imports = (out["imports"] = array_t()).array(); + + while (true) { + map_t map; + imports.push(map); + if (!h.parse(import_parser, map)) break; + auto nmsp = conv::map_to_nmsp(map); + + if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); + } + + if (!h.ended()) h.err("Invalid token."); + + return h.submit(); + } + +public: + glob_parser_t(): parser_t("$_glob") { } +}; + +const parser_t &ppc::comp::tree::ast::glob_parser = glob_parser_t(); diff --git 
a/src/compiler/treeifier/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc similarity index 100% rename from src/compiler/treeifier/parsers/group.cc rename to src/compiler/treeifier/ast/parsers/group.cc diff --git a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc new file mode 100644 index 0000000..b9b8557 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -0,0 +1,21 @@ +#include "compiler/treeifier/ast/helper.hh" + +class identifier_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (h.curr().is_identifier()) { + auto loc = h.loc(); + out["location"] = conv::loc_to_map(loc); + out["content"] = h.curr().identifier(); + return h.submit(); + } + else return false; + } + + public: identifier_parser_t(): parser_t("$_identifier") { } +}; + +const parser_t &ppc::comp::tree::ast::identifier_parser = identifier_parser_t(); diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc new file mode 100644 index 0000000..910ed7e --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -0,0 +1,26 @@ +#include "compiler/treeifier/ast/helper.hh" + +class nmsp_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + auto &arr = (out["content"] = array_t()).array(); + + if (!h.push_parse(identifier_parser, arr)) return false; + + while (true) { + if (h.ended()) break; + if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; + h.force_push_parse(identifier_parser, "Expected an identifier.", arr); + } + + out["location"] = conv::loc_to_map(h.res_loc()); + return h.submit(false); + } + + public: nmsp_parser_t(): parser_t("$_nmsp") { } +}; + +const parser_t &ppc::comp::tree::ast::nmsp_parser = nmsp_parser_t(); diff --git 
a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index e13b2a0..4012dee 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -189,7 +189,7 @@ const lexlet_t LEXLET_OPERATOR = (lexlet_t) { }, .process = [] (char curr) { bool failed = true; - if (first_op == curr && op_i == 1 && is_any(curr, "+-&|?<>")) failed = false; + if (first_op == curr && op_i == 1 && is_any(curr, ":+-&|?<>")) failed = false; if (curr == '=') { if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; if (op_i == 2 && is_any(first_op, "<>?")) failed = false; diff --git a/src/compiler/treeifier/parsers/glob.cc b/src/compiler/treeifier/parsers/glob.cc deleted file mode 100644 index e574e3e..0000000 --- a/src/compiler/treeifier/parsers/glob.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include "compiler/treeifier/ast.hh" -#include "compiler/treeifier/ast/helper.hh" - -namespace ppc::comp::tree::ast { - class glob_parser_t : public parser_t { - bool parse_nmsp(ast_ctx_t &ctx, size_t &res_i, located_t &out) const { - tree_helper_t h(ctx, res_i); - located_t res; - - while (true) { - auto &curr = h.curr(); - - if (h.ended() || !curr.is_identifier()) return false; - else res.push_back(curr.identifier()); - - if (!h.try_advance() || !h.curr().is_operator(operator_t::DOUBLE_COLON)) { - out = res; - return h.submit(); - } - } - } - bool parse_nmsp_def(ast_ctx_t &ctx, size_t &res_i) const { - tree_helper_t h(ctx, res_i); - if (h.ended()) return true; - - if (h.curr().is_identifier("namespace")) { - h.advance("Expected a namespace name."); - if (!parse_nmsp(ctx, h.i, ctx.nmsp)) throw message_t::error("Expected a namespace name.", h.loc()); - return h.submit(); - } - else return false; - } - bool parse_import(ast_ctx_t &ctx, size_t &res_i) const { - tree_helper_t h(ctx, res_i); - if (h.ended()) return true; - - if (h.curr().is_identifier("import")) { - h.advance("Expected a namespace name."); - located_t name; - if (!parse_nmsp(ctx, h.i, name)) throw 
message_t::error("Expected a namespace name.", h.loc()); - if (!ctx.imports.emplace(name).second) { - throw message_t::error("The namespace '" + name.to_string() + "' is already imported.", h.loc()); - } - return h.submit(); - } - else return false; - } - - bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { - tree_helper_t h(ctx, res_i); - if (h.ended()) return true; - parse_nmsp_def(ctx, h.i); - - while (parse_import(ctx, h.i)); - - return true; - } - }; - - const parser_t &glob_parser = glob_parser_t(); -} diff --git a/src/lang/proj.txt b/src/lang.proj similarity index 100% rename from src/lang/proj.txt rename to src/lang.proj diff --git a/src/lang/common.cc b/src/lang/common.cc index 99aa799..36f1e8a 100644 --- a/src/lang/common.cc +++ b/src/lang/common.cc @@ -3,6 +3,16 @@ #include "lang/common.hh" namespace ppc::lang { + std::string loc_namespace_name_t::to_string() const { + std::stringstream res; + + for (size_t i = 0; i < size(); i++) { + if (i != 0) res << "::"; + res << (*this)[i]; + } + + return res.str(); + } std::string namespace_name_t::to_string() const { std::stringstream res; @@ -13,6 +23,25 @@ namespace ppc::lang { return res.str(); } + + bool loc_namespace_name_t::operator==(const loc_namespace_name_t &other) const { + if (other.size() != size()) return false; + + for (size_t i = 0; i < size(); i++) { + if (other[i] != (*this)[i]) return false; + } + + return true; + } + bool loc_namespace_name_t::operator!=(const loc_namespace_name_t &other) const { + if (other.size() != size()) return true; + + for (size_t i = 0; i < size(); i++) { + if (other[i] == (*this)[i]) return false; + } + + return true; + } bool namespace_name_t::operator==(const namespace_name_t &other) const { if (other.size() != size()) return false; @@ -32,5 +61,16 @@ namespace ppc::lang { return true; } + + namespace_name_t loc_namespace_name_t::strip_location() { + namespace_name_t res; + + for (const auto &el : *this) { + res.push_back(el); + } + + return res; + 
} + } diff --git a/src/lsproj.cc b/src/lsproj.cc index 934dd33..e133779 100644 --- a/src/lsproj.cc +++ b/src/lsproj.cc @@ -101,7 +101,7 @@ int main(int argc, const char* argv[]) { throw (std::string)"Incorrect usage. Syntax: [src-dir] [project-name] [output|deps]."; } - std::string proj_path = (std::string)argv[0] + "/" + argv[1] + "/proj.txt"; + std::string proj_path = (std::string)argv[0] + "/" + argv[1] + ".proj"; proj_name = argv[1]; std::ifstream f { proj_path, std::ios_base::in }; diff --git a/src/main/proj.txt b/src/main.proj similarity index 95% rename from src/main/proj.txt rename to src/main.proj index 23e5243..dd1bfcc 100644 --- a/src/main/proj.txt +++ b/src/main.proj @@ -1,2 +1,2 @@ -main +main utils, compiler \ No newline at end of file diff --git a/src/main/main.cc b/src/main/main.cc index bb637ce..f9917ad 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -24,6 +24,7 @@ #include "utils/strings.hh" #include "compiler/treeifier/lexer.hh" #include "compiler/treeifier/tokenizer.hh" +#include "compiler/treeifier/ast.hh" #include "./opions.hh" using std::cout; @@ -152,9 +153,10 @@ int main(int argc, const char *argv[]) { for (const auto &file : files) { std::ifstream f { file, std::ios_base::in }; try { - auto res = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); - - for (auto tok : res) { + auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); + data::map_t ast; + if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) throw msg_stack.peek(); + for (auto tok : tokens) { if (tok.is_identifier()) std::cout << "Identifier: \t" << tok.identifier(); if (tok.is_operator()) std::cout << "Operator: \t" << operator_stringify(tok._operator()); if (tok.is_float_lit()) std::cout << "Float: \t" << tok.float_lit(); diff --git a/src/utils/proj.txt b/src/utils.proj similarity index 100% rename from src/utils/proj.txt rename to src/utils.proj diff --git a/src/utils/data.cc b/src/utils/data.cc index 
65b849b..f0d1f79 100644 --- a/src/utils/data.cc +++ b/src/utils/data.cc @@ -1,135 +1,165 @@ #include "utils/data.hh" -bool ppc::data::value_t::is_null() const { - return type == type_t::Null; -} -bool ppc::data::value_t::is_map() const { - return type == type_t::Map; -} -bool ppc::data::value_t::is_array() const { - return type == type_t::Arr; -} -bool ppc::data::value_t::is_number() const { - return type == type_t::Num; -} -bool ppc::data::value_t::is_string() const { - return type == type_t::Str; -} -bool ppc::data::value_t::is_bool() const { - return type == type_t::Bool; -} +namespace ppc::data { + bool value_t::is_null() const { + return type == type_t::Null; + } + bool value_t::is_map() const { + return type == type_t::Map; + } + bool value_t::is_array() const { + return type == type_t::Arr; + } + bool value_t::is_number() const { + return type == type_t::Num; + } + bool value_t::is_string() const { + return type == type_t::Str; + } + bool value_t::is_bool() const { + return type == type_t::Bool; + } -bool ppc::data::value_t::array(ppc::data::array_t &out) const { - if (is_array()) { - out = *val.arr; - return true; + bool value_t::array(array_t &out) const { + if (is_array()) { + out = *val.arr; + return true; + } + return false; } - return false; -} -bool ppc::data::value_t::map(ppc::data::map_t &out) const { - if (is_map()) { - out = *val.map; - return true; + bool value_t::map(map_t &out) const { + if (is_map()) { + out = *val.map; + return true; + } + return false; } - return false; -} -bool ppc::data::value_t::number(ppc::data::number_t &out) const { - if (is_number()) { - out = val.num; - return true; + bool value_t::number(number_t &out) const { + if (is_number()) { + out = val.num; + return true; + } + return false; } - return false; -} -bool ppc::data::value_t::string(ppc::data::string_t &out) const { - if (is_string()) { - out = *val.str; - return true; + bool value_t::string(string_t &out) const { + if (is_string()) { + out = *val.str; + return 
true; + } + return false; } - return false; -} -bool ppc::data::value_t::boolean(ppc::data::bool_t &out) const { - if (is_bool()) { - out = val.bl; - return true; + bool value_t::boolean(bool_t &out) const { + if (is_bool()) { + out = val.bl; + return true; + } + return false; } - return false; -} - -const ppc::data::array_t &ppc::data::value_t::array() const { - if (is_array()) return *val.arr; - else throw (std::string)"The value isn't an array."; -} -const ppc::data::map_t &ppc::data::value_t::map() const { - if (is_map()) return *val.map; - else throw (std::string)"The value isn't a map."; -} -ppc::data::number_t ppc::data::value_t::number() const { - if (is_number()) return val.num; - else throw (std::string)"The value isn't a number."; -} -const ppc::data::string_t &ppc::data::value_t::string() const { - if (is_string()) return *val.str; - else throw (std::string)"The value isn't a string."; -} -ppc::data::bool_t ppc::data::value_t::boolean() const { - if (is_bool()) return val.bl; - else throw (std::string)"The value isn't a bool."; -} - -ppc::data::value_t::value_t() { - this->type = type_t::Null; -} -ppc::data::value_t::value_t(const ppc::data::array_t &val) { - this->type = type_t::Arr; - this->val.arr = new array_t(val); -} -ppc::data::value_t::value_t(const ppc::data::map_t &val) { - this->type = type_t::Map; - this->val.map = new map_t(val); -} -ppc::data::value_t::value_t(const ppc::data::string_t &val) { - this->type = type_t::Str; - this->val.str = new string_t(val); -} -ppc::data::value_t::value_t(ppc::data::bool_t val) { - this->type = type_t::Bool; - this->val.bl = val; -} -ppc::data::value_t::value_t(ppc::data::number_t val) { - this->type = type_t::Num; - this->val.num = val; -} -ppc::data::value_t::value_t(const ppc::data::value_t &other) { - type = other.type; - switch (other.type) { - case type_t::Map: - val.map = new map_t(*other.val.map); - break; - case type_t::Arr: - val.arr = new array_t(*other.val.arr); - break; - case type_t::Str: - 
val.str = new string_t(*other.val.str); - break; - default: - val = other.val; - break; + array_t &value_t::array() const { + if (is_array()) return *val.arr; + else throw (std::string)"The value isn't an array."; } -} -ppc::data::value_t::~value_t() { - switch (type) { - case type_t::Map: - delete val.map; - break; - case type_t::Arr: - delete val.arr; - break; - case type_t::Str: - delete val.str; - break; - default: - break; + map_t &value_t::map() const { + if (is_map()) return *val.map; + else throw (std::string)"The value isn't a map."; + } + number_t value_t::number() const { + if (is_number()) return val.num; + else throw (std::string)"The value isn't a number."; + } + string_t &value_t::string() const { + if (is_string()) return *val.str; + else throw (std::string)"The value isn't a string."; + } + bool_t value_t::boolean() const { + if (is_bool()) return val.bl; + else throw (std::string)"The value isn't a bool."; } -} + value_t::value_t() { + this->type = type_t::Null; + } + value_t::value_t(const array_t &val) { + this->type = type_t::Arr; + this->val.arr = new array_t(val); + } + value_t::value_t(const map_t &val) { + this->type = type_t::Map; + this->val.map = new map_t(val); + } + value_t::value_t(const string_t &val) { + this->type = type_t::Str; + this->val.str = new string_t(val); + } + value_t::value_t(bool_t val) { + this->type = type_t::Bool; + this->val.bl = val; + } + value_t::value_t(number_t val) { + this->type = type_t::Num; + this->val.num = val; + } + value_t::value_t(const value_t &other) { + type = other.type; + switch (other.type) { + case type_t::Map: + val.map = new map_t(*other.val.map); + break; + case type_t::Arr: + val.arr = new array_t(*other.val.arr); + break; + case type_t::Str: + val.str = new string_t(*other.val.str); + break; + default: + val = other.val; + break; + } + } + value_t value_t::mk_arr() { + return value_t(array_t()); + } + value_t value_t::mk_map() { + return value_t(map_t()); + } + + value_t::~value_t() { + 
switch (type) { + case type_t::Map: + delete val.map; + break; + case type_t::Arr: + delete val.arr; + break; + case type_t::Str: + delete val.str; + break; + default: + break; + } + } + + value_t::value_t(std::initializer_list> map): + value_t(map_t(map)) { } + + value_t &value_t::operator=(const value_t &other) { + type = other.type; + switch (other.type) { + case type_t::Map: + val.map = new map_t(*other.val.map); + break; + case type_t::Arr: + val.arr = new array_t(*other.val.arr); + break; + case type_t::Str: + val.str = new string_t(*other.val.str); + break; + default: + val = other.val; + break; + } + return *this; + } + +} diff --git a/src/utils/message.cc b/src/utils/message.cc index 8b49f57..61959ea 100644 --- a/src/utils/message.cc +++ b/src/utils/message.cc @@ -4,68 +4,70 @@ using namespace ppc; -std::string messages::message_t::to_string() const { - std::string loc_readable = location.to_string(); - std::string level_readable; +namespace ppc::messages { + std::string message_t::to_string() const { + std::string loc_readable = location.to_string(); + std::string level_readable; - switch (level) { - case messages::message_t::DEBUG: level_readable = "debug"; break; - case messages::message_t::SUGGESTION: level_readable = "suggestion"; break; - case messages::message_t::INFO: level_readable = "info"; break; - case messages::message_t::WARNING: level_readable = "warning"; break; - case messages::message_t::ERROR: level_readable = "error"; break; - default: level_readable = "what?"; break; - } - - std::stringstream res { }; - - if (loc_readable.length()) res << loc_readable << ": "; - res << level_readable << ": " << content; - - return res.str(); -} -bool messages::message_t::is_severe() const { - return level > messages::message_t::WARNING; -} - -bool messages::msg_stack_t::is_failed() const { - for (const auto &msg : messages) { - if (msg.is_severe()) return true; - } - return false; -} -void messages::msg_stack_t::print(std::ostream &output, 
messages::message_t::level_t threshold, bool color_output) const { - if (!messages.size()) return; - - for (const auto &msg : messages) { - if (msg.level < threshold) continue; - - std::string loc_readable = msg.location.to_string(); - - switch (msg.level) { - case messages::message_t::DEBUG: - output << (color_output ? "\e[38;5;8mdebug: " : "debug: "); - break; - case messages::message_t::SUGGESTION: - output << (color_output ? "\e[38;5;45msuggestion: " : "suggestion: "); - break; - case messages::message_t::INFO: - output << (color_output ? "\e[38;5;33minfo: ": "info: "); - break; - case messages::message_t::WARNING: - output << (color_output ? "\e[38;5;214mwarning: " : "warning: "); - break; - case messages::message_t::ERROR: - output << (color_output ? "\e[38;5;196merror: " : "error: "); - break; - default: - output << (color_output ? "\e[38;5;196mw\e[38;5;226mh\e[38;5;118ma\e[38;5;162mt\e[38;5;129m?\e[0m: " : "what?: "); - break; + switch (level) { + case message_t::DEBUG: level_readable = "debug"; break; + case message_t::SUGGESTION: level_readable = "suggestion"; break; + case message_t::INFO: level_readable = "info"; break; + case message_t::WARNING: level_readable = "warning"; break; + case message_t::ERROR: level_readable = "error"; break; + default: level_readable = "what?"; break; } - if (loc_readable.length()) output << loc_readable << ": "; - output << msg.content; - if (color_output) output << "\e[0m"; - output << std::endl; + std::stringstream res { }; + + if (loc_readable.length()) res << loc_readable << ": "; + res << level_readable << ": " << content; + + return res.str(); } -} + bool message_t::is_severe() const { + return level > message_t::WARNING; + } + + bool msg_stack_t::is_failed() const { + for (const auto &msg : messages) { + if (msg.is_severe()) return true; + } + return false; + } + void msg_stack_t::print(std::ostream &output, message_t::level_t threshold, bool color_output) const { + if (!messages.size()) return; + + for (const auto 
&msg : messages) { + if (msg.level < threshold) continue; + + std::string loc_readable = msg.location.to_string(); + + switch (msg.level) { + case message_t::DEBUG: + output << (color_output ? "\e[38;5;8mdebug: " : "debug: "); + break; + case message_t::SUGGESTION: + output << (color_output ? "\e[38;5;45msuggestion: " : "suggestion: "); + break; + case message_t::INFO: + output << (color_output ? "\e[38;5;33minfo: ": "info: "); + break; + case message_t::WARNING: + output << (color_output ? "\e[38;5;214mwarning: " : "warning: "); + break; + case message_t::ERROR: + output << (color_output ? "\e[38;5;196merror: " : "error: "); + break; + default: + output << (color_output ? "\e[38;5;196mw\e[38;5;226mh\e[38;5;118ma\e[38;5;162mt\e[38;5;129m?\e[0m: " : "what?: "); + break; + } + + if (loc_readable.length()) output << loc_readable << ": "; + output << msg.content; + if (color_output) output << "\e[0m"; + output << std::endl; + } + } +} \ No newline at end of file From cca9bc2e0706b7c453917ecda1cc2f284a1b832e Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Sun, 9 Oct 2022 14:43:28 +0300 Subject: [PATCH 24/74] feat: add json stringification --- include/utils/json.hh | 11 ++++++++ src/compiler/treeifier/ast.cc | 7 +----- src/main/main.cc | 4 +++ src/utils/json.cc | 47 +++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 include/utils/json.hh create mode 100644 src/utils/json.cc diff --git a/include/utils/json.hh b/include/utils/json.hh new file mode 100644 index 0000000..a67ac1a --- /dev/null +++ b/include/utils/json.hh @@ -0,0 +1,11 @@ +#pragma once + +#include +#include +#include +#include +#include "utils/data.hh" + +namespace ppc::data::json { + std::string stringify(const data::value_t &map); +} \ No newline at end of file diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index fadbca8..2b1a552 100644 --- a/src/compiler/treeifier/ast.cc +++ 
b/src/compiler/treeifier/ast.cc @@ -27,15 +27,10 @@ namespace ppc::comp::tree::ast { bool ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out) { ast_ctx_t ctx(messages, tokens); ctx.init(); - data::map_t res; size_t i = 0; try { - if (glob_parser(ctx, i, out)) { - out = res; - return true; - } - else return false; + return glob_parser(ctx, i, out); } catch (const message_t &msg) { messages.push(msg); diff --git a/src/main/main.cc b/src/main/main.cc index f9917ad..2787e8a 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -22,6 +22,7 @@ #include #include "utils/threading.hh" #include "utils/strings.hh" +#include "utils/json.hh" #include "compiler/treeifier/lexer.hh" #include "compiler/treeifier/tokenizer.hh" #include "compiler/treeifier/ast.hh" @@ -156,6 +157,7 @@ int main(int argc, const char *argv[]) { auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); data::map_t ast; if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) throw msg_stack.peek(); + for (auto tok : tokens) { if (tok.is_identifier()) std::cout << "Identifier: \t" << tok.identifier(); if (tok.is_operator()) std::cout << "Operator: \t" << operator_stringify(tok._operator()); @@ -165,6 +167,8 @@ int main(int argc, const char *argv[]) { if (tok.is_string_lit()) std::cout << "String: \t" << std::string { tok.string_lit().begin(), tok.string_lit().end() }; std::cout << std::endl; } + + std::cout << std::endl << data::json::stringify(ast); } catch (const messages::message_t &msg) { msg_stack.push(msg); diff --git a/src/utils/json.cc b/src/utils/json.cc new file mode 100644 index 0000000..1311ec8 --- /dev/null +++ b/src/utils/json.cc @@ -0,0 +1,47 @@ +#include "utils/json.hh" +#include + +namespace ppc::data::json { + std::string stringify(const data::value_t &val) { + std::stringstream out; + bool first = true; + + if (val.is_array()) { + out << '['; + + for (const auto &el : val.array()) { + if (!first) out << ','; + first = false; + out 
<< stringify(el); + } + + out << ']'; + } + else if (val.is_map()) { + out << '{'; + + for (const auto &el : val.map()) { + if (!first) out << ','; + first = false; + out << '"' << el.first << '"' << ':' << stringify(el.second); + } + + out << '}'; + } + else if (val.is_bool()) { + if (val.boolean()) out << "true"; + else out << "false"; + } + else if (val.is_null()) { + out << "null"; + } + else if (val.is_number()) { + out << val.number(); + } + else if (val.is_string()) { + out << '"' << val.string() << '"'; + } + + return out.str(); + } +} \ No newline at end of file From 4d6ce93ae359cae1986c8164a41f6369e6b4ed54 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Sun, 9 Oct 2022 16:34:02 +0300 Subject: [PATCH 25/74] chore --- include/compiler/treeifier/ast.hh | 26 +++++-- include/compiler/treeifier/ast/helper.hh | 16 ++-- include/compiler/treeifier/tokenizer.hh | 21 ++--- include/utils/location.hh | 11 +-- src/compiler/treeifier/ast.cc | 25 ++++-- src/compiler/treeifier/ast/conv.cc | 17 +++-- src/compiler/treeifier/ast/parsers/glob.cc | 18 +++-- src/compiler/treeifier/ast/parsers/group.cc | 5 +- .../treeifier/ast/parsers/identifier.cc | 2 +- src/compiler/treeifier/ast/parsers/nmsp.cc | 6 +- src/main/main.cc | 19 +++-- src/utils/location.cc | 76 +++++-------------- 12 files changed, 115 insertions(+), 127 deletions(-) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 4eccdf1..ec079b5 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -17,9 +17,13 @@ namespace ppc::comp::tree::ast { class parser_t; class group_parser_t; - extern const parser_t &glob_parser; - extern const parser_t &identifier_parser; - extern const parser_t &nmsp_parser; + using parser_factory_t = parser_t *(*)(); + using group_parser_factory_t = group_parser_t *(*)(); + + extern parser_factory_t glob_parser; + extern parser_factory_t identifier_parser; + extern 
parser_factory_t nmsp_parser; + extern group_parser_factory_t def_parser; struct ast_ctx_t { private: @@ -42,25 +46,34 @@ namespace ppc::comp::tree::ast { std::unordered_map parsers; std::set groups; + void add_parser(const parser_t *parser); + void add_parser(const group_parser_t *parser); public: msg_stack_t &messages; std::vector &tokens; std::set imports; loc_namespace_name_t nmsp; - void add_parser(const parser_t &parser); - void add_parser(const group_parser_t &parser); + void add_parser(parser_factory_t factory) { add_parser(factory()); } + void add_parser(group_parser_factory_t factory) { add_parser(factory()); } + + ast_ctx_t &operator=(const ast_ctx_t &other) = delete; const parser_proxy_t parser; const group_proxy_t group; ast_ctx_t &init() { add_parser(glob_parser); + add_parser(identifier_parser); + add_parser(nmsp_parser); + add_parser(def_parser); return *this; } + bool parse(std::string parser, size_t &pi, data::map_t &out); static bool parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out); + ~ast_ctx_t(); ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messages(messages), tokens(tokens), @@ -81,6 +94,7 @@ namespace ppc::comp::tree::ast { return parse(ctx, i, out); } + virtual ~parser_t() = default; parser_t(const std::string &name): _name(name) { } }; @@ -93,6 +107,8 @@ namespace ppc::comp::tree::ast { group_parser_t &add(parser_t &parser, const lang::namespace_name_t &name); bool parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; + + group_parser_t(const std::string &name): parser_t(name) { } }; namespace conv { diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index 08defde..affa012 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -82,25 +82,25 @@ namespace ppc::comp::tree::ast { throw_ended(reason); } - bool push_parse(const parser_t &parser, data::array_t &out) { + bool push_parse(const std::string &name, 
data::array_t &out) { data::map_t res; - if (parser(ctx, i, res)) { + if (parse(name, res)) { out.push(res); return true; } else return false; } - bool parse(const parser_t &parser, data::map_t &out) { - return parser(ctx, i, out); + bool parse(const std::string &name, data::map_t &out) { + return ctx.parse(name, i, out); } - void force_push_parse(const parser_t &parser, std::string message, data::array_t &out) { + void force_push_parse(const std::string &name, std::string message, data::array_t &out) { advance(message); bool success; try { - success = push_parse(parser, out); + success = push_parse(name, out); } catch (const message_t &msg) { ctx.messages.push(msg); @@ -109,12 +109,12 @@ namespace ppc::comp::tree::ast { if (!success) err(message); } - void force_parse(const parser_t &parser, std::string message, data::map_t &out) { + void force_parse(const std::string &name, std::string message, data::map_t &out) { advance(message); bool success; try { - success = parse(parser, out); + success = parse(name, out); } catch (const message_t &msg) { ctx.messages.push(msg); diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index f252f6d..feff58d 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -129,37 +129,31 @@ namespace ppc::comp::tree { bool is_identifier(std::string &&val) { return is_identifier() && identifier() == val; } token_t() { kind = NONE; } - token_t(const std::string &identifier, location_t loc = location_t::NONE) { + token_t(const std::string &identifier, location_t loc = location_t::NONE): location(loc) { kind = IDENTIFIER; data.identifier = new std::string { identifier }; - location = loc; } - token_t(operator_t op, location_t loc = location_t::NONE) { + token_t(operator_t op, location_t loc = location_t::NONE): location(loc) { kind = OPERATOR; data._operator = op; - location = loc; } - token_t(std::uint64_t val, location_t loc = location_t::NONE) { + 
token_t(std::uint64_t val, location_t loc = location_t::NONE): location(loc) { kind = INT; data.int_literal = val; - location = loc; } - token_t(double val, location_t loc = location_t::NONE) { + token_t(double val, location_t loc = location_t::NONE): location(loc) { kind = FLOAT; data.float_literal = val; - location = loc; } - token_t(char c, location_t loc = location_t::NONE) { + token_t(char c, location_t loc = location_t::NONE): location(loc) { kind = CHAR; data.char_literal = c; - location = loc; } - token_t(const std::vector &val, location_t loc = location_t::NONE) { + token_t(const std::vector &val, location_t loc = location_t::NONE): location(loc) { kind = STRING; data.string_literal = new std::vector { val }; - location = loc; } - token_t(const token_t &tok) { + token_t(const token_t &tok): location(tok.location) { kind = tok.kind; switch (kind) { case NONE: break; @@ -170,7 +164,6 @@ namespace ppc::comp::tree { case CHAR: data.char_literal = tok.data.char_literal; break; case STRING: data.string_literal = new std::vector { *tok.data.string_literal }; break; } - location = tok.location; } ~token_t() { diff --git a/include/utils/location.hh b/include/utils/location.hh index 0ab706f..65efeff 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -10,19 +10,20 @@ namespace ppc { std::size_t start; std::size_t length; std::size_t code_start; - std::string filename; + const std::string &filename; operator std::string() const { return to_string(); } std::string to_string() const; location_t intersect(location_t other) const; location_t(); - location_t(std::string filename); + location_t(const location_t &other): location_t(other.filename, other.line, other.start, other.code_start, other.length) { } + location_t(const std::string &filename); location_t(std::size_t line, std::size_t start); - location_t(std::string filename, std::size_t line, std::size_t start); + location_t(const std::string &filename, std::size_t line, std::size_t start); 
location_t(std::size_t line, std::size_t start, std::size_t code_start); - location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start); + location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start); location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); - location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); + location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); }; } \ No newline at end of file diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index 2b1a552..7a30834 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -14,14 +14,20 @@ namespace ppc::comp::tree::ast { return *(const group_parser_t*)p; } - void ast_ctx_t::add_parser(const parser_t &parser) { - if (parsers.find(parser.name()) != parsers.end()) throw "The parser '" + parser.name() + "' already exists."; - parsers[parser.name()] = &parser; + ast_ctx_t::~ast_ctx_t() { + for (auto pair : parsers) { + delete pair.second; + } } - void ast_ctx_t::add_parser(const group_parser_t &parser) { - if (parsers.find(parser.name()) != parsers.end()) throw "The parser '" + parser.name() + "' already exists."; - parsers[parser.name()] = &parser; - groups.emplace(&parser); + + void ast_ctx_t::add_parser(const parser_t *parser) { + if (parsers.find(parser->name()) != parsers.end()) throw "The parser '" + parser->name() + "' already exists."; + parsers[parser->name()] = parser; + } + void ast_ctx_t::add_parser(const group_parser_t *parser) { + if (parsers.find(parser->name()) != parsers.end()) throw "The parser '" + parser->name() + "' already exists."; + parsers[parser->name()] = parser; + groups.emplace(parser); } bool ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out) { @@ -30,12 +36,15 @@ namespace 
ppc::comp::tree::ast { size_t i = 0; try { - return glob_parser(ctx, i, out); + return ctx.parse("$_glob", i, out); } catch (const message_t &msg) { messages.push(msg); return false; } } + bool ast_ctx_t::parse(std::string parser, size_t &pi, data::map_t &out) { + return this->parser[parser] (*this, pi, out); + } } diff --git a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc index f323793..2526922 100644 --- a/src/compiler/treeifier/ast/conv.cc +++ b/src/compiler/treeifier/ast/conv.cc @@ -5,7 +5,7 @@ namespace ppc::comp::tree::ast::conv { return { { "location", conv::loc_to_map(loc.location) }, { "content", loc }, - { "$_name", identifier_parser.name() }, + { "$_name", "$_identifier" }, }; } located_t map_to_identifier(const data::map_t &map) { @@ -17,20 +17,17 @@ namespace ppc::comp::tree::ast::conv { { "$_name", "$_loc" }, }; - if (loc.filename != "") res["filename"] = loc.filename; + res["filename"] = loc.filename; if (loc.start != -1u) res["start"] = (float)loc.start; + if (loc.start != -1u) res["line"] = (float)loc.line; if (loc.code_start != -1u) res["code_start"] = (float)loc.code_start; if (loc.length != -1u) res["length"] = (float)loc.length; return res; } location_t map_to_loc(const data::map_t &map) { - location_t res; + location_t res(map["filename"].string()); - if (map.has("filename")) { - if (map["filename"].is_string()) res.filename = map["filename"].string(); - else throw "Expected key 'filename' to be a string."; - } if (map.has("start")) { if (map["start"].is_number()) res.start = (size_t)map["start"].number(); else throw "Expected key 'start' to be a number."; @@ -43,6 +40,10 @@ namespace ppc::comp::tree::ast::conv { if (map["code_start"].is_number()) res.code_start = (size_t)map["code_start"].number(); else throw "Expected key 'code_start' to be a number."; } + if (map.has("line")) { + if (map["line"].is_number()) res.line = (size_t)map["line"].number(); + else throw "Expected key 'line' to be a number."; + } return res; 
} @@ -56,7 +57,7 @@ namespace ppc::comp::tree::ast::conv { arr.push({ { "location", loc_to_map(segment.location) }, { "content", segment }, - { "$_name", nmsp_parser.name() }, + { "$_name", "$_nmsp" }, }); } diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index 332ecea..45f8eb7 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -9,7 +9,7 @@ class nmsp_def_parser_t : public parser_t { if (h.ended()) return false; if (!h.curr().is_identifier("namespace")) return false; - h.force_parse(nmsp_parser, "Expected a namespace.", res); + h.force_parse("$_nmsp", "Expected a namespace.", res); if (!h.curr().is_operator(operator_t::SEMICOLON)) h.err("Expected a semicolon."); return h.submit(true); @@ -23,7 +23,7 @@ class import_parser_t : public parser_t { if (h.ended()) return false; if (!h.curr().is_identifier("import")) return false; - h.force_parse(nmsp_parser, "Expected a namespace.", res); + h.force_parse("$_nmsp", "Expected a namespace.", res); if (!h.curr().is_operator(operator_t::SEMICOLON)) h.err("Expected a semicolon."); return h.submit(true); @@ -32,22 +32,23 @@ class import_parser_t : public parser_t { public: import_parser_t(): parser_t("$_import") { } }; -const parser_t &import_parser = import_parser_t(); -const parser_t &nmsp_def_parser = nmsp_def_parser_t(); +auto import_parser = import_parser_t(); +auto nmsp_def_parser = nmsp_def_parser_t(); class glob_parser_t : public parser_t { bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { tree_helper_t h(ctx, res_i); if (h.ended()) return true; - h.parse(nmsp_def_parser, (out["namespace"] = map_t()).map()); + nmsp_def_parser(ctx, h.i, (out["namespace"] = map_t()).map()); ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); - auto imports = (out["imports"] = array_t()).array(); + auto &imports = (out["imports"] = array_t()).array(); + auto &contents = (out["content"] = array_t()).array(); 
while (true) { map_t map; + if (!import_parser(ctx, h.i, map)) break; imports.push(map); - if (!h.parse(import_parser, map)) break; auto nmsp = conv::map_to_nmsp(map); if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); @@ -62,4 +63,5 @@ public: glob_parser_t(): parser_t("$_glob") { } }; -const parser_t &ppc::comp::tree::ast::glob_parser = glob_parser_t(); +parser_factory_t ppc::comp::tree::ast::glob_parser = []() { return (parser_t*)new glob_parser_t(); }; +group_parser_factory_t ppc::comp::tree::ast::def_parser = []() { return new group_parser_t("$_def"); }; diff --git a/src/compiler/treeifier/ast/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc index 2c4d17c..01934f2 100644 --- a/src/compiler/treeifier/ast/parsers/group.cc +++ b/src/compiler/treeifier/ast/parsers/group.cc @@ -29,17 +29,18 @@ static bool read_nmsp(ast_ctx_t &ctx, size_t &i, const lang::namespace_name_t &n return equal_i != name.size(); } + bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { tree_helper_t h(ctx, i); for (auto &pair : named_parsers) { if (!read_nmsp(ctx, i, pair.first)) continue; auto &parser = *pair.second; - return h.parse(parser, out); + return parser(ctx, i, out); } for (auto parser : parsers) { try { - return h.parse(*parser, out); + return (*parser)(ctx, i, out); } catch (const message_t &err) { ctx.messages.push(err); diff --git a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc index b9b8557..59af7e0 100644 --- a/src/compiler/treeifier/ast/parsers/identifier.cc +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -18,4 +18,4 @@ class identifier_parser_t : public parser_t { public: identifier_parser_t(): parser_t("$_identifier") { } }; -const parser_t &ppc::comp::tree::ast::identifier_parser = identifier_parser_t(); +parser_factory_t ppc::comp::tree::ast::identifier_parser = []() { return (parser_t*)new 
identifier_parser_t(); }; diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc index 910ed7e..0ae7bca 100644 --- a/src/compiler/treeifier/ast/parsers/nmsp.cc +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -8,12 +8,12 @@ class nmsp_parser_t : public parser_t { auto &arr = (out["content"] = array_t()).array(); - if (!h.push_parse(identifier_parser, arr)) return false; + if (!h.push_parse("$_identifier", arr)) return false; while (true) { if (h.ended()) break; if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; - h.force_push_parse(identifier_parser, "Expected an identifier.", arr); + h.force_push_parse("$_identifier", "Expected an identifier.", arr); } out["location"] = conv::loc_to_map(h.res_loc()); @@ -23,4 +23,4 @@ class nmsp_parser_t : public parser_t { public: nmsp_parser_t(): parser_t("$_nmsp") { } }; -const parser_t &ppc::comp::tree::ast::nmsp_parser = nmsp_parser_t(); +parser_factory_t ppc::comp::tree::ast::nmsp_parser = []() { return (parser_t*)new nmsp_parser_t(); }; diff --git a/src/main/main.cc b/src/main/main.cc index 2787e8a..d140119 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -153,26 +153,29 @@ int main(int argc, const char *argv[]) { for (const auto &file : files) { std::ifstream f { file, std::ios_base::in }; + std::stringstream res; try { auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); data::map_t ast; if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) throw msg_stack.peek(); for (auto tok : tokens) { - if (tok.is_identifier()) std::cout << "Identifier: \t" << tok.identifier(); - if (tok.is_operator()) std::cout << "Operator: \t" << operator_stringify(tok._operator()); - if (tok.is_float_lit()) std::cout << "Float: \t" << tok.float_lit(); - if (tok.is_int_lit()) std::cout << "Int: \t" << tok.int_lit(); - if (tok.is_char_lit()) std::cout << "Char: \t" << tok.char_lit(); - if (tok.is_string_lit()) std::cout << "String: \t" << 
std::string { tok.string_lit().begin(), tok.string_lit().end() }; - std::cout << std::endl; + if (tok.is_identifier()) res << "Identifier: \t" << tok.identifier(); + if (tok.is_operator()) res << "Operator: \t" << operator_stringify(tok._operator()); + if (tok.is_float_lit()) res << "Float: \t" << tok.float_lit(); + if (tok.is_int_lit()) res << "Int: \t" << tok.int_lit(); + if (tok.is_char_lit()) res << "Char: \t" << tok.char_lit(); + if (tok.is_string_lit()) res << "String: \t" << std::string { tok.string_lit().begin(), tok.string_lit().end() }; + res << '\n'; } - std::cout << std::endl << data::json::stringify(ast); + res << '\n' << data::json::stringify(ast); } catch (const messages::message_t &msg) { msg_stack.push(msg); } + + std::cout << res.str() << std::endl; } msg_stack.print(std::cout, messages::message_t::DEBUG, true); diff --git a/src/utils/location.cc b/src/utils/location.cc index be6e344..8f216e1 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -40,11 +40,7 @@ location_t location_t::intersect(location_t other) const { if (a.start == -1u || b.start == -1u) return { }; - if (a.start > b.start) { - location_t c = a; - a = b; - b = c; - } + if (a.start > b.start) return other.intersect(*this); fix_location(a); fix_location(b); @@ -59,61 +55,27 @@ location_t location_t::intersect(location_t other) const { return a; } -location_t::location_t() { - this->line = -1; - this->start = -1; - this->length = -1; - this->code_start = -1; - this->filename = ""; -} -location_t::location_t(std::string filename) { - this->line = -1; - this->start = -1; - this->length = -1; - this->code_start = -1; - this->filename = filename; -} -location_t::location_t(std::size_t line, std::size_t start) { - this->line = line; - this->start = start; - this->length = -1; - this->code_start = -1; - this->filename = ""; -} -location_t::location_t(std::string filename, std::size_t line, std::size_t start) { - this->line = line; - this->start = start; - this->length = -1; - 
this->code_start = -1; - this->filename = filename; -} -location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start) { - this->line = line; - this->start = start; - this->length = -1; +std::string empty = ""; + +location_t::location_t(): + location_t(empty, -1, -1, -1, -1) { } +location_t::location_t(const std::string &filename): + location_t(filename, -1, -1, -1, -1) { } +location_t::location_t(std::size_t line, std::size_t start): + location_t(empty, line, start, -1, -1) { } +location_t::location_t(const std::string &filename, std::size_t line, std::size_t start): + location_t(filename, line, start, -1, -1) { } +location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start): + location_t(empty, line, start, code_start, -1) { } +location_t::location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start): + location_t(filename, line, start, code_start, -1) { } +location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length): + location_t(empty, line, start, code_start, length) { } +location_t::location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length): filename(filename) { + this->length = length; this->code_start = code_start; - this->filename = ""; -} -location_t::location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start) { this->line = line; this->start = start; - this->length = -1; - this->code_start = code_start; - this->filename = filename; -} -location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length) { - this->line = line; - this->start = start; - this->length = line; - this->code_start = code_start; - this->filename = ""; -} -location_t::location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length) { - this->line = line; - this->start = start; - 
this->length = line; - this->code_start = code_start; - this->filename = filename; } const location_t location_t::NONE = { }; From 1192e86c23cdba3e99c58dd54a9c3a36d91088a0 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 10 Oct 2022 18:53:31 +0300 Subject: [PATCH 26/74] chore: rewrite awful lexer --- src/compiler/treeifier/lexer.cc | 422 +++++++++++--------------------- 1 file changed, 149 insertions(+), 273 deletions(-) diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 4012dee..1d72280 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -3,333 +3,209 @@ #include "utils/message.hh" using namespace ppc; -using namespace comp::tree::lex; +using namespace ppc::messages; +using namespace ppc::comp::tree::lex; -struct lexlet_t { - struct process_res_t { - bool ended; - bool repeat; - bool dont_add; - const lexlet_t *new_parselet; - bool has_message; - messages::message_t msg; - }; +struct res_t; +using lexlet_t = res_t (*)(char c, std::vector &tok); - bool(*is_valid)(char curr); - process_res_t (*process)(char curr); +struct res_t { + lexlet_t new_parselet; token_t::kind_t type; + bool _repeat; + bool _add; + + res_t add(bool val = false) { + this->_add = val; + return *this; + } + res_t repeat(bool val = true) { + this->_repeat = val; + return *this; + } }; -using process_res_t = lexlet_t::process_res_t; -static bool is_digit(char c) { - return c >= '0' && c <= '9'; -} -static bool is_oct(char c) { +static bool isoct(char c) { return c >= '0' && c <= '7'; } -static bool is_hex(char c) { - return is_digit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' || c <= 'f'); -} -static bool is_lower(char c) { - return c >= 'a' && c <= 'z'; -} -static bool is_upper(char c) { - return c >= 'A' && c <= 'Z'; -} -static bool is_letter(char c) { - return is_lower(c) || is_upper(c); -} -static bool is_alphanumeric(char c) { - return is_letter(c) || is_digit(c); -} static 
bool is_any(char c, std::string chars) { auto res = chars.find(c) != std::string::npos; return res; } +static bool is_operator(char c) { + return is_any(c, "=!<>+-*/%&|^?:,.(){}[];"); +} -static process_res_t lexer_switch(const lexlet_t *lexlet) { +static res_t lexlet_default(char c, std::vector &tok); + +static res_t lexer_switch(lexlet_t lexlet, bool repeat = false) { return { - .ended = false, - .repeat = false, .new_parselet = lexlet, + ._repeat = repeat, }; } -static process_res_t lexer_repeat_switch(const lexlet_t *lexlet) { - return (process_res_t) { - .ended = false, - .repeat = true, - .new_parselet = lexlet, +static res_t lexer_end(token_t::kind_t type, bool repeat = true) { + return { + .new_parselet = lexlet_default, + .type = type, + ._repeat = repeat }; } -static process_res_t lexer_end() { - return (process_res_t) { - .ended = true, - .repeat = true, - .new_parselet = nullptr, - }; -} -static process_res_t lexer_none() { - return (process_res_t) { - .ended = false, - .repeat = false, - .new_parselet = nullptr, - }; +static res_t lexer_none() { + return { ._add = true }; } -static bool last_escape = false; -static bool literal_ended = false; -static char first_op; -static int op_i = 0; -static bool only_dot = false; -static bool last_star = false; +static res_t lexlet_identifier(char c, std::vector &tok) { + if (is_operator(c) || isspace(c)) return lexer_end(token_t::IDENTIFIER); + else return lexer_none(); +}; +static res_t lexlet_hex(char c, std::vector &tok) { + if (isxdigit(c)) return lexer_none(); + else return lexer_end(token_t::HEX_LITERAL); +}; +static res_t lexlet_bin(char c, std::vector &tok) { + if (is_any(c, "01")) return lexer_none(); + else if (isdigit(c)) throw message_t::error("A binary literal may only contain zeroes and ones."); + else return lexer_end(token_t::BIN_LITERAL); +}; +static res_t lexlet_oct(char c, std::vector &tok) { + if (isoct(c)) return lexer_none(); + else if (isdigit(c)) throw message_t::error("An octal literal may 
only contain octal digits."); + else return lexer_end(token_t::OCT_LITERAL); +}; +static res_t lexlet_float(char c, std::vector &tok) { + if (isdigit(c)) return lexer_none(); + else return lexer_end(token_t::FLOAT_LITERAL); +}; +static res_t lexlet_dec(char c, std::vector &tok) { + if (isdigit(c)) return lexer_none(); + else if (c == '.') return lexer_switch(lexlet_float); + else return lexer_end(token_t::DEC_LITERAL); +}; -const lexlet_t LEXLET_IDENTIFIER = (lexlet_t) { - .is_valid = [] (char curr) { return is_letter(curr) || curr == '_' || curr == '@' || curr == '$'; }, - .process = [] (char curr) { - bool valid = (is_alphanumeric(curr) || curr == '_' || curr == '@' || curr == '$'); - return (process_res_t) { - .ended = !valid, - .repeat = !valid, - .new_parselet = &LEXLET_IDENTIFIER, - }; - }, - .type = token_t::IDENTIFIER, +static res_t lexlet_zero(char c, std::vector &tok) { + if (c == '.') return lexer_switch(lexlet_float); + else if (c == 'b') return lexer_switch(lexlet_bin); + else if (c == 'x') return lexer_switch(lexlet_hex); + else if (isdigit(c)) return lexer_switch(lexlet_oct, true); + else return lexer_end(token_t::DEC_LITERAL); }; -const lexlet_t LEXLET_HEX = (lexlet_t) { - .process = [] (char curr) { - if (is_hex(curr)) return lexer_none(); - else return lexer_end(); - }, - .type = token_t::HEX_LITERAL, +static res_t lexlet_comment(char c, std::vector &tok) { + tok.clear(); + if (c == '\n') return lexer_switch(lexlet_default); + else return lexer_none().add(false); }; -const lexlet_t LEXLET_BIN = (lexlet_t) { - .process = [] (char curr) { - if (curr == '0' || curr == '1') return lexer_none(); - else if (is_digit(curr)) - throw messages::message_t(messages::message_t::ERROR, "A binary literal may only contain zeroes and ones.", location_t::NONE); - else return lexer_end(); - }, - .type = token_t::BIN_LITERAL, +static res_t lexlet_multicomment(char c, std::vector &tok) { + if (c == '/' && tok.size() && tok.back() == '*') { + tok.clear(); + return 
lexer_switch(lexlet_default); + } + + return lexer_none(); }; -const lexlet_t LEXLET_OCT = (lexlet_t) { - .process = [] (char curr) { - if (is_oct(curr)) return lexer_none(); - else if (is_digit(curr)) - throw messages::message_t(messages::message_t::ERROR, "An octal literal may only contain octal digits.", location_t::NONE); - else return lexer_end(); - }, - .type = token_t::OCT_LITERAL, -}; -const lexlet_t LEXLET_FLOAT = (lexlet_t) { - .is_valid = [] (char curr) { return only_dot = curr == '.'; }, - .process = [] (char curr) { - if (is_digit(curr)) { - only_dot = false; - return lexer_none(); - } - else return lexer_end(); - }, - .type = token_t::FLOAT_LITERAL, -}; -const lexlet_t LEXLET_DEC = (lexlet_t) { - .is_valid = [] (char curr) { return is_digit(curr); }, - .process = [] (char curr) { - if (is_digit(curr)) return lexer_none(); - else if (curr == '.') return lexer_switch(&LEXLET_FLOAT); - else return lexer_end(); - }, - .type = token_t::DEC_LITERAL, -}; -const lexlet_t LEXLET_ZERO = (lexlet_t) { - .is_valid = [] (char curr) { return curr == '0'; }, - .process = [] (char curr) { - if (curr == '.') return lexer_switch(&LEXLET_FLOAT); - else if (curr == 'b') return lexer_switch(&LEXLET_BIN); - else if (curr == 'x') return lexer_switch(&LEXLET_HEX); - else if (is_digit(curr)) return lexer_repeat_switch(&LEXLET_OCT); - else return lexer_end(); - }, - .type = token_t::DEC_LITERAL, -}; -const lexlet_t LEXLET_COMMENT = { - .process = [] (char curr) { - if (curr == '\n') return lexer_end(); - else return (process_res_t) { - .ended = false, - .dont_add = true, - }; - }, - .type = token_t::NONE, -}; -const lexlet_t LEXLET_MULTICOMMENT = { - .process = [] (char curr) { - if (curr == '/' && last_star) { - last_star = false; - return (process_res_t) { - .ended = true, - }; - } - if (curr == '*') last_star = true; - - return (process_res_t) { - .dont_add = true, - }; - }, - .type = token_t::NONE, -}; -const lexlet_t LEXLET_OPERATOR = (lexlet_t) { - .is_valid = [] (char 
curr) { - if (is_any(curr, "=!<>+-*/%&|^?:,.(){}[];")) { - first_op = curr; - op_i = 1; - return true; - } - else return false; - }, - .process = [] (char curr) { - bool failed = true; - if (first_op == curr && op_i == 1 && is_any(curr, ":+-&|?<>")) failed = false; - if (curr == '=') { +static res_t lexlet_operator(char c, std::vector &tok) { + bool failed = false; + + if (tok.size() > 0) { + failed = true; + char first_op = tok[0]; + size_t op_i = tok.size(); + + if (first_op == c && op_i == 1 && is_any(c, ":+-&|?<>")) failed = false; + if (c == '=') { if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; if (op_i == 2 && is_any(first_op, "<>?")) failed = false; } - if (first_op == '-' && curr == '>' && op_i == 1) failed = false; + if (first_op == '-' && c == '>' && op_i == 1) failed = false; if (first_op == '/' && op_i == 1) { - if (curr == '/') return lexer_switch(&LEXLET_COMMENT); - else if (curr == '*') return lexer_switch(&LEXLET_MULTICOMMENT); + if (c == '/') return lexer_switch(lexlet_comment); + else if (c == '*') return lexer_switch(lexlet_multicomment); } + } - op_i++; - - if (failed) return lexer_end(); - else return lexer_none(); - }, - .type = token_t::OPERATOR, + if (failed) return lexer_end(token_t::OPERATOR); + else return lexer_none(); }; -const lexlet_t LEXLET_STRING_LITERAL = (lexlet_t) { - .is_valid = [] (char curr) { - last_escape = false; - literal_ended = false; - return curr == '"'; - }, - .process = [] (char curr) { - if (last_escape) { - last_escape = false; - return lexer_none(); - } - - if (curr == '\\') { - last_escape = true; - } - else if (curr == '"') { - literal_ended = true; - } - else if (literal_ended) return lexer_end(); - return lexer_none(); - }, - .type = token_t::STRING_LITERAL, +static res_t lexlet_string(char c, std::vector &tok) { + if (c == '"' && tok.back() != '\\') return lexer_end(token_t::STRING_LITERAL, true); + else return lexer_none(); }; -const lexlet_t LEXLET_CHAR_LITERAL = (lexlet_t) { - .is_valid = 
[] (char curr) { - last_escape = false; - literal_ended = false; - return curr == '\''; - }, - .process = [] (char curr) { - if (last_escape) { - last_escape = false; - return lexer_none(); - } - - if (curr == '\\') { - last_escape = true; - } - else if (curr == '\'') { - literal_ended = true; - } - else if (literal_ended) return lexer_end(); - return lexer_none(); - }, - .type = token_t::CHAR_LITERAL, -}; -const lexlet_t LEXLET_DEFAULT = (lexlet_t) { - .process = [] (char curr) { - if (LEXLET_STRING_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_STRING_LITERAL); - if (LEXLET_CHAR_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_CHAR_LITERAL); - if (LEXLET_OPERATOR.is_valid(curr)) return lexer_switch(&LEXLET_OPERATOR); - if (LEXLET_ZERO.is_valid(curr)) return lexer_switch(&LEXLET_ZERO); - if (LEXLET_DEC.is_valid(curr)) return lexer_switch(&LEXLET_DEC); - if (LEXLET_FLOAT.is_valid(curr)) return lexer_switch(&LEXLET_FLOAT); - if (LEXLET_IDENTIFIER.is_valid(curr)) return lexer_switch(&LEXLET_IDENTIFIER); - else return (process_res_t) { - .ended = true, - .repeat = false, - .new_parselet = nullptr, - }; - }, - .type = token_t::NONE, +static res_t lexlet_char(char c, std::vector &tok) { + if (c == '"' && tok.back() != '\\') return lexer_end(token_t::CHAR_LITERAL, true); + else return lexer_none(); }; -std::vector token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, const std::string &filename, const std::string &src) { +static res_t lexlet_default(char c, std::vector &tok) { + tok.push_back(c); + if (c == '"') return lexer_switch(lexlet_string); + if (c == '\'') return lexer_switch(lexlet_char); + if (c == '0') return lexer_switch(lexlet_zero); + if (c == '.') return lexer_switch(lexlet_float); + if (is_operator(c)) return lexer_switch(lexlet_operator); + if (isdigit(c)) return lexer_switch(lexlet_dec); + if (isspace(c)) { + tok.clear(); + return lexer_none().add(false); + } + return lexer_switch(lexlet_identifier); +}; + +std::vector 
token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, const std::string &filename, const std::string &_src) { + auto src = _src + '\n'; std::vector tokens; std::vector curr_token; - lexlet_t curr = LEXLET_DEFAULT; - std::size_t start = 0, line = 0, curr_start = 0, curr_line = 0, length = 0, i = 0; + lexlet_t curr = lexlet_default; + std::size_t start = 0, line = 0, curr_start = 0, curr_line = 0, i = 0; - while (src[i]) { + while (i < src.size()) { char c = src[i]; try { - process_res_t res = curr.process(c); - if (i == 0) res.repeat = false; - if (res.has_message) throw res.msg; - - if (res.ended) { - if (curr.type) { - location_t loc = { filename, line, start, i - length, length }; - tokens.push_back({ curr.type, { curr_token.begin(), curr_token.end() }, loc }); - } + res_t res = curr(c, curr_token); + if (i == 0) res._repeat = false; + if (res._add) { + curr_token.push_back(c); + } + if (res.type) { + size_t len = curr_token.size(); + location_t loc(filename, line, start, i - len, len); + tokens.push_back({ res.type, { curr_token.begin(), curr_token.end() }, loc }); curr_token.clear(); - length = 0; - curr = LEXLET_DEFAULT; } - else { - if (res.new_parselet) { - if (!curr.type) { - start = curr_start; - line = curr_line; - } - curr = *res.new_parselet; - } - if (!res.dont_add) { - curr_token.push_back(c); - length++; + if (res.new_parselet) { + if (curr == lexlet_default && res.new_parselet != lexlet_default) { + start = curr_start; + line = curr_line; } + curr = res.new_parselet; } - if (!res.repeat) { - curr_start++; - if (c == '\n') { - curr_line++; - curr_start = 0; - } + if (!res._repeat) { i++; + curr_start++; + if (i == src.size()) break; + if (c == '\n') { + curr_start = 0; + curr_line++; + } } } catch (const messages::message_t &msg) { - throw messages::message_t(msg.level, msg.content, location_t(filename, line, start, i - length, length)); + throw message_t(msg.level, msg.content, location_t(filename, line, start, i - curr_token.size(), 
curr_token.size())); } } - location_t loc = { filename, line, start, i - length, length }; - if (curr.type) { - tokens.push_back({ - curr.type, std::string { curr_token.begin(), curr_token.end() }, - { filename, line, start, i - length, length } - }); - } + curr_start--; + + if (curr_token.size()) curr_token.pop_back(); + + if (curr == lexlet_string) + throw message_t::error("Unclosed string literal.", location_t(filename, line, start, i - curr_token.size(), curr_token.size())); + if (curr == lexlet_char) + throw message_t::error("Unclosed char literal.", location_t(filename, line, start, i - curr_token.size(), curr_token.size())); + if (curr != lexlet_default) throw message_t::error("Unexpected end.", location_t(filename, curr_line, curr_start, i, 1)); return tokens; } From 1190f09e3adee462f1103bbaa9a9c6aa09e17c38 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:10:12 +0300 Subject: [PATCH 27/74] fix: remove length increment on location printing --- src/utils/location.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/location.cc b/src/utils/location.cc index 8f216e1..eb94a73 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -22,7 +22,7 @@ std::string location_t::to_string() const { written_anything = true; } if (length != -1u) { - if (written_anything) res << '(' << length + 1 << ')'; + if (written_anything) res << '(' << length << ')'; written_anything = true; } From a8cda64516631c7ee969fa82427e07741f1d7658 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:12:42 +0300 Subject: [PATCH 28/74] fix: float and int literals --- src/compiler/treeifier/lexer.cc | 31 +++++++++++++++-------------- src/compiler/treeifier/tokenizer.cc | 18 ++++++++--------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 
1d72280..2f89eea 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -15,7 +15,7 @@ struct res_t { bool _repeat; bool _add; - res_t add(bool val = false) { + res_t add(bool val = true) { this->_add = val; return *this; } @@ -26,14 +26,14 @@ struct res_t { }; -static bool isoct(char c) { +static inline bool isoct(char c) { return c >= '0' && c <= '7'; } -static bool is_any(char c, std::string chars) { +static inline bool is_any(char c, std::string chars) { auto res = chars.find(c) != std::string::npos; return res; } -static bool is_operator(char c) { +static inline bool is_operator(char c) { return is_any(c, "=!<>+-*/%&|^?:,.(){}[];"); } @@ -85,9 +85,9 @@ static res_t lexlet_dec(char c, std::vector &tok) { }; static res_t lexlet_zero(char c, std::vector &tok) { - if (c == '.') return lexer_switch(lexlet_float); - else if (c == 'b') return lexer_switch(lexlet_bin); - else if (c == 'x') return lexer_switch(lexlet_hex); + if (c == '.') return lexer_switch(lexlet_float).add(); + else if (c == 'b') return lexer_switch(lexlet_bin).add(); + else if (c == 'x') return lexer_switch(lexlet_hex).add(); else if (isdigit(c)) return lexer_switch(lexlet_oct, true); else return lexer_end(token_t::DEC_LITERAL); }; @@ -107,11 +107,14 @@ static res_t lexlet_multicomment(char c, std::vector &tok) { static res_t lexlet_operator(char c, std::vector &tok) { bool failed = false; + if (tok.size() > 0) { failed = true; char first_op = tok[0]; size_t op_i = tok.size(); + if (first_op == '.' 
&& isdigit(c)) return lexer_switch(lexlet_float).add(); + if (first_op == c && op_i == 1 && is_any(c, ":+-&|?<>")) failed = false; if (c == '=') { if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; @@ -138,18 +141,16 @@ static res_t lexlet_char(char c, std::vector &tok) { }; static res_t lexlet_default(char c, std::vector &tok) { - tok.push_back(c); - if (c == '"') return lexer_switch(lexlet_string); - if (c == '\'') return lexer_switch(lexlet_char); - if (c == '0') return lexer_switch(lexlet_zero); - if (c == '.') return lexer_switch(lexlet_float); - if (is_operator(c)) return lexer_switch(lexlet_operator); - if (isdigit(c)) return lexer_switch(lexlet_dec); + if (c == '"') return lexer_switch(lexlet_string).add(); + if (c == '\'') return lexer_switch(lexlet_char).add(); + if (c == '0') return lexer_switch(lexlet_zero).add(); + if (is_operator(c)) return lexer_switch(lexlet_operator).add(); + if (isdigit(c)) return lexer_switch(lexlet_dec).add(); if (isspace(c)) { tok.clear(); return lexer_none().add(false); } - return lexer_switch(lexlet_identifier); + return lexer_switch(lexlet_identifier).add(); }; std::vector token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, const std::string &filename, const std::string &_src) { diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 2063afe..763aab1 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -7,7 +7,7 @@ using namespace messages; using namespace comp::tree; using namespace std::string_literals; -static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex::token_t token) { +static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, const lex::token_t &token) { char literal_char = is_char ? 
'\'' : '"'; bool escaping = false; @@ -52,7 +52,7 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: if (is_char) throw message_t(message_t::ERROR, "Unterminated char literal.", token.location); else throw message_t(message_t::ERROR, "Unterminated string literal.", token.location); } -static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { +static token_t parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { enum radix_t { BINARY, OCTAL, @@ -82,11 +82,9 @@ static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { throw "WTF r u doing bro?"s; } - std::size_t j = token.data.length() - 1; - uint64_t res = 0; - for (; i <= j; i++) { + for (; i <= token.data.length() - 1; i++) { char c = token.data[i]; int8_t digit; switch (radix) { @@ -109,8 +107,8 @@ static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { res += digit; break; case 3: - if (c >= 'a' && c <= 'f') digit = c - 'a' + 9; - else if (c >= 'A' && c <= 'F') digit = c - 'A' + 9; + if (c >= 'a' && c <= 'f') digit = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10; else if (c >= '0' && c <= '9') digit = c - '0'; else throw message_t(message_t::ERROR, "Invalid character '"s + c + "' in hex literal.", token.location); res <<= 4; @@ -121,13 +119,13 @@ static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { return token_t(res, token.location); } -static token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { +static token_t parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { double whole = 0, fract = 0; char c; std::size_t i; - for (i = 0; i < token.data.length() && ((c = token.data[i]) > '0' && c < '9'); i++) { + for (i = 0; i < token.data.length() && isdigit(c = token.data[i]); i++) { if (c == '.') break; int digit = c - '0'; whole *= 10; @@ -136,7 +134,7 @@ static token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { if (c == '.') { i++; - for (; i < token.data.length() 
&& ((c = token.data[i]) > '0' && c < '9'); i++) { + for (; i < token.data.length() && isdigit(c = token.data[i]); i++) { int digit = c - '0'; fract += digit; fract /= 10; From 9dca34696ba6d9b261b4d51ad869ff12d8ffc95b Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:13:10 +0300 Subject: [PATCH 29/74] fix: some issues with glob parser --- include/compiler/treeifier/ast/helper.hh | 17 +++++++++++++++++ src/compiler/treeifier/ast/parsers/glob.cc | 19 +++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index affa012..7eb965b 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -21,6 +21,20 @@ namespace ppc::comp::tree::ast { public: size_t i; + location_t loc(size_t n) { + location_t res = prev_loc(); + res.start += res.length; + res.code_start += res.length; + res.length = n; + return res; + } + location_t prev_loc() { + auto prev_i = i; + if (i > 0) i--; + auto res = loc(); + i = prev_i; + return res; + } location_t next_loc(size_t n = 1) { location_t res = loc(); res.start += res.length; @@ -51,6 +65,9 @@ namespace ppc::comp::tree::ast { void err(std::string message) { throw message_t::error(message, loc()); } + void err(std::string message, size_t n) { + throw message_t::error(message, loc(n)); + } bool submit(bool inc_i = true) { res_i = (i += inc_i); diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index 45f8eb7..54dcde7 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -10,8 +10,10 @@ class nmsp_def_parser_t : public parser_t { if (!h.curr().is_identifier("namespace")) return false; h.force_parse("$_nmsp", "Expected a namespace.", res); - if (!h.curr().is_operator(operator_t::SEMICOLON)) h.err("Expected a semicolon."); - + if 
(!h.curr().is_operator(operator_t::SEMICOLON)) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } return h.submit(true); } @@ -24,7 +26,10 @@ class import_parser_t : public parser_t { if (!h.curr().is_identifier("import")) return false; h.force_parse("$_nmsp", "Expected a namespace.", res); - if (!h.curr().is_operator(operator_t::SEMICOLON)) h.err("Expected a semicolon."); + if (!h.curr().is_operator(operator_t::SEMICOLON)) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } return h.submit(true); } @@ -39,11 +44,12 @@ class glob_parser_t : public parser_t { bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { tree_helper_t h(ctx, res_i); if (h.ended()) return true; - nmsp_def_parser(ctx, h.i, (out["namespace"] = map_t()).map()); - ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); + if (nmsp_def_parser(ctx, h.i, (out["namespace"] = map_t()).map())) { + ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); + } auto &imports = (out["imports"] = array_t()).array(); - auto &contents = (out["content"] = array_t()).array(); + /* auto &contents = */ (out["content"] = array_t()).array(); while (true) { map_t map; @@ -56,6 +62,7 @@ class glob_parser_t : public parser_t { if (!h.ended()) h.err("Invalid token."); + if (ctx.messages.is_failed()) return false; return h.submit(); } From f0e778a85c943e527a01909b20edaf588d4a4397 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:14:25 +0300 Subject: [PATCH 30/74] chore: remove token printing in main --- src/main/main.cc | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/main/main.cc b/src/main/main.cc index d140119..338d3b2 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -153,29 +153,16 @@ int main(int argc, const char *argv[]) { for (const auto &file : files) { std::ifstream f { file, 
std::ios_base::in }; - std::stringstream res; try { auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); data::map_t ast; - if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) throw msg_stack.peek(); + if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) continue; - for (auto tok : tokens) { - if (tok.is_identifier()) res << "Identifier: \t" << tok.identifier(); - if (tok.is_operator()) res << "Operator: \t" << operator_stringify(tok._operator()); - if (tok.is_float_lit()) res << "Float: \t" << tok.float_lit(); - if (tok.is_int_lit()) res << "Int: \t" << tok.int_lit(); - if (tok.is_char_lit()) res << "Char: \t" << tok.char_lit(); - if (tok.is_string_lit()) res << "String: \t" << std::string { tok.string_lit().begin(), tok.string_lit().end() }; - res << '\n'; - } - - res << '\n' << data::json::stringify(ast); + std::cout << data::json::stringify(ast) << std::endl; } catch (const messages::message_t &msg) { msg_stack.push(msg); } - - std::cout << res.str() << std::endl; } msg_stack.print(std::cout, messages::message_t::DEBUG, true); From 8264dc3de02b343c45666d8db80fde43d4b35671 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 14:36:54 +0300 Subject: [PATCH 31/74] fix: value from const char* initialization --- include/utils/data.hh | 2 ++ src/utils/data.cc | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/include/utils/data.hh b/include/utils/data.hh index 5ea2fc3..c4a903b 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -52,6 +52,7 @@ namespace ppc::data { bool_t boolean() const; value_t &operator=(const value_t &other); + value_t &operator=(const char *other); ~value_t(); value_t(); @@ -60,6 +61,7 @@ namespace ppc::data { value_t(std::initializer_list> map); value_t(number_t val); value_t(const string_t &val); + value_t(const char *val); value_t(bool_t val); value_t(const value_t &other); diff --git a/src/utils/data.cc b/src/utils/data.cc 
index f0d1f79..f8ac7ce 100644 --- a/src/utils/data.cc +++ b/src/utils/data.cc @@ -92,6 +92,10 @@ namespace ppc::data { this->type = type_t::Str; this->val.str = new string_t(val); } + value_t::value_t(const char *val) { + this->type = type_t::Str; + this->val.str = new string_t(val); + } value_t::value_t(bool_t val) { this->type = type_t::Bool; this->val.bl = val; @@ -161,5 +165,10 @@ namespace ppc::data { } return *this; } + value_t &value_t::operator=(const char *other) { + type = type_t::Str; + val.str = new string_t(other); + return *this; + } } From febae7df7fd808ea7a5172fca7425d6052ce25c1 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 14:38:00 +0300 Subject: [PATCH 32/74] chore: add PROFILE define statement in make script --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f244a48..623891a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ export MAKEFLAGS += --silent -r -j -export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow +export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow -DPROFILE_$(profile) export ldflags=-L$(bin)/$(profile) export lib=ppc$(version-major)- export profile=release From 349bcc462c0eff79209ca93ab177ca0f039b15cd Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 14:38:14 +0300 Subject: [PATCH 33/74] fix: location stringification --- src/utils/location.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/location.cc b/src/utils/location.cc index eb94a73..bfc2481 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -11,17 +11,17 @@ std::string location_t::to_string() const { res << filename; written_anything = true; } - if (line != -1u) { if (written_anything) res << ':'; res << line + 1; written_anything = 
true; } - if (start != -1u) { + if (start + 1 != 0) { if (written_anything) res << ':'; res << start + 1; written_anything = true; } - if (length != -1u) { + if (length + 1 != 0) { if (written_anything) res << '(' << length << ')'; written_anything = true; } From ff6d01034ad8f7f861c4f23eeb9406ba26033581 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 14:38:42 +0300 Subject: [PATCH 34/74] chore: add pause on end of application when in debug mode, improve exception handling --- src/main/main.cc | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/main/main.cc b/src/main/main.cc index 338d3b2..b25e7e5 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -141,31 +141,43 @@ int main(int argc, const char *argv[]) { std::vector files; messages::msg_stack_t msg_stack; - options::parser_t parser; - data::map_t conf; - add_flags(parser); + try { + options::parser_t parser; + data::map_t conf; + add_flags(parser); - for (const auto &arg : args) { - if (!parser.parse(arg, msg_stack, conf)) { - files.push_back(arg); + for (const auto &arg : args) { + if (!parser.parse(arg, msg_stack, conf)) { + files.push_back(arg); + } } - } - for (const auto &file : files) { - std::ifstream f { file, std::ios_base::in }; - try { + for (const auto &file : files) { + std::ifstream f { file, std::ios_base::in }; auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); data::map_t ast; if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) continue; std::cout << data::json::stringify(ast) << std::endl; } - catch (const messages::message_t &msg) { - msg_stack.push(msg); - } + throw 15.0f; + } + catch (const messages::message_t &msg) { + msg_stack.push(msg); + } + catch (const std::string &msg) { + msg_stack.push(message_t::error(msg)); + } + catch (...) 
{ + std::cout << std::endl; + msg_stack.push(message_t::error("A fatal error occurred.")); } msg_stack.print(std::cout, messages::message_t::DEBUG, true); + #ifdef PROFILE_debug + system("pause"); + #endif + return 0; } From 0c7961ca6b0cfcc9eacf54abda296074117a75b2 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 14:39:01 +0300 Subject: [PATCH 35/74] feat: add type parsing --- include/compiler/treeifier/ast.hh | 2 + src/compiler/treeifier/ast/parsers/type.cc | 43 ++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 src/compiler/treeifier/ast/parsers/type.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index ec079b5..47c3443 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -23,6 +23,7 @@ namespace ppc::comp::tree::ast { extern parser_factory_t glob_parser; extern parser_factory_t identifier_parser; extern parser_factory_t nmsp_parser; + extern parser_factory_t type_parser; extern group_parser_factory_t def_parser; struct ast_ctx_t { @@ -67,6 +68,7 @@ namespace ppc::comp::tree::ast { add_parser(identifier_parser); add_parser(nmsp_parser); add_parser(def_parser); + add_parser(type_parser); return *this; } diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc new file mode 100644 index 0000000..80bc252 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -0,0 +1,43 @@ +#include "compiler/treeifier/ast/helper.hh" + +class type_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + auto &nmsp = (out["namespace"] = map_t()).map(); + nmsp["$_name"] = "$_nmsp"; + auto &nmsp_content = (out["namespace"].map()["content"] = array_t()).array(); + + if (!h.push_parse("$_identifier", nmsp_content)) return false; + + while (true) { + if (h.ended()) break; + 
if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; + h.force_push_parse("$_identifier", "Expected an identifier.", nmsp_content); + } + + out["location"] = conv::loc_to_map(h.res_loc()); + out["name"] = nmsp_content[nmsp_content.size() - 1]; + nmsp_content.pop(); + + if (nmsp_content.size() == 0) { + auto loc = h.res_loc(); + loc.length = 1; + nmsp["location"] = conv::loc_to_map(loc); + } + else { + auto loc_1 = conv::map_to_loc(nmsp_content[0].map()["location"].map()); + auto loc_2 = conv::map_to_loc(nmsp_content[nmsp_content.size() - 1].map()["location"].map()); + auto loc = loc_1.intersect(loc_2); + nmsp["location"] = conv::loc_to_map(loc); + } + + return h.submit(false); + } + + public: type_parser_t(): parser_t("$_type") { } +}; + +parser_factory_t ppc::comp::tree::ast::type_parser = []() { return (parser_t*)new type_parser_t(); }; From eb8d7cd1f1bb90bed690c59de179ab8fde3f273b Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 12 Oct 2022 11:59:54 +0300 Subject: [PATCH 36/74] refactor: modify ast_ctx_t::parse signature to fit other stages --- include/compiler/treeifier/ast.hh | 2 +- src/compiler/treeifier/ast.cc | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 47c3443..e4c80ef 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -73,7 +73,7 @@ namespace ppc::comp::tree::ast { } bool parse(std::string parser, size_t &pi, data::map_t &out); - static bool parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out); + static data::map_t parse(msg_stack_t &messages, std::vector &tokens); ~ast_ctx_t(); ast_ctx_t(msg_stack_t &messages, std::vector &tokens): diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index 7a30834..b93e5e4 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -30,18 +30,14 @@ namespace 
ppc::comp::tree::ast { groups.emplace(parser); } - bool ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens, data::map_t &out) { + data::map_t ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens) { ast_ctx_t ctx(messages, tokens); ctx.init(); size_t i = 0; + data::map_t res; - try { - return ctx.parse("$_glob", i, out); - } - catch (const message_t &msg) { - messages.push(msg); - return false; - } + if (!ctx.parse("$_glob", i, res)) throw message_t::error("Failed to compile."); + return res; } bool ast_ctx_t::parse(std::string parser, size_t &pi, data::map_t &out) { return this->parser[parser] (*this, pi, out); From 86f72ee2b28b8f474157a059d07d288e8ec181cd Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 12 Oct 2022 12:01:37 +0300 Subject: [PATCH 37/74] feat: add pointer parsing to types --- src/compiler/treeifier/ast/parsers/type.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 80bc252..43fd921 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -9,6 +9,7 @@ class type_parser_t : public parser_t { auto &nmsp = (out["namespace"] = map_t()).map(); nmsp["$_name"] = "$_nmsp"; auto &nmsp_content = (out["namespace"].map()["content"] = array_t()).array(); + size_t ptr_n = 0; if (!h.push_parse("$_identifier", nmsp_content)) return false; @@ -18,8 +19,14 @@ class type_parser_t : public parser_t { h.force_push_parse("$_identifier", "Expected an identifier.", nmsp_content); } + while (!h.ended() && h.curr().is_operator(operator_t::MULTIPLY)) { + ptr_n++; + if (!h.try_advance()) break; + } + out["location"] = conv::loc_to_map(h.res_loc()); out["name"] = nmsp_content[nmsp_content.size() - 1]; + out["ptr_n"] = (float)ptr_n; nmsp_content.pop(); if (nmsp_content.size() == 0) { From f1f78e0af2f219e48ee3911312bc3e0cce04d7b4 Mon Sep 17 00:00:00 2001 From: 
TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 12 Oct 2022 12:01:57 +0300 Subject: [PATCH 38/74] chore: fix up main --- src/main/main.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/main.cc b/src/main/main.cc index b25e7e5..a849937 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -32,6 +32,7 @@ using std::cout; using std::size_t; using namespace ppc; using namespace ppc::comp::tree; +using namespace ppc::comp::tree::ast; void add_flags(options::parser_t &parser) { parser.add_flag({ @@ -153,14 +154,17 @@ int main(int argc, const char *argv[]) { } for (const auto &file : files) { - std::ifstream f { file, std::ios_base::in }; - auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); - data::map_t ast; - if (!ast::ast_ctx_t::parse(msg_stack, tokens, ast)) continue; + try { + std::ifstream f { file, std::ios_base::in }; + auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); + auto ast = ast_ctx_t::parse(msg_stack, tokens); - std::cout << data::json::stringify(ast) << std::endl; + std::cout << data::json::stringify(ast) << std::endl; + } + catch (const messages::message_t &msg) { + msg_stack.push(msg); + } } - throw 15.0f; } catch (const messages::message_t &msg) { msg_stack.push(msg); From e2e180b9a5b38df0ef52a4a8df9e84051d50f0e0 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 14 Oct 2022 20:27:45 +0300 Subject: [PATCH 39/74] chore --- include/compiler/treeifier/ast.hh | 49 ++++++------ include/compiler/treeifier/ast/helper.hh | 17 ++-- include/utils/data.hh | 24 +++--- include/utils/message.hh | 6 ++ src/compiler/treeifier/ast.cc | 13 ++- src/compiler/treeifier/ast/conv.cc | 2 +- src/compiler/treeifier/ast/parsers/field.cc | 44 ++++++++++ src/compiler/treeifier/ast/parsers/glob.cc | 20 ++++- src/compiler/treeifier/ast/parsers/group.cc | 25 +++--- 
.../treeifier/ast/parsers/identifier.cc | 2 +- src/compiler/treeifier/ast/parsers/nmsp.cc | 2 +- src/compiler/treeifier/ast/parsers/type.cc | 2 +- src/utils/data.cc | 80 ++++++++++--------- 13 files changed, 186 insertions(+), 100 deletions(-) create mode 100644 src/compiler/treeifier/ast/parsers/field.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index e4c80ef..68d0aea 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -16,15 +16,16 @@ using namespace ppc::messages; namespace ppc::comp::tree::ast { class parser_t; class group_parser_t; + struct ast_ctx_t; - using parser_factory_t = parser_t *(*)(); - using group_parser_factory_t = group_parser_t *(*)(); + using parser_adder_t = void (*)(ast_ctx_t &ctx); - extern parser_factory_t glob_parser; - extern parser_factory_t identifier_parser; - extern parser_factory_t nmsp_parser; - extern parser_factory_t type_parser; - extern group_parser_factory_t def_parser; + extern const parser_adder_t glob_adder; + extern const parser_adder_t identifier_adder; + extern const parser_adder_t nmsp_adder; + extern const parser_adder_t type_adder; + extern const parser_adder_t exp_adder; + extern const parser_adder_t field_adder; struct ast_ctx_t { private: @@ -40,23 +41,23 @@ namespace ppc::comp::tree::ast { private: ast_ctx_t *parent; public: - const group_parser_t &operator[](const std::string &name) const; + group_parser_t &operator[](const std::string &name) const; group_proxy_t(ast_ctx_t *parent): parent(parent) { } }; std::unordered_map parsers; - std::set groups; - - void add_parser(const parser_t *parser); - void add_parser(const group_parser_t *parser); + std::set groups; public: msg_stack_t &messages; std::vector &tokens; std::set imports; loc_namespace_name_t nmsp; - void add_parser(parser_factory_t factory) { add_parser(factory()); } - void add_parser(group_parser_factory_t factory) { add_parser(factory()); } + void add_parser(const parser_t 
*parser); + void add_parser(const parser_t *parser, const std::string &group); + void add_group(const std::string &name); + + void add_parser(parser_adder_t factory) { factory(*this); } ast_ctx_t &operator=(const ast_ctx_t &other) = delete; @@ -64,11 +65,13 @@ namespace ppc::comp::tree::ast { const group_proxy_t group; ast_ctx_t &init() { - add_parser(glob_parser); - add_parser(identifier_parser); - add_parser(nmsp_parser); - add_parser(def_parser); - add_parser(type_parser); + add_parser(identifier_adder); + add_parser(nmsp_adder); + add_parser(glob_adder); + add_parser(type_adder); + add_parser(exp_adder); + add_parser(field_adder); + return *this; } @@ -102,11 +105,11 @@ namespace ppc::comp::tree::ast { class group_parser_t : public parser_t { private: - std::vector> named_parsers; - std::vector parsers; + std::vector> named_parsers; + std::vector parsers; public: - group_parser_t &add(parser_t &parser); - group_parser_t &add(parser_t &parser, const lang::namespace_name_t &name); + group_parser_t &add(const parser_t &parser); + group_parser_t &add(const parser_t &parser, const lang::namespace_name_t &name); bool parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index 7eb965b..a9a418d 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -12,14 +12,15 @@ namespace ppc::comp::tree::ast { ast_ctx_t &ctx; size_t &res_i; + public: + size_t i; + void throw_ended() { if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); } void throw_ended(const std::string &reason) { if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end: " + reason, loc()); } - public: - size_t i; location_t loc(size_t n) { location_t res = prev_loc(); @@ -62,10 +63,10 @@ namespace ppc::comp::tree::ast { else return ctx.tokens[res_i].location.intersect(loc()); } - void err(std::string message) { + bool 
err(std::string message) { throw message_t::error(message, loc()); } - void err(std::string message, size_t n) { + bool err(std::string message, size_t n) { throw message_t::error(message, loc(n)); } @@ -78,6 +79,10 @@ namespace ppc::comp::tree::ast { return i == ctx.tokens.size(); } + token_t &curr(const std::string &reason) { + throw_ended(reason); + return ctx.tokens[i]; + } token_t &curr() { throw_ended(); return ctx.tokens[i]; @@ -113,7 +118,7 @@ namespace ppc::comp::tree::ast { } void force_push_parse(const std::string &name, std::string message, data::array_t &out) { - advance(message); + throw_ended(message); bool success; try { @@ -127,7 +132,7 @@ namespace ppc::comp::tree::ast { if (!success) err(message); } void force_parse(const std::string &name, std::string message, data::map_t &out) { - advance(message); + throw_ended(message); bool success; try { diff --git a/include/utils/data.hh b/include/utils/data.hh index c4a903b..a5ed6d2 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -39,16 +39,22 @@ namespace ppc::data { bool is_string() const; bool is_bool() const; - bool array(array_t &out) const; - bool map(map_t &out) const; - bool number(number_t &out) const; - bool string(string_t &out) const; - bool boolean(bool_t &out) const; + array_t &array(const array_t &arr); + map_t &map(const map_t &map); + number_t &number(number_t num); + string_t &string(const string_t &str); + bool_t &boolean(bool_t bl); - array_t &array() const; - map_t &map() const; + array_t &array(); + map_t &map(); + number_t &number(); + string_t &string(); + bool_t &boolean(); + + const array_t &array() const; + const map_t &map() const; number_t number() const; - string_t &string() const; + const string_t &string() const; bool_t boolean() const; value_t &operator=(const value_t &other); @@ -65,8 +71,6 @@ namespace ppc::data { value_t(bool_t val); value_t(const value_t &other); - static value_t mk_arr(); - static value_t mk_map(); }; diff --git 
a/include/utils/message.hh b/include/utils/message.hh index 63a73b8..75886d8 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -38,7 +38,13 @@ namespace ppc::messages { inline auto begin() { return messages.begin(); } inline auto end() { return messages.end(); } + inline auto begin() const { return messages.begin(); } + inline auto end() const { return messages.end(); } + void push(const message_t &msg) { messages.push_back(msg); } + void push(const msg_stack_t &other) { + for (const auto &msg : other) push(msg); + } const message_t &peek() { return messages.back(); } void clear() { messages.clear(); } diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index b93e5e4..aca1368 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -8,10 +8,10 @@ namespace ppc::comp::tree::ast { if (it == parent->parsers.end()) throw "The parser '" + name + "' doesn't exist."; return *it->second; } - const group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { - auto p = &parent->parser[name]; + group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { + auto p = (group_parser_t*)&parent->parser[name]; if (parent->groups.find(p) == parent->groups.end()) throw "A parser '" + name + "' exists, but isn't a group."; - return *(const group_parser_t*)p; + return *p; } ast_ctx_t::~ast_ctx_t() { @@ -24,7 +24,12 @@ namespace ppc::comp::tree::ast { if (parsers.find(parser->name()) != parsers.end()) throw "The parser '" + parser->name() + "' already exists."; parsers[parser->name()] = parser; } - void ast_ctx_t::add_parser(const group_parser_t *parser) { + void ast_ctx_t::add_parser(const parser_t *parser, const std::string &group) { + add_parser(parser); + this->group[group].add(*parser); + } + void ast_ctx_t::add_group(const std::string &name) { + auto parser = new group_parser_t(name); if (parsers.find(parser->name()) != parsers.end()) throw "The parser '" + 
parser->name() + "' already exists."; parsers[parser->name()] = parser; groups.emplace(parser); diff --git a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc index 2526922..2bf6c73 100644 --- a/src/compiler/treeifier/ast/conv.cc +++ b/src/compiler/treeifier/ast/conv.cc @@ -51,7 +51,7 @@ namespace ppc::comp::tree::ast::conv { data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp) { data::map_t res; - auto arr = (res["content"] = data::array_t()).array(); + auto arr = res["content"].array({}); for (const auto &segment : nmsp) { arr.push({ diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc new file mode 100644 index 0000000..79d1c0f --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -0,0 +1,44 @@ +#include "compiler/treeifier/ast/helper.hh" + +class field_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse("$_identifier", out["name"].map({}))) return false; + + bool type, defval; + + h.throw_ended("Expected a colon or an equals sign."); + + if (h.curr().is_operator(operator_t::COLON)) { + h.advance(); + h.force_parse("$_type", "Expected a type.", out["type"].map({})); + type = true; + } + if (h.curr().is_operator(operator_t::ASSIGN)) { + h.i++; + h.err("Default values are not yet supported.", 1); + h.advance(); + h.force_parse("$_exp", "Expected an expression.", out["value"].map({})); + type = true; + } + + if (h.curr().is_operator(operator_t::SEMICOLON)) { + if (type || defval) return h.submit(); + else return h.err("A type or a default value must be specified "); + } + else if (type || defval) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + else return false; + + return h.submit(true); + } + + public: field_parser_t(): parser_t("$_field") { } +}; + +parser_adder_t 
ppc::comp::tree::ast::field_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new field_parser_t(), "$_def"); }; diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index 54dcde7..427b718 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -9,6 +9,7 @@ class nmsp_def_parser_t : public parser_t { if (h.ended()) return false; if (!h.curr().is_identifier("namespace")) return false; + h.advance("Expected a namespace"); h.force_parse("$_nmsp", "Expected a namespace.", res); if (!h.curr().is_operator(operator_t::SEMICOLON)) { ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); @@ -25,6 +26,7 @@ class import_parser_t : public parser_t { if (h.ended()) return false; if (!h.curr().is_identifier("import")) return false; + h.advance("Expected a namespace"); h.force_parse("$_nmsp", "Expected a namespace.", res); if (!h.curr().is_operator(operator_t::SEMICOLON)) { ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); @@ -49,7 +51,7 @@ class glob_parser_t : public parser_t { } auto &imports = (out["imports"] = array_t()).array(); - /* auto &contents = */ (out["content"] = array_t()).array(); + auto &contents = (out["content"] = array_t()).array(); while (true) { map_t map; @@ -60,9 +62,16 @@ class glob_parser_t : public parser_t { if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); } + while (true) { + if (h.ended()) break; + if (!h.push_parse("$_def", contents)) { + ctx.messages.push(message_t::error("Invalid token.", h.loc())); + h.i++; + } + } + if (!h.ended()) h.err("Invalid token."); - if (ctx.messages.is_failed()) return false; return h.submit(); } @@ -70,5 +79,8 @@ public: glob_parser_t(): parser_t("$_glob") { } }; -parser_factory_t ppc::comp::tree::ast::glob_parser = []() { return (parser_t*)new glob_parser_t(); }; -group_parser_factory_t ppc::comp::tree::ast::def_parser = []() 
{ return new group_parser_t("$_def"); }; +parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { + ctx.add_parser(new group_parser_t("$_def")); + ctx.add_parser(new group_parser_t("$_expr_val")); + ctx.add_parser(new glob_parser_t()); +}; diff --git a/src/compiler/treeifier/ast/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc index 01934f2..7e19636 100644 --- a/src/compiler/treeifier/ast/parsers/group.cc +++ b/src/compiler/treeifier/ast/parsers/group.cc @@ -2,10 +2,13 @@ #include "compiler/treeifier/tokenizer.hh" #include "compiler/treeifier/ast/helper.hh" #include +#include +#include using namespace ppc::comp::tree; using namespace ppc::comp::tree::ast; using namespace std::string_literals; +using namespace std; static bool read_nmsp(ast_ctx_t &ctx, size_t &i, const lang::namespace_name_t &name) { tree_helper_t h(ctx, i); @@ -33,29 +36,31 @@ static bool read_nmsp(ast_ctx_t &ctx, size_t &i, const lang::namespace_name_t &n bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { tree_helper_t h(ctx, i); + if (h.ended()) return false; + for (auto &pair : named_parsers) { if (!read_nmsp(ctx, i, pair.first)) continue; auto &parser = *pair.second; - return parser(ctx, i, out); + if (parser(ctx, i, out)) return true; + else throw message_t::error("Unexpected construct specifier.", h.res_loc()); } + + unordered_map errors; + for (auto parser : parsers) { - try { - return (*parser)(ctx, i, out); - } - catch (const message_t &err) { - ctx.messages.push(err); - return false; - } + if ((*parser)(ctx, i, out)) return true; } + stringstream m; + return false; } -group_parser_t &group_parser_t::add(parser_t &parser) { +group_parser_t &group_parser_t::add(const parser_t &parser) { parsers.push_back(&parser); return *this; } -group_parser_t &group_parser_t::add(parser_t &parser, const lang::namespace_name_t &name) { +group_parser_t &group_parser_t::add(const parser_t &parser, const lang::namespace_name_t &name) { if 
(name.empty()) throw "Name can't be empty."s; if (std::find(parsers.begin(), parsers.end(), &parser) != parsers.end()) { throw "Parser '" + name.to_string() + "' already in group."; diff --git a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc index 59af7e0..3b3d2fc 100644 --- a/src/compiler/treeifier/ast/parsers/identifier.cc +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -18,4 +18,4 @@ class identifier_parser_t : public parser_t { public: identifier_parser_t(): parser_t("$_identifier") { } }; -parser_factory_t ppc::comp::tree::ast::identifier_parser = []() { return (parser_t*)new identifier_parser_t(); }; +parser_adder_t ppc::comp::tree::ast::identifier_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new identifier_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc index 0ae7bca..b1bd4b4 100644 --- a/src/compiler/treeifier/ast/parsers/nmsp.cc +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -23,4 +23,4 @@ class nmsp_parser_t : public parser_t { public: nmsp_parser_t(): parser_t("$_nmsp") { } }; -parser_factory_t ppc::comp::tree::ast::nmsp_parser = []() { return (parser_t*)new nmsp_parser_t(); }; +parser_adder_t ppc::comp::tree::ast::nmsp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new nmsp_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 43fd921..0425a4d 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -47,4 +47,4 @@ class type_parser_t : public parser_t { public: type_parser_t(): parser_t("$_type") { } }; -parser_factory_t ppc::comp::tree::ast::type_parser = []() { return (parser_t*)new type_parser_t(); }; +parser_adder_t ppc::comp::tree::ast::type_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new type_parser_t()); }; diff --git a/src/utils/data.cc b/src/utils/data.cc index f8ac7ce..f83e097 100644 --- 
a/src/utils/data.cc +++ b/src/utils/data.cc @@ -20,47 +20,55 @@ namespace ppc::data { return type == type_t::Bool; } - bool value_t::array(array_t &out) const { - if (is_array()) { - out = *val.arr; - return true; - } - return false; + array_t &value_t::array(const array_t &val) { + *this = val; + return *this->val.arr; } - bool value_t::map(map_t &out) const { - if (is_map()) { - out = *val.map; - return true; - } - return false; + map_t &value_t::map(const map_t &val) { + *this = val; + return *this->val.map; } - bool value_t::number(number_t &out) const { - if (is_number()) { - out = val.num; - return true; - } - return false; + number_t &value_t::number(number_t val) { + *this = val; + return this->val.num; } - bool value_t::string(string_t &out) const { - if (is_string()) { - out = *val.str; - return true; - } - return false; + string_t &value_t::string(const string_t &val) { + *this = val; + return *this->val.str; } - bool value_t::boolean(bool_t &out) const { - if (is_bool()) { - out = val.bl; - return true; - } - return false; + bool_t &value_t::boolean(bool_t val) { + *this = val; + return this->val.bl; } - array_t &value_t::array() const { + + array_t &value_t::array() { if (is_array()) return *val.arr; else throw (std::string)"The value isn't an array."; } - map_t &value_t::map() const { + map_t &value_t::map() { + if (is_map()) return *val.map; + else throw (std::string)"The value isn't a map."; + } + number_t &value_t::number() { + if (is_number()) return val.num; + else throw (std::string)"The value isn't a number."; + } + string_t &value_t::string() { + if (is_string()) return *val.str; + else throw (std::string)"The value isn't a string."; + } + bool_t &value_t::boolean() { + if (is_bool()) return val.bl; + else throw (std::string)"The value isn't a bool."; + } + + + const array_t &value_t::array() const { + if (is_array()) return *val.arr; + else throw (std::string)"The value isn't an array."; + } + const map_t &value_t::map() const { if (is_map()) 
return *val.map; else throw (std::string)"The value isn't a map."; } @@ -68,7 +76,7 @@ namespace ppc::data { if (is_number()) return val.num; else throw (std::string)"The value isn't a number."; } - string_t &value_t::string() const { + const string_t &value_t::string() const { if (is_string()) return *val.str; else throw (std::string)"The value isn't a string."; } @@ -121,12 +129,6 @@ namespace ppc::data { break; } } - value_t value_t::mk_arr() { - return value_t(array_t()); - } - value_t value_t::mk_map() { - return value_t(map_t()); - } value_t::~value_t() { switch (type) { From ef7920843808f9e181514914fe6e2af146ca03cf Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 17 Oct 2022 22:22:38 +0300 Subject: [PATCH 40/74] refactor: map::array_t now extends std::vector, instead of shelling it --- include/utils/data.hh | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/include/utils/data.hh b/include/utils/data.hh index a5ed6d2..4e13f36 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -78,14 +78,14 @@ namespace ppc::data { private: std::unordered_map values; public: - value_t &operator [](std::string name){ + value_t &operator[](std::string name){ auto res = values.find(name); if (res == values.end()) { res = values.emplace(name, value_t()).first; } return res->second; } - const value_t &operator [](std::string name) const { + const value_t &operator[](std::string name) const { auto res = values.find(name); if (res == values.end()) throw "The map doesn't contain a key '" + name + "'."; return res->second; @@ -108,25 +108,5 @@ namespace ppc::data { } }; - class array_t { - private: - std::vector values; - public: - value_t &operator [](std::size_t i) { return values[i]; } - const value_t &operator [](std::size_t i) const { return values[i]; } - - auto begin() const { return values.begin(); } - auto end() const { return values.end(); } - - void push(const value_t 
&val) { values.push_back(val); } - void insert(const value_t &val, std::size_t i = 0) { values.insert(begin() + i, val); } - void pop() { values.pop_back(); } - void remove(std::size_t i = 0) { values.erase(begin() + i); } - - std::size_t size() const { return values.size(); } - - array_t() { } - array_t(const std::vector &val): values(val) { } - array_t(std::initializer_list val): values(val) { } - }; + class array_t : public std::vector { }; } \ No newline at end of file From 328788d32009a8b71e02203213ea8fc5ada1e27b Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 17 Oct 2022 22:39:59 +0300 Subject: [PATCH 41/74] chore: add NONE operator --- include/compiler/treeifier/tokenizer.hh | 2 ++ src/compiler/treeifier/operators.cc | 1 + 2 files changed, 3 insertions(+) diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index feff58d..78cd6b4 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -6,6 +6,8 @@ namespace ppc::comp::tree { enum operator_t { + NONE, + LESS_THAN, GREATER_THAN, LESS_THAN_EQUALS, diff --git a/src/compiler/treeifier/operators.cc b/src/compiler/treeifier/operators.cc index 06df236..fce5567 100644 --- a/src/compiler/treeifier/operators.cc +++ b/src/compiler/treeifier/operators.cc @@ -7,6 +7,7 @@ using namespace std::string_literals; std::vector operators = { + "(none)", "<", ">", "<=", ">=", "==", "!=", "&&", "||", "<<", ">>", "^", "&", "|", "!", "~", "++", "--", From 50c3067802dcd1fba51208ffcf958c204108c978 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Mon, 17 Oct 2022 22:43:49 +0300 Subject: [PATCH 42/74] fix: make appropriate methods of token_t const --- include/compiler/treeifier/tokenizer.hh | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/compiler/treeifier/tokenizer.hh 
b/include/compiler/treeifier/tokenizer.hh index 78cd6b4..93e1cb6 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -95,40 +95,40 @@ namespace ppc::comp::tree { public: ppc::location_t location; - bool is_identifier() { return kind == IDENTIFIER; } - bool is_operator() { return kind == OPERATOR; } - bool is_int_lit() { return kind == INT; } - bool is_float_lit() { return kind == FLOAT; } - bool is_char_lit() { return kind == CHAR; } - bool is_string_lit() { return kind == STRING; } + bool is_identifier() const { return kind == IDENTIFIER; } + bool is_operator() const { return kind == OPERATOR; } + bool is_int_lit() const { return kind == INT; } + bool is_float_lit() const { return kind == FLOAT; } + bool is_char_lit() const { return kind == CHAR; } + bool is_string_lit() const { return kind == STRING; } - const auto &identifier() { + const auto &identifier() const { if (!is_identifier()) throw std::string { "Token is not an identifier." }; else return *data.identifier; } - auto _operator() { + auto _operator() const { if (!is_operator()) throw std::string { "Token is not an operator." }; else return data._operator; } - auto int_lit() { + auto int_lit() const { if (!is_int_lit()) throw std::string { "Token is not an int literal." }; else return data.int_literal; } - auto float_lit() { + auto float_lit() const { if (!is_float_lit()) throw std::string { "Token is not a float literal." }; else return data.float_literal; } - auto char_lit() { + auto char_lit() const { if (!is_char_lit()) throw std::string { "Token is not a char literal." }; else return data.char_literal; } - const auto &string_lit() { + const auto &string_lit() const { if (!is_string_lit()) throw std::string { "Token is not a string literal." 
}; else return *data.string_literal; } - bool is_operator(operator_t op) { return is_operator() && _operator() == op; } - bool is_identifier(std::string &&val) { return is_identifier() && identifier() == val; } + bool is_operator(operator_t op) const { return is_operator() && _operator() == op; } + bool is_identifier(const std::string &val) const { return is_identifier() && identifier() == val; } token_t() { kind = NONE; } token_t(const std::string &identifier, location_t loc = location_t::NONE): location(loc) { From 18c60988510d735bcd15145309da5c04977fa021 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 19 Oct 2022 14:21:05 +0300 Subject: [PATCH 43/74] feat: add basic expression parsing --- include/compiler/treeifier/ast.hh | 2 + include/compiler/treeifier/ast/helper.hh | 2 +- include/lang/common.hh | 16 ++ include/utils/data.hh | 2 +- include/utils/location.hh | 6 + src/compiler/treeifier/ast/conv.cc | 2 +- src/compiler/treeifier/ast/parsers/exp.cc | 242 ++++++++++++++++++ src/compiler/treeifier/ast/parsers/field.cc | 2 +- src/compiler/treeifier/ast/parsers/glob.cc | 11 +- .../treeifier/ast/parsers/identifier.cc | 2 +- src/compiler/treeifier/ast/parsers/nmsp.cc | 2 +- src/compiler/treeifier/ast/parsers/type.cc | 4 +- src/compiler/treeifier/ast/parsers/var.cc | 19 ++ src/compiler/treeifier/lexer.cc | 2 +- src/utils/location.cc | 22 ++ 15 files changed, 323 insertions(+), 13 deletions(-) create mode 100644 src/compiler/treeifier/ast/parsers/exp.cc create mode 100644 src/compiler/treeifier/ast/parsers/var.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 68d0aea..4a340e5 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -26,6 +26,7 @@ namespace ppc::comp::tree::ast { extern const parser_adder_t type_adder; extern const parser_adder_t exp_adder; extern const parser_adder_t field_adder; + extern const parser_adder_t var_adder; struct 
ast_ctx_t { private: @@ -70,6 +71,7 @@ namespace ppc::comp::tree::ast { add_parser(glob_adder); add_parser(type_adder); add_parser(exp_adder); + add_parser(var_adder); add_parser(field_adder); return *this; diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index a9a418d..1181ce2 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -107,7 +107,7 @@ namespace ppc::comp::tree::ast { bool push_parse(const std::string &name, data::array_t &out) { data::map_t res; if (parse(name, res)) { - out.push(res); + out.push_back(res); return true; } else return false; diff --git a/include/lang/common.hh b/include/lang/common.hh index 2bb6186..b846bf8 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -12,6 +12,22 @@ namespace ppc::lang { located_t(const T &val): T(val), location(location_t::NONE) { } located_t() { } }; + template + struct slocated_t { + T value; + location_t location; + + bool operator ==(const slocated_t &other) { + return value == other.value && location == other.location; + } + bool operator !=(const slocated_t &other) { + return !(*this == other); + } + + slocated_t(location_t loc, const T &val): value(val), location(loc) { } + slocated_t(const T &val): value(val), location(location_t::NONE) { } + slocated_t() { } + }; struct namespace_name_t : public std::vector { using base = std::vector; diff --git a/include/utils/data.hh b/include/utils/data.hh index 4e13f36..9dd385b 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -85,7 +85,7 @@ namespace ppc::data { } return res->second; } - const value_t &operator[](std::string name) const { + const value_t &operator [](std::string name) const { auto res = values.find(name); if (res == values.end()) throw "The map doesn't contain a key '" + name + "'."; return res->second; diff --git a/include/utils/location.hh b/include/utils/location.hh index 65efeff..b9728db 100644 --- 
a/include/utils/location.hh +++ b/include/utils/location.hh @@ -12,6 +12,12 @@ namespace ppc { std::size_t code_start; const std::string &filename; + location_t &operator=(const location_t &other); + bool operator==(const location_t &other) const; + bool operator !=(const location_t &other) const { + return !(*this == other); + } + operator std::string() const { return to_string(); } std::string to_string() const; location_t intersect(location_t other) const; diff --git a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc index 2bf6c73..23019b4 100644 --- a/src/compiler/treeifier/ast/conv.cc +++ b/src/compiler/treeifier/ast/conv.cc @@ -54,7 +54,7 @@ namespace ppc::comp::tree::ast::conv { auto arr = res["content"].array({}); for (const auto &segment : nmsp) { - arr.push({ + arr.push_back({ { "location", loc_to_map(segment.location) }, { "content", segment }, { "$_name", "$_nmsp" }, diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc new file mode 100644 index 0000000..de39658 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -0,0 +1,242 @@ +#include "compiler/treeifier/ast/helper.hh" +#include +#include + +enum precedence_t { + NONE, + POSTFIX, + PREFIX, + MULT, + ADD, + SHIFT, + COMP, + EQU, + BIN_AND, + BIN_XOR, + BIN_OR, + BOOL_AND, + BOOL_OR, + TERNARY, + ASSIGN, + PAREN, + CALL_START, +}; + +struct op_data_t { + precedence_t precedence; + size_t op_n; + std::string name; + bool assoc; +}; + +op_data_t sizeof_data { precedence_t::PREFIX, 1, "sizeof", true }; + +std::map pre_ops { + { operator_t::INCREASE, { precedence_t::PREFIX, 1, "inc_pre" } }, + { operator_t::DECREASE, { precedence_t::PREFIX, 1, "dec_pre" } }, + { operator_t::ADD, { precedence_t::PREFIX, 1, "positive" } }, + { operator_t::SUBTRACT, { precedence_t::PREFIX, 1, "negative" } }, + { operator_t::BITWISE_NEGATIVE, { precedence_t::PREFIX, 1, "flip" } }, + { operator_t::MULTIPLY, { precedence_t::PREFIX, 1, "dereference" 
} }, + { operator_t::AND, { precedence_t::PREFIX, 1, "reference" } }, +}; +std::map bin_ops { + { operator_t::ADD, { precedence_t::ADD, 2, "add" } }, + { operator_t::SUBTRACT, { precedence_t::ADD, 2, "subtract" } }, + { operator_t::MULTIPLY, { precedence_t::MULT, 2, "multiply" } }, + { operator_t::DIVIDE, { precedence_t::MULT, 2, "divide" } }, + { operator_t::MODULO, { precedence_t::MULT, 2, "modulo" } }, + { operator_t::INCREASE, { precedence_t::POSTFIX, 1, "inc_post" } }, + { operator_t::DECREASE, { precedence_t::POSTFIX, 1, "dec_post" } }, + { (operator_t)-1, sizeof_data }, +}; + +class exp_parser_t : public parser_t { + map_t op_to_map(located_t op) const { + return { + { "$_name", "$_operator" }, + { "ops", array_t() }, + { "location", conv::loc_to_map(op.location) }, + { "op", op.name }, + }; + } + + bool pop(std::vector> &op_stack, array_t &res) const { + if (op_stack.empty()) return false; + + auto map = op_to_map(op_stack.back()); + auto op_n = op_stack.back().op_n; + op_stack.pop_back(); + + if (res.size() < op_n) return false; + + auto &ops = map["ops"].array(); + + for (size_t i = 0; i < op_n; i++) { + ops.push_back(res.back()); + res.pop_back(); + } + + std::reverse(ops.begin(), ops.end()); + res.push_back(map); + + return true; + } + bool pop_paren(std::vector> &op_stack, array_t &res) const { + bool has_paren = false; + for (const auto &op : op_stack) { + if (op.precedence == precedence_t::PAREN) { + has_paren = true; + break; + } + } + if (!has_paren) return false; + + while (true) { + if (op_stack.back().precedence == precedence_t::PAREN) break; + if (!pop(op_stack, res)) return false; + } + + op_stack.pop_back(); + return true; + } + bool pop_call(size_t n, std::vector> &op_stack, array_t &res) const { + map_t call = { + { "$_name", "$_call" }, + }; + + array_t &args = call["args"].array({}); + + for (size_t i = 0; i <= n; i++) { + args.push_back(res.back()); + res.pop_back(); + } + + std::reverse(args.begin(), args.end()); + + call["func"] = 
res.back(); + res.pop_back(); + res.push_back(call); + + op_stack.pop_back(); + return true; + } + bool pop_until(const op_data_t &data, tree_helper_t &h, std::vector> &op_stack, array_t &res) const { + while (!op_stack.empty()) { + auto &back_data = op_stack.back(); + if (data.assoc ? + back_data.precedence >= data.precedence : + back_data.precedence > data.precedence + ) break; + + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + } + return true; + } + + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + bool last_val = false; + map_t val; + std::vector> op_stack; + std::vector call_args_n; + auto res = array_t(); + + + while (true) { + if (h.ended()) break; + + if (!last_val && h.curr().is_identifier("sizeof")) { + op_stack.push_back({ h.loc(), sizeof_data }); + h.advance("Expected a value on the right side of the operator."); + continue; + } + if (h.curr().is_operator()) { + auto op = h.curr()._operator(); + if (last_val) { + if (op == operator_t::PAREN_OPEN) { + call_args_n.push_back(0); + op_stack.push_back({ h.loc(), { precedence_t::CALL_START } }); + h.advance("Expected an argument."); + last_val = false; + } + else if (op == operator_t::COMMA) { + if (call_args_n.size() == 0) h.err("Unexpected comma here."); + + pop_until({ precedence_t::CALL_START, .assoc = true }, h, op_stack, res); + h.advance("Expected an argument."); + call_args_n.back()++; + last_val = false; + } + else if (h.curr().is_operator(operator_t::PAREN_CLOSE)) { + bool is_call = false, is_paren = false; + + for (auto i = op_stack.rbegin(); i != op_stack.rend(); i++) { + if (i->precedence == precedence_t::PAREN) { + is_paren = true; + break; + } + else if (i->precedence == precedence_t::CALL_START) { + is_call = true; + break; + } + } + + if (is_call) pop_call(call_args_n.back(), op_stack, res); + else if (is_paren) pop_paren(op_stack, res); + else break; + + if (!h.try_advance()) break; + } + 
else if (bin_ops.find(op) != bin_ops.end()) { + auto data = bin_ops[op]; + pop_until(data, h, op_stack, res); + op_stack.push_back({ h.loc(), data }); + + if (data.op_n == 1) { + last_val = true; + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + if (h.try_advance()) break; + } + else { + last_val = false; + h.advance("Expected a value on the right side of the operator."); + } + } + else break; + } + else { + if (op == operator_t::PAREN_OPEN) { + op_stack.push_back({ h.loc(), { precedence_t::PAREN } }); + h.advance("Expected a value."); + last_val = false; + } + else if (pre_ops.find(op) != pre_ops.end()) { + op_stack.push_back({ h.loc(), pre_ops[op] }); + h.advance("Expected a value on the right side of the operator."); + } + else break; + } + continue; + } + if (!last_val && h.push_parse("$_exp_val", res)) last_val = true; + else break; + } + + if (res.size() == 0) return false; + + while (!op_stack.empty()) { + if (op_stack.back().precedence == precedence_t::PAREN) throw message_t::error("Unclosed paren.", op_stack.back().location); + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + } + + out = res.front().map(); + + return h.submit(false); + } + + public: exp_parser_t(): parser_t("$_exp") { } +}; + +const parser_adder_t ppc::comp::tree::ast::exp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new exp_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc index 79d1c0f..072e186 100644 --- a/src/compiler/treeifier/ast/parsers/field.cc +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -41,4 +41,4 @@ class field_parser_t : public parser_t { public: field_parser_t(): parser_t("$_field") { } }; -parser_adder_t ppc::comp::tree::ast::field_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new field_parser_t(), "$_def"); }; +const parser_adder_t ppc::comp::tree::ast::field_adder = [](ast_ctx_t &ctx) { 
ctx.add_parser(new field_parser_t(), "$_def"); }; diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index 427b718..ead5048 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -45,6 +45,9 @@ auto nmsp_def_parser = nmsp_def_parser_t(); class glob_parser_t : public parser_t { bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { tree_helper_t h(ctx, res_i); + + return h.parse("$_exp", out); + if (h.ended()) return true; if (nmsp_def_parser(ctx, h.i, (out["namespace"] = map_t()).map())) { ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); @@ -56,7 +59,7 @@ class glob_parser_t : public parser_t { while (true) { map_t map; if (!import_parser(ctx, h.i, map)) break; - imports.push(map); + imports.push_back(map); auto nmsp = conv::map_to_nmsp(map); if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); @@ -79,8 +82,8 @@ public: glob_parser_t(): parser_t("$_glob") { } }; -parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { - ctx.add_parser(new group_parser_t("$_def")); - ctx.add_parser(new group_parser_t("$_expr_val")); +const parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { + ctx.add_group("$_def"); + ctx.add_group("$_exp_val"); ctx.add_parser(new glob_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc index 3b3d2fc..e44eaed 100644 --- a/src/compiler/treeifier/ast/parsers/identifier.cc +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -18,4 +18,4 @@ class identifier_parser_t : public parser_t { public: identifier_parser_t(): parser_t("$_identifier") { } }; -parser_adder_t ppc::comp::tree::ast::identifier_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new identifier_parser_t()); }; +const parser_adder_t ppc::comp::tree::ast::identifier_adder = [](ast_ctx_t &ctx) { 
ctx.add_parser(new identifier_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc index b1bd4b4..576aec1 100644 --- a/src/compiler/treeifier/ast/parsers/nmsp.cc +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -23,4 +23,4 @@ class nmsp_parser_t : public parser_t { public: nmsp_parser_t(): parser_t("$_nmsp") { } }; -parser_adder_t ppc::comp::tree::ast::nmsp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new nmsp_parser_t()); }; +const parser_adder_t ppc::comp::tree::ast::nmsp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new nmsp_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 0425a4d..cf0801f 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -27,7 +27,7 @@ class type_parser_t : public parser_t { out["location"] = conv::loc_to_map(h.res_loc()); out["name"] = nmsp_content[nmsp_content.size() - 1]; out["ptr_n"] = (float)ptr_n; - nmsp_content.pop(); + nmsp_content.pop_back(); if (nmsp_content.size() == 0) { auto loc = h.res_loc(); @@ -47,4 +47,4 @@ class type_parser_t : public parser_t { public: type_parser_t(): parser_t("$_type") { } }; -parser_adder_t ppc::comp::tree::ast::type_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new type_parser_t()); }; +const parser_adder_t ppc::comp::tree::ast::type_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new type_parser_t()); }; diff --git a/src/compiler/treeifier/ast/parsers/var.cc b/src/compiler/treeifier/ast/parsers/var.cc new file mode 100644 index 0000000..4d3a8e0 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/var.cc @@ -0,0 +1,19 @@ +#include "compiler/treeifier/ast/helper.hh" + +class var_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.curr().is_identifier()) { + out["content"] = h.curr().identifier(); + out["location"] = 
conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; + } + + public: var_parser_t(): parser_t("$_var") { } +}; + +const parser_adder_t ppc::comp::tree::ast::var_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new var_parser_t(), "$_exp_val"); }; diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 2f89eea..4d6e1e3 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -34,7 +34,7 @@ static inline bool is_any(char c, std::string chars) { return res; } static inline bool is_operator(char c) { - return is_any(c, "=!<>+-*/%&|^?:,.(){}[];"); + return is_any(c, "=!<>+-*/%&|^?:,.(){}[];~"); } static res_t lexlet_default(char c, std::vector &tok); diff --git a/src/utils/location.cc b/src/utils/location.cc index bfc2481..4138fc1 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -2,6 +2,7 @@ #include using namespace ppc; +using namespace std::string_literals; std::string location_t::to_string() const { std::stringstream res; @@ -55,6 +56,27 @@ location_t location_t::intersect(location_t other) const { return a; } +location_t &location_t::operator=(const location_t &other) { + if (this->filename != other.filename) throw "Can't assign to location with different filename."s; + this->line = other.line; + this->start = other.start; + this->length = other.length; + this->code_start = other.code_start; + return *this; +} + +bool location_t::operator==(const location_t &other) const { + if (this->filename != other.filename) return false; + if (this->line != other.line) return false; + if (this->start != other.start) return false; + if (this->length != other.length) return false; + if (this->code_start != other.code_start) return false; + + return true; +} + + + std::string empty = ""; location_t::location_t(): From 8a8712fb9da028d1841e94abe7547ce169f8f4c1 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 19 Oct 2022 14:39:11 +0300 Subject: 
[PATCH 44/74] feat: add casting to expressions --- src/compiler/treeifier/ast/parsers/exp.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index de39658..57e047f 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -189,6 +189,18 @@ class exp_parser_t : public parser_t { if (!h.try_advance()) break; } + else if (h.curr().is_operator(operator_t::COLON)) { + h.advance("Expected a type."); + pop_until({ precedence_t::PREFIX, .assoc = true }, h, op_stack, res); + map_t cast = { + { "$_name", "$_cast" }, + { "exp", res.back() }, + }; + + res.pop_back(); + h.force_parse("$_type", "Expected a type.", cast["type"].map({})); + res.push_back(cast); + } else if (bin_ops.find(op) != bin_ops.end()) { auto data = bin_ops[op]; pop_until(data, h, op_stack, res); From 9aa0063e30aec51f73876732104494a3ab8bd291 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 19 Oct 2022 16:17:56 +0300 Subject: [PATCH 45/74] fix: some != -1 comparison fixes --- src/compiler/treeifier/ast/conv.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc index 23019b4..6bacd72 100644 --- a/src/compiler/treeifier/ast/conv.cc +++ b/src/compiler/treeifier/ast/conv.cc @@ -18,10 +18,10 @@ namespace ppc::comp::tree::ast::conv { }; res["filename"] = loc.filename; - if (loc.start != -1u) res["start"] = (float)loc.start; - if (loc.start != -1u) res["line"] = (float)loc.line; - if (loc.code_start != -1u) res["code_start"] = (float)loc.code_start; - if (loc.length != -1u) res["length"] = (float)loc.length; + if (loc.start + 1) res["start"] = (float)loc.start; + if (loc.start + 1) res["line"] = (float)loc.line; + if (loc.code_start + 1) res["code_start"] = (float)loc.code_start; + if (loc.length + 1) res["length"] = 
(float)loc.length; return res; } From a5c9a163f2031d4adc52d9f2701f28b4555d683a Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Wed, 19 Oct 2022 16:28:29 +0300 Subject: [PATCH 46/74] fix: namespaces of types are now read correctly --- src/compiler/treeifier/ast/parsers/type.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index cf0801f..48c8032 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -16,6 +16,7 @@ class type_parser_t : public parser_t { while (true) { if (h.ended()) break; if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; + h.advance("Expected an identifier."); h.force_push_parse("$_identifier", "Expected an identifier.", nmsp_content); } From c5f8438f1a5722d9aedc697231fb004a03981243 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 13:30:20 +0300 Subject: [PATCH 47/74] chore: make filename of location_t NOT be a reference --- include/utils/location.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/utils/location.hh b/include/utils/location.hh index b9728db..b67dbd8 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -10,7 +10,7 @@ namespace ppc { std::size_t start; std::size_t length; std::size_t code_start; - const std::string &filename; + std::string filename; location_t &operator=(const location_t &other); bool operator==(const location_t &other) const; From 7a1cc275147f0f667c8ebbecc39ff961f1c16eb0 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 13:31:18 +0300 Subject: [PATCH 48/74] chore: made location - map converters convert to string instead --- include/compiler/treeifier/ast.hh | 4 +-- src/compiler/treeifier/ast/conv.cc | 53 ++++++++++++------------------ 2 files changed, 
23 insertions(+), 34 deletions(-) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 4a340e5..e1ecd3d 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -122,8 +122,8 @@ namespace ppc::comp::tree::ast { data::map_t identifier_to_map(const located_t &loc); located_t map_to_identifier(const data::map_t &map); - data::map_t loc_to_map(const location_t &loc); - location_t map_to_loc(const data::map_t &map); + data::string_t loc_to_map(const location_t &loc); + location_t map_to_loc(const data::string_t &map); data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp); loc_namespace_name_t map_to_nmsp(const data::map_t &map); diff --git a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc index 6bacd72..f3a688d 100644 --- a/src/compiler/treeifier/ast/conv.cc +++ b/src/compiler/treeifier/ast/conv.cc @@ -1,3 +1,4 @@ +#include #include "compiler/treeifier/ast.hh" namespace ppc::comp::tree::ast::conv { @@ -9,43 +10,31 @@ namespace ppc::comp::tree::ast::conv { }; } located_t map_to_identifier(const data::map_t &map) { - return { conv::map_to_loc(map["location"].map()), map["content"].string() }; + return { conv::map_to_loc(map["location"].string()), map["content"].string() }; } - data::map_t loc_to_map(const location_t &loc) { - data::map_t res = { - { "$_name", "$_loc" }, - }; - - res["filename"] = loc.filename; - if (loc.start + 1) res["start"] = (float)loc.start; - if (loc.start + 1) res["line"] = (float)loc.line; - if (loc.code_start + 1) res["code_start"] = (float)loc.code_start; - if (loc.length + 1) res["length"] = (float)loc.length; - - return res; + data::string_t loc_to_map(const location_t &loc) { + std::stringstream res; + res << loc.filename << ':' << loc.line + 1 << ':' << loc.start + 1 << ':' << loc.code_start + 1 << ':' << loc.length + 1; + return res.str(); } - location_t map_to_loc(const data::map_t &map) { - location_t res(map["filename"].string()); + 
location_t map_to_loc(const data::string_t &map) { + std::stringstream res; + res.str(map); - if (map.has("start")) { - if (map["start"].is_number()) res.start = (size_t)map["start"].number(); - else throw "Expected key 'start' to be a number."; - } - if (map.has("length")) { - if (map["length"].is_number()) res.length = (size_t)map["length"].number(); - else throw "Expected key 'length' to be a number."; - } - if (map.has("code_start")) { - if (map["code_start"].is_number()) res.code_start = (size_t)map["code_start"].number(); - else throw "Expected key 'code_start' to be a number."; - } - if (map.has("line")) { - if (map["line"].is_number()) res.line = (size_t)map["line"].number(); - else throw "Expected key 'line' to be a number."; - } + std::string filename; + std::string line; + std::string start; + std::string code_start; + std::string length; - return res; + std::getline(res, filename, ':'); + std::getline(res, line, ':'); + std::getline(res, start, ':'); + std::getline(res, code_start, ':'); + std::getline(res, length, ':'); + + return { filename, std::stoull(line) - 1, std::stoull(start) - 1, std::stoull(code_start) - 1, std::stoull(length) - 1 }; } data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp) { From 6245a41fe00b1509036da1d9de47885b9b85e34c Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 13:32:05 +0300 Subject: [PATCH 49/74] fix: remove = operator from location --- src/utils/location.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/utils/location.cc b/src/utils/location.cc index 4138fc1..9169ddd 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -56,15 +56,6 @@ location_t location_t::intersect(location_t other) const { return a; } -location_t &location_t::operator=(const location_t &other) { - if (this->filename != other.filename) throw "Can't assign to location with different filename."s; - this->line = other.line; - this->start = other.start; - 
this->length = other.length; - this->code_start = other.code_start; - return *this; -} - bool location_t::operator==(const location_t &other) const { if (this->filename != other.filename) return false; if (this->line != other.line) return false; From bd90a3ed6d265ebd2b93ec06acf4a5b8f853b4c1 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 13:32:20 +0300 Subject: [PATCH 50/74] fix: remove = operator from location 2 --- include/utils/location.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/include/utils/location.hh b/include/utils/location.hh index b67dbd8..82fd250 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -12,7 +12,6 @@ namespace ppc { std::size_t code_start; std::string filename; - location_t &operator=(const location_t &other); bool operator==(const location_t &other) const; bool operator !=(const location_t &other) const { return !(*this == other); From 0cb7e250970f93464694aaa4bd5f7a4fcbe474b7 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 13:32:52 +0300 Subject: [PATCH 51/74] fix: some location fixes --- src/compiler/treeifier/ast/parsers/exp.cc | 23 +++++++++++++++++----- src/compiler/treeifier/ast/parsers/type.cc | 4 ++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index 57e047f..04221a2 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -66,17 +66,22 @@ class exp_parser_t : public parser_t { auto map = op_to_map(op_stack.back()); auto op_n = op_stack.back().op_n; + auto loc = op_stack.back().location; op_stack.pop_back(); if (res.size() < op_n) return false; auto &ops = map["ops"].array(); + for (size_t i = 0; i < op_n; i++) { ops.push_back(res.back()); + loc = loc.intersect(conv::map_to_loc(res.back().map()["location"].string())); 
res.pop_back(); } + map["location"] = conv::loc_to_map(loc); + std::reverse(ops.begin(), ops.end()); res.push_back(map); @@ -100,13 +105,21 @@ class exp_parser_t : public parser_t { op_stack.pop_back(); return true; } - bool pop_call(size_t n, std::vector> &op_stack, array_t &res) const { + bool pop_call(size_t n, location_t loc, std::vector> &op_stack, array_t &res) const { map_t call = { { "$_name", "$_call" }, }; array_t &args = call["args"].array({}); + while (true) { + if (op_stack.back().precedence == precedence_t::CALL_START) break; + if (!pop(op_stack, res)) return false; + } + loc = loc.intersect(op_stack.back().location); + op_stack.pop_back(); + call["location"] = conv::loc_to_map(loc); + for (size_t i = 0; i <= n; i++) { args.push_back(res.back()); res.pop_back(); @@ -118,7 +131,6 @@ class exp_parser_t : public parser_t { res.pop_back(); res.push_back(call); - op_stack.pop_back(); return true; } bool pop_until(const op_data_t &data, tree_helper_t &h, std::vector> &op_stack, array_t &res) const { @@ -145,7 +157,7 @@ class exp_parser_t : public parser_t { while (true) { - if (h.ended()) break; + if (h.ended()) break; if (!last_val && h.curr().is_identifier("sizeof")) { op_stack.push_back({ h.loc(), sizeof_data }); @@ -162,7 +174,7 @@ class exp_parser_t : public parser_t { last_val = false; } else if (op == operator_t::COMMA) { - if (call_args_n.size() == 0) h.err("Unexpected comma here."); + if (call_args_n.size() == 0) break; pop_until({ precedence_t::CALL_START, .assoc = true }, h, op_stack, res); h.advance("Expected an argument."); @@ -183,7 +195,7 @@ class exp_parser_t : public parser_t { } } - if (is_call) pop_call(call_args_n.back(), op_stack, res); + if (is_call) pop_call(call_args_n.back(), h.loc(), op_stack, res); else if (is_paren) pop_paren(op_stack, res); else break; @@ -240,6 +252,7 @@ class exp_parser_t : public parser_t { while (!op_stack.empty()) { if (op_stack.back().precedence == precedence_t::PAREN) throw message_t::error("Unclosed 
paren.", op_stack.back().location); + if (op_stack.back().precedence == precedence_t::CALL_START) throw message_t::error("Unclosed call.", op_stack.back().location); if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); } diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 48c8032..9cc580f 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -36,8 +36,8 @@ class type_parser_t : public parser_t { nmsp["location"] = conv::loc_to_map(loc); } else { - auto loc_1 = conv::map_to_loc(nmsp_content[0].map()["location"].map()); - auto loc_2 = conv::map_to_loc(nmsp_content[nmsp_content.size() - 1].map()["location"].map()); + auto loc_1 = conv::map_to_loc(nmsp_content[0].map()["location"].string()); + auto loc_2 = conv::map_to_loc(nmsp_content[nmsp_content.size() - 1].map()["location"].string()); auto loc = loc_1.intersect(loc_2); nmsp["location"] = conv::loc_to_map(loc); } From eb167d0a508f5bb720b5d0e2a917b0bf1d353f52 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 14:30:19 +0300 Subject: [PATCH 52/74] chore: improve named constructs in groups --- include/compiler/treeifier/ast.hh | 4 +- include/lang/common.hh | 22 ++++++-- src/compiler/treeifier/ast.cc | 4 ++ src/compiler/treeifier/ast/parsers/group.cc | 57 ++++++++++++--------- src/lang/common.cc | 50 ++++++------------ 5 files changed, 75 insertions(+), 62 deletions(-) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index e1ecd3d..6842d6d 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -2,6 +2,7 @@ #include #include +#include #include #include #include "compiler/treeifier/tokenizer.hh" @@ -56,6 +57,7 @@ namespace ppc::comp::tree::ast { void add_parser(const parser_t *parser); void add_parser(const parser_t *parser, const 
std::string &group); + void add_parser(const parser_t *parser, const std::string &group, const namespace_name_t &name); void add_group(const std::string &name); void add_parser(parser_adder_t factory) { factory(*this); } @@ -107,7 +109,7 @@ namespace ppc::comp::tree::ast { class group_parser_t : public parser_t { private: - std::vector> named_parsers; + std::map named_parsers; std::vector parsers; public: group_parser_t &add(const parser_t &parser); diff --git a/include/lang/common.hh b/include/lang/common.hh index b846bf8..6d193c8 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -32,8 +32,14 @@ namespace ppc::lang { struct namespace_name_t : public std::vector { using base = std::vector; - bool operator ==(const namespace_name_t &other) const; - bool operator !=(const namespace_name_t &other) const; + int compare(const namespace_name_t &other) const; + + bool operator==(const namespace_name_t &other) const { return compare(other) == 0; } + bool operator!=(const namespace_name_t &other) const { return compare(other) != 0; } + bool operator<(const namespace_name_t &other) const { return compare(other) < 0; } + bool operator<=(const namespace_name_t &other) const { return compare(other) <= 0; } + bool operator>(const namespace_name_t &other) const { return compare(other) > 0; } + bool operator>=(const namespace_name_t &other) const { return compare(other) >= 0; } operator std::string() const { return to_string(); } std::string to_string() const; @@ -45,10 +51,16 @@ namespace ppc::lang { struct loc_namespace_name_t : public std::vector> { using base = std::vector>; - bool operator ==(const loc_namespace_name_t &other) const; - bool operator !=(const loc_namespace_name_t &other) const; + int compare(const loc_namespace_name_t &other) const; - namespace_name_t strip_location(); + bool operator==(const loc_namespace_name_t &other) const { return compare(other) == 0; } + bool operator!=(const loc_namespace_name_t &other) const { return compare(other) != 
0; } + bool operator<(const loc_namespace_name_t &other) const { return compare(other) < 0; } + bool operator<=(const loc_namespace_name_t &other) const { return compare(other) <= 0; } + bool operator>(const loc_namespace_name_t &other) const { return compare(other) > 0; } + bool operator>=(const loc_namespace_name_t &other) const { return compare(other) >= 0; } + + namespace_name_t strip_location() const; operator std::string() const { return to_string(); } std::string to_string() const; diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index aca1368..e0e66b8 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -28,6 +28,10 @@ namespace ppc::comp::tree::ast { add_parser(parser); this->group[group].add(*parser); } + void ast_ctx_t::add_parser(const parser_t *parser, const std::string &group, const namespace_name_t &name) { + add_parser(parser); + this->group[group].add(*parser, name); + } void ast_ctx_t::add_group(const std::string &name) { auto parser = new group_parser_t(name); if (parsers.find(parser->name()) != parsers.end()) throw "The parser '" + parser->name() + "' already exists."; diff --git a/src/compiler/treeifier/ast/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc index 7e19636..c52d103 100644 --- a/src/compiler/treeifier/ast/parsers/group.cc +++ b/src/compiler/treeifier/ast/parsers/group.cc @@ -10,26 +10,34 @@ using namespace ppc::comp::tree::ast; using namespace std::string_literals; using namespace std; -static bool read_nmsp(ast_ctx_t &ctx, size_t &i, const lang::namespace_name_t &name) { +static bool read_nmsp(ast_ctx_t &ctx, size_t &i, lang::loc_namespace_name_t &name) { tree_helper_t h(ctx, i); - - size_t equal_i = 0; - - while (true) { - if (h.ended()) break; - if (equal_i >= name.size()) return false; - auto &curr = h.curr(); - if (!curr.is_identifier()) return false; - - if (name[equal_i] != curr.identifier()) return false; - - if (h.try_advance() && 
h.curr().is_operator(operator_t::DOUBLE_COLON)) { - equal_i++; + map_t res; + if (!h.parse("$_nmsp", res)) return false; + name = conv::map_to_nmsp(res); + return true; +} +template +static bool resolve_nmsp(ast_ctx_t &ctx, const lang::namespace_name_t &name, T begin, T end, lang::namespace_name_t &actual_name) { + for (auto it = begin; it != end; it++) { + const namespace_name_t &curr = it->first; + if (curr == name) { + actual_name = name; + return true; } - else break; } - - return equal_i != name.size(); + for (const auto &import : ctx.imports) { + auto new_name = name; + new_name.insert(new_name.begin(), import.begin(), import.end()); + for (auto it = begin; it != end; it++) { + const namespace_name_t &curr = it->first; + if (curr == new_name) { + actual_name = name; + return true; + } + } + } + return false; } @@ -38,11 +46,14 @@ bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { if (h.ended()) return false; - for (auto &pair : named_parsers) { - if (!read_nmsp(ctx, i, pair.first)) continue; - auto &parser = *pair.second; - if (parser(ctx, i, out)) return true; - else throw message_t::error("Unexpected construct specifier.", h.res_loc()); + loc_namespace_name_t name; + if (read_nmsp(ctx, h.i, name)) { + namespace_name_t actual; + if (resolve_nmsp(ctx, name.strip_location(), named_parsers.begin(), named_parsers.end(), actual)) { + auto &parser = *this->named_parsers.find(actual)->second; + if (parser(ctx, i, out)) return true; + else throw message_t::error("Unexpected construct specifier.", h.res_loc()); + } } unordered_map errors; @@ -66,7 +77,7 @@ group_parser_t &group_parser_t::add(const parser_t &parser, const lang::namespac throw "Parser '" + name.to_string() + "' already in group."; } - named_parsers.push_back({ name, &parser }); + named_parsers[name] = &parser; return *this; } diff --git a/src/lang/common.cc b/src/lang/common.cc index 36f1e8a..bf4bd70 100644 --- a/src/lang/common.cc +++ b/src/lang/common.cc @@ -23,46 +23,31 
@@ namespace ppc::lang { return res.str(); } - - bool loc_namespace_name_t::operator==(const loc_namespace_name_t &other) const { - if (other.size() != size()) return false; - for (size_t i = 0; i < size(); i++) { - if (other[i] != (*this)[i]) return false; + int namespace_name_t::compare(const namespace_name_t &b) const { + const auto &a = *this; + for (size_t i = 0; i < a.size() && i < b.size(); i++) { + auto cmp = a[i].compare(b[i]); + if (cmp != 0) return cmp; } - return true; + if (a.size() > b.size()) return 1; + else if (a.size() == b.size()) return 0; + else return -1; } - bool loc_namespace_name_t::operator!=(const loc_namespace_name_t &other) const { - if (other.size() != size()) return true; - - for (size_t i = 0; i < size(); i++) { - if (other[i] == (*this)[i]) return false; + int loc_namespace_name_t::compare(const loc_namespace_name_t &b) const { + const auto &a = *this; + for (size_t i = 0; i < a.size() && i < b.size(); i++) { + auto cmp = a[i].compare(b[i]); + if (cmp != 0) return cmp; } - return true; + if (a.size() > b.size()) return 1; + else if (a.size() == b.size()) return 0; + else return -1; } - bool namespace_name_t::operator==(const namespace_name_t &other) const { - if (other.size() != size()) return false; - - for (size_t i = 0; i < size(); i++) { - if (other[i] != (*this)[i]) return false; - } - - return true; - } - bool namespace_name_t::operator!=(const namespace_name_t &other) const { - if (other.size() != size()) return true; - - for (size_t i = 0; i < size(); i++) { - if (other[i] == (*this)[i]) return false; - } - - return true; - } - - namespace_name_t loc_namespace_name_t::strip_location() { + namespace_name_t loc_namespace_name_t::strip_location() const { namespace_name_t res; for (const auto &el : *this) { @@ -71,6 +56,5 @@ namespace ppc::lang { return res; } - } From 3229d41d851674d383c4a93a6cfec55ced380d18 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 14:30:44 
+0300 Subject: [PATCH 53/74] feat: add member access to expressions --- src/compiler/treeifier/ast/parsers/exp.cc | 28 +++++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index 04221a2..d93310b 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -164,24 +164,25 @@ class exp_parser_t : public parser_t { h.advance("Expected a value on the right side of the operator."); continue; } + if (!last_val && h.push_parse("$_exp_val", res)) last_val = true; if (h.curr().is_operator()) { auto op = h.curr()._operator(); if (last_val) { if (op == operator_t::PAREN_OPEN) { + h.advance("Expected an argument."); call_args_n.push_back(0); op_stack.push_back({ h.loc(), { precedence_t::CALL_START } }); - h.advance("Expected an argument."); last_val = false; } else if (op == operator_t::COMMA) { if (call_args_n.size() == 0) break; + h.advance("Expected an argument."); pop_until({ precedence_t::CALL_START, .assoc = true }, h, op_stack, res); - h.advance("Expected an argument."); call_args_n.back()++; last_val = false; } - else if (h.curr().is_operator(operator_t::PAREN_CLOSE)) { + else if (op == operator_t::PAREN_CLOSE) { bool is_call = false, is_paren = false; for (auto i = op_stack.rbegin(); i != op_stack.rend(); i++) { @@ -201,7 +202,7 @@ class exp_parser_t : public parser_t { if (!h.try_advance()) break; } - else if (h.curr().is_operator(operator_t::COLON)) { + else if (op == operator_t::COLON) { h.advance("Expected a type."); pop_until({ precedence_t::PREFIX, .assoc = true }, h, op_stack, res); map_t cast = { @@ -213,6 +214,24 @@ class exp_parser_t : public parser_t { h.force_parse("$_type", "Expected a type.", cast["type"].map({})); res.push_back(cast); } + else if (op == operator_t::DOT || op == operator_t::PTR_MEMBER) { + h.advance("Expected an identifier."); + pop_until({ precedence_t::POSTFIX, .assoc = true 
}, h, op_stack, res); + + map_t member_access = { + { "$_name", "$_member" }, + { "exp", res.back() }, + { "is_ptr", op == operator_t::PTR_MEMBER }, + }; + h.force_parse("$_identifier", "Expected an identifier.", member_access["name"].map({})); + member_access["location"] = conv::loc_to_map( + conv::map_to_loc(member_access["name"].map()["location"].string()).intersect( + conv::map_to_loc(res.back().map()["location"].string()) + ) + ); + res.pop_back(); + res.push_back(member_access); + } else if (bin_ops.find(op) != bin_ops.end()) { auto data = bin_ops[op]; pop_until(data, h, op_stack, res); @@ -244,7 +263,6 @@ class exp_parser_t : public parser_t { } continue; } - if (!last_val && h.push_parse("$_exp_val", res)) last_val = true; else break; } From 17e5021fd8f1fe9eba9197b29c16e096f19a5c58 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 20 Oct 2022 22:41:41 +0300 Subject: [PATCH 54/74] chore: remove arbitrary error catching for debug mode --- src/main/main.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/main.cc b/src/main/main.cc index a849937..b7d6f08 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -169,6 +169,7 @@ int main(int argc, const char *argv[]) { catch (const messages::message_t &msg) { msg_stack.push(msg); } + #ifndef PROFILE_debug catch (const std::string &msg) { msg_stack.push(message_t::error(msg)); } @@ -176,6 +177,7 @@ int main(int argc, const char *argv[]) { std::cout << std::endl; msg_stack.push(message_t::error("A fatal error occurred.")); } + #endif msg_stack.print(std::cout, messages::message_t::DEBUG, true); From d7ee0e3bb2b7ee30d41ba68ea470781dee4d0856 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Sat, 22 Oct 2022 15:19:05 +0300 Subject: [PATCH 55/74] feat: add function parsing --- include/compiler/treeifier/ast.hh | 4 +- include/utils/data.hh | 1 + scripts/common.mak | 6 +- src/compiler/treeifier/ast.cc | 22 +++-- 
src/compiler/treeifier/ast/parsers/exp.cc | 18 +++- src/compiler/treeifier/ast/parsers/field.cc | 2 - src/compiler/treeifier/ast/parsers/func.cc | 101 ++++++++++++++++++++ src/compiler/treeifier/ast/parsers/glob.cc | 15 ++- 8 files changed, 145 insertions(+), 24 deletions(-) create mode 100644 src/compiler/treeifier/ast/parsers/func.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 6842d6d..b83f24d 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -27,6 +27,7 @@ namespace ppc::comp::tree::ast { extern const parser_adder_t type_adder; extern const parser_adder_t exp_adder; extern const parser_adder_t field_adder; + extern const parser_adder_t func_adder; extern const parser_adder_t var_adder; struct ast_ctx_t { @@ -43,6 +44,7 @@ namespace ppc::comp::tree::ast { private: ast_ctx_t *parent; public: + group_parser_t &operator[](const std::string &name); group_parser_t &operator[](const std::string &name) const; group_proxy_t(ast_ctx_t *parent): parent(parent) { } }; @@ -58,7 +60,6 @@ namespace ppc::comp::tree::ast { void add_parser(const parser_t *parser); void add_parser(const parser_t *parser, const std::string &group); void add_parser(const parser_t *parser, const std::string &group, const namespace_name_t &name); - void add_group(const std::string &name); void add_parser(parser_adder_t factory) { factory(*this); } @@ -75,6 +76,7 @@ namespace ppc::comp::tree::ast { add_parser(exp_adder); add_parser(var_adder); add_parser(field_adder); + add_parser(func_adder); return *this; } diff --git a/include/utils/data.hh b/include/utils/data.hh index 9dd385b..0001345 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -73,6 +73,7 @@ namespace ppc::data { }; + static const value_t null{}; class map_t { private: diff --git a/scripts/common.mak b/scripts/common.mak index 626748c..c9091f3 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -43,16 +43,16 @@ build: $(binary) 
.SECONDEXPANSION: $(binary): $$(call frdeps,$(mainmodule)) $$(call binaries,$(mainmodule)) $(call mkdir,$(dir $@)) - echo Compiling executable '$(notdir $(binary))'... $(CXX) $(flags) $(call binaries,$(mainmodule)) -o $@ $(ldflags) $(call ldeps,$(mainmodule)) -L$(bin) "-I$(inc)" + echo Compiling executable '$(notdir $(binary))'... .SECONDEXPANSION: $(bin)/lib$(lib)%$(so): $$(call frdeps,$$*) $$(call binaries,$$*) $(call mkdir,$(bin)) - echo Compiling library '$(notdir $@)'... $(CXX) -shared -fPIC $(flags) $(call binaries,$*) -o $@ $(ldflags) $(call ldeps,$*) -L$(bin) "-I$(inc)" + echo Compiling library '$(notdir $@)'... $(bin)/tmp/%.o: $(src)/%.cc $(headers) - echo - Compiling '$*.cc'... $(call mkdir,$(dir $@)) $(CXX) -fPIC -c $(flags) $< -o $@ + echo - Compiling '$*.cc'... diff --git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc index e0e66b8..aea1723 100644 --- a/src/compiler/treeifier/ast.cc +++ b/src/compiler/treeifier/ast.cc @@ -9,9 +9,19 @@ namespace ppc::comp::tree::ast { return *it->second; } group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { - auto p = (group_parser_t*)&parent->parser[name]; - if (parent->groups.find(p) == parent->groups.end()) throw "A parser '" + name + "' exists, but isn't a group."; - return *p; + auto it = parent->parsers.find(name); + if (it == parent->parsers.end()) { + auto p = new group_parser_t(name); + parent->parsers[name] = p; + parent->groups.emplace(p); + return *p; + } + else if (parent->groups.find((group_parser_t*)it->second) == parent->groups.end()) { + throw "A parser '" + name + "' exists, but isn't a group."s; + } + else { + return *(group_parser_t*)it->second; + } } ast_ctx_t::~ast_ctx_t() { @@ -32,12 +42,6 @@ namespace ppc::comp::tree::ast { add_parser(parser); this->group[group].add(*parser, name); } - void ast_ctx_t::add_group(const std::string &name) { - auto parser = new group_parser_t(name); - if (parsers.find(parser->name()) != parsers.end()) throw "The 
parser '" + parser->name() + "' already exists."; - parsers[parser->name()] = parser; - groups.emplace(parser); - } data::map_t ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens) { ast_ctx_t ctx(messages, tokens); diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index d93310b..f4ed46e 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -282,4 +282,20 @@ class exp_parser_t : public parser_t { public: exp_parser_t(): parser_t("$_exp") { } }; -const parser_adder_t ppc::comp::tree::ast::exp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new exp_parser_t()); }; +class exp_stat_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &i, map_t &res) const { + tree_helper_t h(ctx, i); + if (!h.parse("$_exp", res)) return false; + if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + + public: exp_stat_parser_t() : parser_t("$_exp_stat") { } +}; + +const parser_adder_t ppc::comp::tree::ast::exp_adder = [](ast_ctx_t &ctx) { + ctx.add_parser(new exp_parser_t()); + ctx.add_parser(new exp_stat_parser_t(), "$_stat"); +}; diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc index 072e186..631faac 100644 --- a/src/compiler/treeifier/ast/parsers/field.cc +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -18,8 +18,6 @@ class field_parser_t : public parser_t { type = true; } if (h.curr().is_operator(operator_t::ASSIGN)) { - h.i++; - h.err("Default values are not yet supported.", 1); h.advance(); h.force_parse("$_exp", "Expected an expression.", out["value"].map({})); type = true; diff --git a/src/compiler/treeifier/ast/parsers/func.cc b/src/compiler/treeifier/ast/parsers/func.cc new file mode 100644 index 0000000..1380ce3 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/func.cc @@ 
-0,0 +1,101 @@ +#include "compiler/treeifier/ast/helper.hh" + +class arg_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse("$_identifier", out["name"].map({}))) return false; + + bool type, defval; + + h.throw_ended("Expected a colon or an equals sign."); + + if (h.curr().is_operator(operator_t::COLON)) { + h.advance(); + h.force_parse("$_type", "Expected a type.", out["type"].map({})); + type = true; + } + if (h.curr().is_operator(operator_t::ASSIGN)) { + h.advance(); + h.force_parse("$_exp", "Expected an expression.", out["value"].map({})); + type = true; + } + + if (!type && !defval) { + ctx.messages.push(message_t::error("Expected a type or a default value.", h.loc(1))); + } + + return h.submit(false); + } + + public: arg_parser_t(): parser_t("$_func_arg") {} +}; + +class func_parser_t : public parser_t { + bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse("$_identifier", out["name"].map({}))) return false; + if (h.ended()) return false; + if (!h.curr().is_operator(operator_t::PAREN_OPEN)) return false; + h.advance("Expected a closing paren or a parameter."); + + auto ¶ms = out["params"].array({}); + auto &content = out["content"].array({}); + + while (true) { + if (h.curr().is_operator(operator_t::PAREN_CLOSE)) { + h.advance("Expected a function body."); + break; + } + h.force_push_parse("$_func_arg", "Expected a parameter.", params); + if (h.curr().is_operator(operator_t::COMMA)) { + h.advance("Expected a parameter."); + } + } + + if (h.curr().is_operator(operator_t::COLON)) { + h.advance("Expected a type."); + h.force_parse("$_type", "Expected a type", out["type"].map({})); + } + + if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + else if (h.curr().is_operator(operator_t::LAMBDA)) { + h.advance("Expected an expression."); 
+ map_t exp; + h.force_parse("$_exp", "Expected an expression.", exp); + content.push_back({ + { "$_name", "$_return" }, + { "content", exp }, + }); + return h.submit(false); + } + else if (h.curr().is_operator(operator_t::BRACE_OPEN)) { + h.advance("Expected a statement."); + while (true) { + if (h.curr().is_operator(operator_t::BRACE_CLOSE)) { + return h.submit(true); + } + + h.force_push_parse("$_stat", "Expected an expression.", content); + } + } + else { + ctx.messages.push(message_t::error("Expected a semicolon, brace open or a lambda operator.", h.loc(1))); + return h.submit(false); + } + + return h.submit(true); + } + + public: func_parser_t(): parser_t("$_func") { } +}; + +const parser_adder_t ppc::comp::tree::ast::func_adder = [](ast_ctx_t &ctx) { + ctx.add_parser(new func_parser_t(), "$_def"); + ctx.add_parser(new arg_parser_t()); +}; diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index ead5048..f8ec95c 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -18,7 +18,7 @@ class nmsp_def_parser_t : public parser_t { return h.submit(true); } - public: nmsp_def_parser_t(): parser_t("$_nmsp_def") { } + public: nmsp_def_parser_t(): parser_t("$_nmsp") { } }; class import_parser_t : public parser_t { bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) const { @@ -46,15 +46,16 @@ class glob_parser_t : public parser_t { bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { tree_helper_t h(ctx, res_i); - return h.parse("$_exp", out); - if (h.ended()) return true; - if (nmsp_def_parser(ctx, h.i, (out["namespace"] = map_t()).map())) { + if (nmsp_def_parser(ctx, h.i, out["namespace"].map({}))) { ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); } + else { + out["namespace"] = data::null; + } - auto &imports = (out["imports"] = array_t()).array(); - auto &contents = (out["content"] = array_t()).array(); + auto &imports = 
out["imports"].array({}); + auto &contents = out["content"].array({}); while (true) { map_t map; @@ -83,7 +84,5 @@ public: }; const parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { - ctx.add_group("$_def"); - ctx.add_group("$_exp_val"); ctx.add_parser(new glob_parser_t()); }; From 662442121d1098efbb74ea68f8c7649585062600 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:31:59 +0300 Subject: [PATCH 56/74] chore: recoded ast system with ptr functions --- Makefile | 2 +- include/compiler/treeifier/ast.hh | 114 ++--- include/compiler/treeifier/ast/helper.hh | 20 +- include/compiler/treeifier/tokenizer.hh | 58 +-- src/compiler/treeifier/ast.cc | 59 --- src/compiler/treeifier/ast/ast.cc | 24 + src/compiler/treeifier/ast/parsers.cc | 0 src/compiler/treeifier/ast/parsers/exp.cc | 442 +++++++++--------- src/compiler/treeifier/ast/parsers/field.cc | 58 ++- src/compiler/treeifier/ast/parsers/func.cc | 161 +++---- src/compiler/treeifier/ast/parsers/glob.cc | 123 +++-- src/compiler/treeifier/ast/parsers/group.cc | 67 ++- .../treeifier/ast/parsers/identifier.cc | 26 +- src/compiler/treeifier/ast/parsers/nmsp.cc | 30 +- src/compiler/treeifier/ast/parsers/type.cc | 78 ++-- src/compiler/treeifier/ast/parsers/var.cc | 19 - src/compiler/treeifier/tokenizer.cc | 179 +++---- src/main/main.cc | 2 +- 18 files changed, 651 insertions(+), 811 deletions(-) delete mode 100644 src/compiler/treeifier/ast.cc create mode 100644 src/compiler/treeifier/ast/ast.cc create mode 100644 src/compiler/treeifier/ast/parsers.cc delete mode 100644 src/compiler/treeifier/ast/parsers/var.cc diff --git a/Makefile b/Makefile index 623891a..4002055 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ export MAKEFLAGS += --silent -r -j -export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow -DPROFILE_$(profile) +export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces 
-Wno-stringop-overflow -DPROFILE_$(profile) -fdiagnostics-color=always export ldflags=-L$(bin)/$(profile) export lib=ppc$(version-major)- export profile=release diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index b83f24d..f589d37 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -15,111 +15,52 @@ using namespace ppc::lang; using namespace ppc::messages; namespace ppc::comp::tree::ast { - class parser_t; - class group_parser_t; struct ast_ctx_t; + using parser_func_t = bool (ast_ctx_t &ctx, size_t &res_i, data::map_t &out); + using parser_t = parser_func_t*; - using parser_adder_t = void (*)(ast_ctx_t &ctx); + class group_t { + private: + std::map named_parsers; + std::map parsers; + public: + group_t &insert(const std::string &name, parser_t parser, const std::string &relative_to, bool after); + group_t &add_last(const std::string &name, parser_t parser); + group_t &replace(const std::string &name, parser_t parser); + group_t &add_named(const std::string &name, parser_t parser, const lang::namespace_name_t &identifier); - extern const parser_adder_t glob_adder; - extern const parser_adder_t identifier_adder; - extern const parser_adder_t nmsp_adder; - extern const parser_adder_t type_adder; - extern const parser_adder_t exp_adder; - extern const parser_adder_t field_adder; - extern const parser_adder_t func_adder; - extern const parser_adder_t var_adder; + bool operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; + }; struct ast_ctx_t { private: - struct parser_proxy_t { - private: - ast_ctx_t *parent; - public: - const parser_t &operator[](const std::string &name) const; - parser_proxy_t(ast_ctx_t *parent): parent(parent) { } - }; - - struct group_proxy_t { - private: - ast_ctx_t *parent; - public: - group_parser_t &operator[](const std::string &name); - group_parser_t &operator[](const std::string &name) const; - group_proxy_t(ast_ctx_t *parent): parent(parent) { } - }; - - 
std::unordered_map parsers; - std::set groups; + std::unordered_map groups; public: msg_stack_t &messages; std::vector &tokens; std::set imports; loc_namespace_name_t nmsp; - void add_parser(const parser_t *parser); - void add_parser(const parser_t *parser, const std::string &group); - void add_parser(const parser_t *parser, const std::string &group, const namespace_name_t &name); - - void add_parser(parser_adder_t factory) { factory(*this); } - ast_ctx_t &operator=(const ast_ctx_t &other) = delete; - const parser_proxy_t parser; - const group_proxy_t group; - - ast_ctx_t &init() { - add_parser(identifier_adder); - add_parser(nmsp_adder); - add_parser(glob_adder); - add_parser(type_adder); - add_parser(exp_adder); - add_parser(var_adder); - add_parser(field_adder); - add_parser(func_adder); - - return *this; + template + bool parse(const T &parser, size_t &i, data::map_t &out) { + return parser(*this, i, out); } - bool parse(std::string parser, size_t &pi, data::map_t &out); - static data::map_t parse(msg_stack_t &messages, std::vector &tokens); + group_t &group(const std::string &name); - ~ast_ctx_t(); - ast_ctx_t(msg_stack_t &messages, std::vector &tokens): - messages(messages), - tokens(tokens), - parser(this), - group(this) { } - }; - - class parser_t { - private: - std::string _name; - protected: - virtual bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const = 0; - public: - const std::string &name() const { return _name; } - bool operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { + template + static data::map_t parse(const T &glob, msg_stack_t &messages, std::vector &tokens) { + ast_ctx_t ctx(messages, tokens); data::map_t res; - out["$_name"] = _name; - return parse(ctx, i, out); + size_t i = 0; + + if (!ctx.parse(glob, i, res)) throw message_t::error("Failed to compile."); + return res; } - virtual ~parser_t() = default; - parser_t(const std::string &name): _name(name) { } - }; - - class group_parser_t : public parser_t { - 
private: - std::map named_parsers; - std::vector parsers; - public: - group_parser_t &add(const parser_t &parser); - group_parser_t &add(const parser_t &parser, const lang::namespace_name_t &name); - - bool parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; - - group_parser_t(const std::string &name): parser_t(name) { } + ast_ctx_t(msg_stack_t &messages, std::vector &tokens); }; namespace conv { @@ -132,4 +73,7 @@ namespace ppc::comp::tree::ast { data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp); loc_namespace_name_t map_to_nmsp(const data::map_t &map); } + + parser_func_t parse_glob, parse_nmsp, parse_identifier, parse_type, parse_func, parse_field, parse_exp, parse_stat_exp; + parser_func_t parse_exp_var, parse_exp_str_lit, parse_exp_int_lit, parse_exp_float_lit; } \ No newline at end of file diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh index 1181ce2..971c562 100644 --- a/include/compiler/treeifier/ast/helper.hh +++ b/include/compiler/treeifier/ast/helper.hh @@ -104,25 +104,28 @@ namespace ppc::comp::tree::ast { throw_ended(reason); } - bool push_parse(const std::string &name, data::array_t &out) { + template + bool push_parse(const T &parser, data::array_t &out) { data::map_t res; - if (parse(name, res)) { + if (parse(parser, res)) { out.push_back(res); return true; } else return false; } - bool parse(const std::string &name, data::map_t &out) { - return ctx.parse(name, i, out); + template + bool parse(const T &parser, data::map_t &out) { + return ctx.parse(parser, i, out); } - void force_push_parse(const std::string &name, std::string message, data::array_t &out) { + template + void force_push_parse(const T &parser, std::string message, data::array_t &out) { throw_ended(message); bool success; try { - success = push_parse(name, out); + success = push_parse(parser, out); } catch (const message_t &msg) { ctx.messages.push(msg); @@ -131,12 +134,13 @@ namespace ppc::comp::tree::ast { if (!success) 
err(message); } - void force_parse(const std::string &name, std::string message, data::map_t &out) { + template + void force_parse(const T &parser, std::string message, data::map_t &out) { throw_ended(message); bool success; try { - success = parse(name, out); + success = parse(parser, out); } catch (const message_t &msg) { ctx.messages.push(msg); diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index 93e1cb6..adaece9 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -79,28 +79,19 @@ namespace ppc::comp::tree { NONE, IDENTIFIER, OPERATOR, - INT, - FLOAT, - CHAR, - STRING, + LITERAL, } kind; union data_t { std::string *identifier; operator_t _operator; - std::uint64_t int_literal; - double float_literal; - char char_literal; - std::vector *string_literal; + std::vector *literal; } data; public: ppc::location_t location; bool is_identifier() const { return kind == IDENTIFIER; } bool is_operator() const { return kind == OPERATOR; } - bool is_int_lit() const { return kind == INT; } - bool is_float_lit() const { return kind == FLOAT; } - bool is_char_lit() const { return kind == CHAR; } - bool is_string_lit() const { return kind == STRING; } + bool is_literal() const { return kind == LITERAL; } const auto &identifier() const { if (!is_identifier()) throw std::string { "Token is not an identifier." }; @@ -110,21 +101,9 @@ namespace ppc::comp::tree { if (!is_operator()) throw std::string { "Token is not an operator." }; else return data._operator; } - auto int_lit() const { - if (!is_int_lit()) throw std::string { "Token is not an int literal." }; - else return data.int_literal; - } - auto float_lit() const { - if (!is_float_lit()) throw std::string { "Token is not a float literal." }; - else return data.float_literal; - } - auto char_lit() const { - if (!is_char_lit()) throw std::string { "Token is not a char literal." 
}; - else return data.char_literal; - } - const auto &string_lit() const { - if (!is_string_lit()) throw std::string { "Token is not a string literal." }; - else return *data.string_literal; + const auto &literal() const { + if (!is_literal()) throw std::string { "Token is not a literal." }; + else return *data.literal; } bool is_operator(operator_t op) const { return is_operator() && _operator() == op; } @@ -139,21 +118,9 @@ namespace ppc::comp::tree { kind = OPERATOR; data._operator = op; } - token_t(std::uint64_t val, location_t loc = location_t::NONE): location(loc) { - kind = INT; - data.int_literal = val; - } - token_t(double val, location_t loc = location_t::NONE): location(loc) { - kind = FLOAT; - data.float_literal = val; - } - token_t(char c, location_t loc = location_t::NONE): location(loc) { - kind = CHAR; - data.char_literal = c; - } - token_t(const std::vector &val, location_t loc = location_t::NONE): location(loc) { - kind = STRING; - data.string_literal = new std::vector { val }; + token_t(const std::vector &val, location_t loc = location_t::NONE): location(loc) { + kind = LITERAL; + data.literal = new std::vector { val }; } token_t(const token_t &tok): location(tok.location) { kind = tok.kind; @@ -161,17 +128,14 @@ namespace ppc::comp::tree { case NONE: break; case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break; case OPERATOR: data._operator = tok.data._operator; break; - case INT: data.int_literal = tok.data.int_literal; break; - case FLOAT: data.float_literal = tok.data.float_literal; break; - case CHAR: data.char_literal = tok.data.char_literal; break; - case STRING: data.string_literal = new std::vector { *tok.data.string_literal }; break; + case LITERAL: data.literal = new std::vector { *tok.data.literal }; break; } } ~token_t() { switch (kind) { case IDENTIFIER: delete data.identifier; break; - case STRING: delete data.string_literal; break; + case LITERAL: delete data.literal; break; default: break; } } diff 
--git a/src/compiler/treeifier/ast.cc b/src/compiler/treeifier/ast.cc deleted file mode 100644 index aea1723..0000000 --- a/src/compiler/treeifier/ast.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include "compiler/treeifier/ast.hh" - -namespace ppc::comp::tree::ast { - std::unordered_map parsers; - - const parser_t &ast_ctx_t::parser_proxy_t::operator[](const std::string &name) const { - auto it = parent->parsers.find(name); - if (it == parent->parsers.end()) throw "The parser '" + name + "' doesn't exist."; - return *it->second; - } - group_parser_t &ast_ctx_t::group_proxy_t::operator[](const std::string &name) const { - auto it = parent->parsers.find(name); - if (it == parent->parsers.end()) { - auto p = new group_parser_t(name); - parent->parsers[name] = p; - parent->groups.emplace(p); - return *p; - } - else if (parent->groups.find((group_parser_t*)it->second) == parent->groups.end()) { - throw "A parser '" + name + "' exists, but isn't a group."s; - } - else { - return *(group_parser_t*)it->second; - } - } - - ast_ctx_t::~ast_ctx_t() { - for (auto pair : parsers) { - delete pair.second; - } - } - - void ast_ctx_t::add_parser(const parser_t *parser) { - if (parsers.find(parser->name()) != parsers.end()) throw "The parser '" + parser->name() + "' already exists."; - parsers[parser->name()] = parser; - } - void ast_ctx_t::add_parser(const parser_t *parser, const std::string &group) { - add_parser(parser); - this->group[group].add(*parser); - } - void ast_ctx_t::add_parser(const parser_t *parser, const std::string &group, const namespace_name_t &name) { - add_parser(parser); - this->group[group].add(*parser, name); - } - - data::map_t ast_ctx_t::parse(msg_stack_t &messages, std::vector &tokens) { - ast_ctx_t ctx(messages, tokens); - ctx.init(); - size_t i = 0; - data::map_t res; - - if (!ctx.parse("$_glob", i, res)) throw message_t::error("Failed to compile."); - return res; - } - bool ast_ctx_t::parse(std::string parser, size_t &pi, data::map_t &out) { - return 
this->parser[parser] (*this, pi, out); - } -} - diff --git a/src/compiler/treeifier/ast/ast.cc b/src/compiler/treeifier/ast/ast.cc new file mode 100644 index 0000000..83abea1 --- /dev/null +++ b/src/compiler/treeifier/ast/ast.cc @@ -0,0 +1,24 @@ +#include "compiler/treeifier/ast.hh" + +using namespace ppc; +using namespace ppc::data; +using namespace ppc::lang; +using namespace ppc::comp::tree::ast; + +group_t &ast_ctx_t::group(const std::string &name) { + if (groups.find(name) == groups.end()) return groups[name] = { }; + else return groups[name]; +} + +ast_ctx_t::ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messages(messages), tokens(tokens) { + group("$_exp_val") + .add_last("$_var", parse_exp_var) + .add_last("$_int", parse_exp_int_lit); + // .add_last("$_float", parse_exp_float_lit) + // .add_last("$_string", parse_exp_str_lit); + group("$_stat") + .add_last("$_exp", parse_stat_exp); + group("$_def") + .add_last("$_func", parse_func) + .add_last("$_field", parse_field); +} \ No newline at end of file diff --git a/src/compiler/treeifier/ast/parsers.cc b/src/compiler/treeifier/ast/parsers.cc new file mode 100644 index 0000000..e69de29 diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index f4ed46e..690eb50 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -51,251 +51,261 @@ std::map bin_ops { { (operator_t)-1, sizeof_data }, }; -class exp_parser_t : public parser_t { - map_t op_to_map(located_t op) const { - return { - { "$_name", "$_operator" }, - { "ops", array_t() }, - { "location", conv::loc_to_map(op.location) }, - { "op", op.name }, - }; - } +map_t op_to_map(located_t op) { + return { + { "$_name", "$_operator" }, + { "ops", array_t() }, + { "location", conv::loc_to_map(op.location) }, + { "op", op.name }, + }; +} - bool pop(std::vector> &op_stack, array_t &res) const { - if (op_stack.empty()) return false; +bool pop(std::vector> &op_stack, 
array_t &res) { + if (op_stack.empty()) return false; - auto map = op_to_map(op_stack.back()); - auto op_n = op_stack.back().op_n; - auto loc = op_stack.back().location; - op_stack.pop_back(); + auto map = op_to_map(op_stack.back()); + auto op_n = op_stack.back().op_n; + auto loc = op_stack.back().location; + op_stack.pop_back(); - if (res.size() < op_n) return false; + if (res.size() < op_n) return false; - auto &ops = map["ops"].array(); + auto &ops = map["ops"].array(); - for (size_t i = 0; i < op_n; i++) { - ops.push_back(res.back()); - loc = loc.intersect(conv::map_to_loc(res.back().map()["location"].string())); - res.pop_back(); - } - - map["location"] = conv::loc_to_map(loc); - - std::reverse(ops.begin(), ops.end()); - res.push_back(map); - - return true; - } - bool pop_paren(std::vector> &op_stack, array_t &res) const { - bool has_paren = false; - for (const auto &op : op_stack) { - if (op.precedence == precedence_t::PAREN) { - has_paren = true; - break; - } - } - if (!has_paren) return false; - - while (true) { - if (op_stack.back().precedence == precedence_t::PAREN) break; - if (!pop(op_stack, res)) return false; - } - - op_stack.pop_back(); - return true; - } - bool pop_call(size_t n, location_t loc, std::vector> &op_stack, array_t &res) const { - map_t call = { - { "$_name", "$_call" }, - }; - - array_t &args = call["args"].array({}); - - while (true) { - if (op_stack.back().precedence == precedence_t::CALL_START) break; - if (!pop(op_stack, res)) return false; - } - loc = loc.intersect(op_stack.back().location); - op_stack.pop_back(); - call["location"] = conv::loc_to_map(loc); - - for (size_t i = 0; i <= n; i++) { - args.push_back(res.back()); - res.pop_back(); - } - - std::reverse(args.begin(), args.end()); - - call["func"] = res.back(); + for (size_t i = 0; i < op_n; i++) { + ops.push_back(res.back()); + loc = loc.intersect(conv::map_to_loc(res.back().map()["location"].string())); res.pop_back(); - res.push_back(call); - - return true; } - bool 
pop_until(const op_data_t &data, tree_helper_t &h, std::vector> &op_stack, array_t &res) const { - while (!op_stack.empty()) { - auto &back_data = op_stack.back(); - if (data.assoc ? - back_data.precedence >= data.precedence : - back_data.precedence > data.precedence - ) break; - if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + map["location"] = conv::loc_to_map(loc); + + std::reverse(ops.begin(), ops.end()); + res.push_back(map); + + return true; +} +bool pop_paren(std::vector> &op_stack, array_t &res) { + bool has_paren = false; + for (const auto &op : op_stack) { + if (op.precedence == precedence_t::PAREN) { + has_paren = true; + break; } - return true; + } + if (!has_paren) return false; + + while (true) { + if (op_stack.back().precedence == precedence_t::PAREN) break; + if (!pop(op_stack, res)) return false; } - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); + op_stack.pop_back(); + return true; +} +bool pop_call(size_t n, location_t loc, std::vector> &op_stack, array_t &res) { + map_t call = { + { "$_name", "$_call" }, + }; - bool last_val = false; - map_t val; - std::vector> op_stack; - std::vector call_args_n; - auto res = array_t(); + array_t &args = call["args"].array({}); + + while (true) { + if (op_stack.back().precedence == precedence_t::CALL_START) break; + if (!pop(op_stack, res)) return false; + } + loc = loc.intersect(op_stack.back().location); + op_stack.pop_back(); + call["location"] = conv::loc_to_map(loc); + + for (size_t i = 0; i <= n; i++) { + args.push_back(res.back()); + res.pop_back(); + } + + std::reverse(args.begin(), args.end()); + + call["func"] = res.back(); + res.pop_back(); + res.push_back(call); + + return true; +} +bool pop_until(const op_data_t &data, tree_helper_t &h, std::vector> &op_stack, array_t &res) { + while (!op_stack.empty()) { + auto &back_data = op_stack.back(); + if (data.assoc ? 
+ back_data.precedence >= data.precedence : + back_data.precedence > data.precedence + ) break; + + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + } + return true; +} + +bool ast::parse_exp_var(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.curr().is_identifier()) { + out["content"] = h.curr().identifier(); + out["location"] = conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; +} +bool ast::parse_exp_int_lit(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.curr().is_literal()) { + auto &arr = out["content"].array({}); + for (auto b : h.curr().literal()) arr.push_back((float)b); + out["location"] = conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; +} - while (true) { - if (h.ended()) break; +bool ast::parse_exp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); - if (!last_val && h.curr().is_identifier("sizeof")) { - op_stack.push_back({ h.loc(), sizeof_data }); - h.advance("Expected a value on the right side of the operator."); - continue; - } - if (!last_val && h.push_parse("$_exp_val", res)) last_val = true; - if (h.curr().is_operator()) { - auto op = h.curr()._operator(); - if (last_val) { - if (op == operator_t::PAREN_OPEN) { - h.advance("Expected an argument."); - call_args_n.push_back(0); - op_stack.push_back({ h.loc(), { precedence_t::CALL_START } }); - last_val = false; - } - else if (op == operator_t::COMMA) { - if (call_args_n.size() == 0) break; - h.advance("Expected an argument."); + bool last_val = false; + map_t val; + std::vector> op_stack; + std::vector call_args_n; + auto res = array_t(); - pop_until({ precedence_t::CALL_START, .assoc = true }, h, op_stack, res); - call_args_n.back()++; - last_val = false; - } - else if (op == operator_t::PAREN_CLOSE) { - bool is_call = false, is_paren = false; - for (auto i = op_stack.rbegin(); i != 
op_stack.rend(); i++) { - if (i->precedence == precedence_t::PAREN) { - is_paren = true; - break; - } - else if (i->precedence == precedence_t::CALL_START) { - is_call = true; - break; - } - } + while (true) { + if (h.ended()) break; - if (is_call) pop_call(call_args_n.back(), h.loc(), op_stack, res); - else if (is_paren) pop_paren(op_stack, res); - else break; - - if (!h.try_advance()) break; - } - else if (op == operator_t::COLON) { - h.advance("Expected a type."); - pop_until({ precedence_t::PREFIX, .assoc = true }, h, op_stack, res); - map_t cast = { - { "$_name", "$_cast" }, - { "exp", res.back() }, - }; - - res.pop_back(); - h.force_parse("$_type", "Expected a type.", cast["type"].map({})); - res.push_back(cast); - } - else if (op == operator_t::DOT || op == operator_t::PTR_MEMBER) { - h.advance("Expected an identifier."); - pop_until({ precedence_t::POSTFIX, .assoc = true }, h, op_stack, res); - - map_t member_access = { - { "$_name", "$_member" }, - { "exp", res.back() }, - { "is_ptr", op == operator_t::PTR_MEMBER }, - }; - h.force_parse("$_identifier", "Expected an identifier.", member_access["name"].map({})); - member_access["location"] = conv::loc_to_map( - conv::map_to_loc(member_access["name"].map()["location"].string()).intersect( - conv::map_to_loc(res.back().map()["location"].string()) - ) - ); - res.pop_back(); - res.push_back(member_access); - } - else if (bin_ops.find(op) != bin_ops.end()) { - auto data = bin_ops[op]; - pop_until(data, h, op_stack, res); - op_stack.push_back({ h.loc(), data }); - - if (data.op_n == 1) { - last_val = true; - if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); - if (h.try_advance()) break; - } - else { - last_val = false; - h.advance("Expected a value on the right side of the operator."); - } - } - else break; + if (!last_val && h.curr().is_identifier("sizeof")) { + op_stack.push_back({ h.loc(), sizeof_data }); + h.advance("Expected a value on the right side of the 
operator."); + continue; + } + if (!last_val && h.push_parse(ctx.group("$_exp_val"), res)) last_val = true; + if (h.curr().is_operator()) { + auto op = h.curr()._operator(); + if (last_val) { + if (op == operator_t::PAREN_OPEN) { + h.advance("Expected an argument."); + call_args_n.push_back(0); + op_stack.push_back({ h.loc(), { precedence_t::CALL_START } }); + last_val = false; } - else { - if (op == operator_t::PAREN_OPEN) { - op_stack.push_back({ h.loc(), { precedence_t::PAREN } }); - h.advance("Expected a value."); - last_val = false; + else if (op == operator_t::COMMA) { + if (call_args_n.size() == 0) break; + h.advance("Expected an argument."); + + pop_until({ .precedence = precedence_t::CALL_START, .assoc = true }, h, op_stack, res); + call_args_n.back()++; + last_val = false; + } + else if (op == operator_t::PAREN_CLOSE) { + bool is_call = false, is_paren = false; + + for (auto i = op_stack.rbegin(); i != op_stack.rend(); i++) { + if (i->precedence == precedence_t::PAREN) { + is_paren = true; + break; + } + else if (i->precedence == precedence_t::CALL_START) { + is_call = true; + break; + } } - else if (pre_ops.find(op) != pre_ops.end()) { - op_stack.push_back({ h.loc(), pre_ops[op] }); + + if (is_call) pop_call(call_args_n.back(), h.loc(), op_stack, res); + else if (is_paren) pop_paren(op_stack, res); + else break; + + if (!h.try_advance()) break; + } + else if (op == operator_t::COLON) { + h.advance("Expected a type."); + pop_until({ .precedence = precedence_t::PREFIX, .assoc = true }, h, op_stack, res); + map_t cast = { + { "$_name", "$_cast" }, + { "exp", res.back() }, + }; + + res.pop_back(); + h.force_parse(parse_type, "Expected a type.", cast["type"].map({})); + res.push_back(cast); + } + else if (op == operator_t::DOT || op == operator_t::PTR_MEMBER) { + h.advance("Expected an identifier."); + pop_until({ .precedence = precedence_t::POSTFIX, .assoc = true }, h, op_stack, res); + + map_t member_access = { + { "exp", res.back() }, + { "is_ptr", op == 
operator_t::PTR_MEMBER }, + }; + h.force_parse(parse_identifier, "Expected an identifier.", member_access["name"].map({})); + member_access["location"] = conv::loc_to_map( + conv::map_to_loc(member_access["name"].map()["location"].string()).intersect( + conv::map_to_loc(res.back().map()["location"].string()) + ) + ); + res.pop_back(); + res.push_back(member_access); + } + else if (bin_ops.find(op) != bin_ops.end()) { + auto data = bin_ops[op]; + pop_until(data, h, op_stack, res); + op_stack.push_back({ h.loc(), data }); + + if (data.op_n == 1) { + last_val = true; + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + if (h.try_advance()) break; + } + else { + last_val = false; h.advance("Expected a value on the right side of the operator."); } - else break; } - continue; + else break; } - else break; + else { + if (op == operator_t::PAREN_OPEN) { + op_stack.push_back({ h.loc(), { precedence_t::PAREN } }); + h.advance("Expected a value."); + last_val = false; + } + else if (pre_ops.find(op) != pre_ops.end()) { + op_stack.push_back({ h.loc(), pre_ops[op] }); + h.advance("Expected a value on the right side of the operator."); + } + else break; + } + continue; } - - if (res.size() == 0) return false; - - while (!op_stack.empty()) { - if (op_stack.back().precedence == precedence_t::PAREN) throw message_t::error("Unclosed paren.", op_stack.back().location); - if (op_stack.back().precedence == precedence_t::CALL_START) throw message_t::error("Unclosed call.", op_stack.back().location); - if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); - } - - out = res.front().map(); - - return h.submit(false); + else break; } - public: exp_parser_t(): parser_t("$_exp") { } -}; + if (res.size() == 0) return false; -class exp_stat_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &i, map_t &res) const { - tree_helper_t h(ctx, i); - if (!h.parse("$_exp", res)) return false; 
- if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); - - ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); - return h.submit(false); + while (!op_stack.empty()) { + if (op_stack.back().precedence == precedence_t::PAREN) throw message_t::error("Unclosed paren.", op_stack.back().location); + if (op_stack.back().precedence == precedence_t::CALL_START) throw message_t::error("Unclosed call.", op_stack.back().location); + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); } - public: exp_stat_parser_t() : parser_t("$_exp_stat") { } -}; + out = res.front().map(); -const parser_adder_t ppc::comp::tree::ast::exp_adder = [](ast_ctx_t &ctx) { - ctx.add_parser(new exp_parser_t()); - ctx.add_parser(new exp_stat_parser_t(), "$_stat"); -}; + return h.submit(false); +} +bool ast::parse_stat_exp(ast_ctx_t &ctx, size_t &i, map_t &res) { + tree_helper_t h(ctx, i); + if (!h.parse(parse_exp, res)) return false; + if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); +} diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc index 631faac..d1f335f 100644 --- a/src/compiler/treeifier/ast/parsers/field.cc +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -1,42 +1,36 @@ #include "compiler/treeifier/ast/helper.hh" -class field_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); +bool ast::parse_field(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); - if (h.ended()) return false; + if (h.ended()) return false; - if (!h.parse("$_identifier", out["name"].map({}))) return false; + if (!h.parse(parse_identifier, out["name"].map({}))) return false; - bool type, defval; + bool type, defval; - h.throw_ended("Expected a colon or an equals 
sign."); + h.throw_ended("Expected a colon or an equals sign."); - if (h.curr().is_operator(operator_t::COLON)) { - h.advance(); - h.force_parse("$_type", "Expected a type.", out["type"].map({})); - type = true; - } - if (h.curr().is_operator(operator_t::ASSIGN)) { - h.advance(); - h.force_parse("$_exp", "Expected an expression.", out["value"].map({})); - type = true; - } - - if (h.curr().is_operator(operator_t::SEMICOLON)) { - if (type || defval) return h.submit(); - else return h.err("A type or a default value must be specified "); - } - else if (type || defval) { - ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); - return h.submit(false); - } - else return false; - - return h.submit(true); + if (h.curr().is_operator(operator_t::COLON)) { + h.advance(); + h.force_parse(parse_type, "Expected a type.", out["type"].map({})); + type = true; + } + if (h.curr().is_operator(operator_t::ASSIGN)) { + h.advance(); + h.force_parse(parse_exp, "Expected an expression.", out["value"].map({})); + type = true; } - public: field_parser_t(): parser_t("$_field") { } -}; + if (h.curr().is_operator(operator_t::SEMICOLON)) { + if (type || defval) return h.submit(); + else return h.err("A type or a default value must be specified "); + } + else if (type || defval) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + else return false; -const parser_adder_t ppc::comp::tree::ast::field_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new field_parser_t(), "$_def"); }; + return h.submit(true); +} diff --git a/src/compiler/treeifier/ast/parsers/func.cc b/src/compiler/treeifier/ast/parsers/func.cc index 1380ce3..ca588a3 100644 --- a/src/compiler/treeifier/ast/parsers/func.cc +++ b/src/compiler/treeifier/ast/parsers/func.cc @@ -1,101 +1,88 @@ #include "compiler/treeifier/ast/helper.hh" -class arg_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); 
+static bool parse_arg(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); - if (h.ended()) return false; + if (h.ended()) return false; - if (!h.parse("$_identifier", out["name"].map({}))) return false; + if (!h.parse(parse_identifier, out["name"].map({}))) return false; - bool type, defval; + bool type, defval; - h.throw_ended("Expected a colon or an equals sign."); + h.throw_ended("Expected a colon or an equals sign."); - if (h.curr().is_operator(operator_t::COLON)) { - h.advance(); - h.force_parse("$_type", "Expected a type.", out["type"].map({})); - type = true; + if (h.curr().is_operator(operator_t::COLON)) { + h.advance(); + h.force_parse(parse_type, "Expected a type.", out["type"].map({})); + type = true; + } + if (h.curr().is_operator(operator_t::ASSIGN)) { + h.advance(); + h.force_parse(parse_exp, "Expected an expression.", out["value"].map({})); + type = true; + } + + if (!type && !defval) { + ctx.messages.push(message_t::error("Expected a type or a default value.", h.loc(1))); + } + + return h.submit(false); +} + +bool ast::parse_func(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse(parse_identifier, out["name"].map({}))) return false; + if (h.ended()) return false; + if (!h.curr().is_operator(operator_t::PAREN_OPEN)) return false; + h.advance("Expected a closing paren or a parameter."); + + auto ¶ms = out["params"].array({}); + auto &content = out["content"].array({}); + + while (true) { + if (h.curr().is_operator(operator_t::PAREN_CLOSE)) { + h.advance("Expected a function body."); + break; } - if (h.curr().is_operator(operator_t::ASSIGN)) { - h.advance(); - h.force_parse("$_exp", "Expected an expression.", out["value"].map({})); - type = true; + h.force_push_parse(parse_arg, "Expected a parameter.", params); + if (h.curr().is_operator(operator_t::COMMA)) { + h.advance("Expected a parameter."); } + } - if (!type && !defval) { - 
ctx.messages.push(message_t::error("Expected a type or a default value.", h.loc(1))); + if (h.curr().is_operator(operator_t::COLON)) { + h.advance("Expected a type."); + h.force_parse(parse_type, "Expected a type", out["type"].map({})); + } + + if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + else if (h.curr().is_operator(operator_t::LAMBDA)) { + h.advance("Expected an expression."); + map_t exp; + h.force_parse(parse_exp, "Expected an expression.", exp); + content.push_back({ + { "$_name", "$_return" }, + { "content", exp }, + }); + return h.submit(false); + } + else if (h.curr().is_operator(operator_t::BRACE_OPEN)) { + h.advance("Expected a statement."); + while (true) { + if (h.curr().is_operator(operator_t::BRACE_CLOSE)) { + return h.submit(true); + } + + h.force_push_parse(ctx.group("$_stat"), "Expected an expression.", content); } - + } + else { + ctx.messages.push(message_t::error("Expected a semicolon, brace open or a lambda operator.", h.loc(1))); return h.submit(false); } - public: arg_parser_t(): parser_t("$_func_arg") {} -}; - -class func_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); - - if (h.ended()) return false; - - if (!h.parse("$_identifier", out["name"].map({}))) return false; - if (h.ended()) return false; - if (!h.curr().is_operator(operator_t::PAREN_OPEN)) return false; - h.advance("Expected a closing paren or a parameter."); - - auto ¶ms = out["params"].array({}); - auto &content = out["content"].array({}); - - while (true) { - if (h.curr().is_operator(operator_t::PAREN_CLOSE)) { - h.advance("Expected a function body."); - break; - } - h.force_push_parse("$_func_arg", "Expected a parameter.", params); - if (h.curr().is_operator(operator_t::COMMA)) { - h.advance("Expected a parameter."); - } - } - - if (h.curr().is_operator(operator_t::COLON)) { - h.advance("Expected a type."); - h.force_parse("$_type", "Expected a type", 
out["type"].map({})); - } - - if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); - else if (h.curr().is_operator(operator_t::LAMBDA)) { - h.advance("Expected an expression."); - map_t exp; - h.force_parse("$_exp", "Expected an expression.", exp); - content.push_back({ - { "$_name", "$_return" }, - { "content", exp }, - }); - return h.submit(false); - } - else if (h.curr().is_operator(operator_t::BRACE_OPEN)) { - h.advance("Expected a statement."); - while (true) { - if (h.curr().is_operator(operator_t::BRACE_CLOSE)) { - return h.submit(true); - } - - h.force_push_parse("$_stat", "Expected an expression.", content); - } - } - else { - ctx.messages.push(message_t::error("Expected a semicolon, brace open or a lambda operator.", h.loc(1))); - return h.submit(false); - } - - return h.submit(true); - } - - public: func_parser_t(): parser_t("$_func") { } -}; - -const parser_adder_t ppc::comp::tree::ast::func_adder = [](ast_ctx_t &ctx) { - ctx.add_parser(new func_parser_t(), "$_def"); - ctx.add_parser(new arg_parser_t()); -}; + return h.submit(true); +} diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc index f8ec95c..2b8021a 100644 --- a/src/compiler/treeifier/ast/parsers/glob.cc +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -1,88 +1,69 @@ #include "compiler/treeifier/ast.hh" #include "compiler/treeifier/ast/helper.hh" +// #include "./type.cc" using namespace ppc::comp::tree::ast; -class nmsp_def_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) const { - tree_helper_t h(ctx, res_i); - if (h.ended()) return false; +static bool nmsp_def(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) { + tree_helper_t h(ctx, res_i); + if (h.ended()) return false; - if (!h.curr().is_identifier("namespace")) return false; - h.advance("Expected a namespace"); - h.force_parse("$_nmsp", "Expected a namespace.", res); - if (!h.curr().is_operator(operator_t::SEMICOLON)) { - 
ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); - return h.submit(false); - } - return h.submit(true); + if (!h.curr().is_identifier("namespace")) return false; + h.advance("Expected a namespace"); + h.force_parse(parse_nmsp, "Expected a namespace.", res); + if (!h.curr().is_operator(operator_t::SEMICOLON)) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + return h.submit(true); +} +static bool import(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) { + tree_helper_t h(ctx, res_i); + if (h.ended()) return false; + + if (!h.curr().is_identifier("import")) return false; + h.advance("Expected a namespace"); + h.force_parse(parse_nmsp, "Expected a namespace.", res); + if (!h.curr().is_operator(operator_t::SEMICOLON)) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); } - public: nmsp_def_parser_t(): parser_t("$_nmsp") { } -}; -class import_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) const { - tree_helper_t h(ctx, res_i); - if (h.ended()) return false; + return h.submit(true); +} - if (!h.curr().is_identifier("import")) return false; - h.advance("Expected a namespace"); - h.force_parse("$_nmsp", "Expected a namespace.", res); - if (!h.curr().is_operator(operator_t::SEMICOLON)) { - ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); - return h.submit(false); - } +bool ast::parse_glob(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) { + tree_helper_t h(ctx, res_i); - return h.submit(true); + if (h.ended()) return true; + if (h.parse(nmsp_def, out["namespace"].map({}))) { + ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); + } + else { + out["namespace"] = data::null; } - public: import_parser_t(): parser_t("$_import") { } -}; + auto &imports = out["imports"].array({}); + auto &contents = out["content"].array({}); -auto import_parser = import_parser_t(); -auto 
nmsp_def_parser = nmsp_def_parser_t(); + while (true) { + map_t map; + if (!h.parse(import, map)) break; + imports.push_back(map); + auto nmsp = conv::map_to_nmsp(map); -class glob_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) const { - tree_helper_t h(ctx, res_i); - - if (h.ended()) return true; - if (nmsp_def_parser(ctx, h.i, out["namespace"].map({}))) { - ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); - } - else { - out["namespace"] = data::null; - } - - auto &imports = out["imports"].array({}); - auto &contents = out["content"].array({}); - - while (true) { - map_t map; - if (!import_parser(ctx, h.i, map)) break; - imports.push_back(map); - auto nmsp = conv::map_to_nmsp(map); - - if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); - } - - while (true) { - if (h.ended()) break; - if (!h.push_parse("$_def", contents)) { - ctx.messages.push(message_t::error("Invalid token.", h.loc())); - h.i++; - } - } - - if (!h.ended()) h.err("Invalid token."); - - return h.submit(); + if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); } -public: - glob_parser_t(): parser_t("$_glob") { } -}; + while (true) { + if (h.ended()) break; + if (!h.push_parse(ctx.group("$_def"), contents)) { + ctx.messages.push(message_t::error("Invalid token.", h.loc())); + h.i++; + } + } -const parser_adder_t ppc::comp::tree::ast::glob_adder = [](ast_ctx_t &ctx) { - ctx.add_parser(new glob_parser_t()); -}; + if (!h.ended()) h.err("Invalid token."); + + return h.submit(); +} diff --git a/src/compiler/treeifier/ast/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc index c52d103..fa12cc2 100644 --- a/src/compiler/treeifier/ast/parsers/group.cc +++ b/src/compiler/treeifier/ast/parsers/group.cc @@ -13,7 +13,7 @@ using namespace std; static bool read_nmsp(ast_ctx_t &ctx, size_t &i, lang::loc_namespace_name_t &name) { 
tree_helper_t h(ctx, i); map_t res; - if (!h.parse("$_nmsp", res)) return false; + if (!h.parse(parse_nmsp, res)) return false; name = conv::map_to_nmsp(res); return true; } @@ -40,8 +40,48 @@ static bool resolve_nmsp(ast_ctx_t &ctx, const lang::namespace_name_t &name, T b return false; } +group_t &group_t::insert(const std::string &name, parser_t parser, const std::string &relative_to, bool after) { + if (parsers.find(name) != parsers.end()) { + throw "The parser '" + name + "' is already in the group."; + } -bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { + auto it = parsers.find(relative_to); + if (it == parsers.end()) { + throw "The parser '" + relative_to + "' isn't in the group."; + } + + if (after) it++; + + parsers.insert(it, { name, parser }); + + return *this; +} +group_t &group_t::replace(const std::string &name, parser_t parser) { + auto it = parsers.find(name); + + if (parsers.find(name) == parsers.end()) { + throw "The parser '" + name + "' isn't in the group."; + } + + it->second = parser; + + return *this; +} +group_t &group_t::add_last(const std::string &name, parser_t parser) { + if (parsers.find(name) != parsers.end()) { + throw "The parser '" + name + "' is already in the group."; + } + + parsers.emplace(name, parser); + + return *this; +} +group_t &group_t::add_named(const std::string &name, parser_t parser, const lang::namespace_name_t &identifier) { + add_last(name, parser); + return *this; +} + +bool group_t::operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { tree_helper_t h(ctx, i); if (h.ended()) return false; @@ -50,8 +90,9 @@ bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { if (read_nmsp(ctx, h.i, name)) { namespace_name_t actual; if (resolve_nmsp(ctx, name.strip_location(), named_parsers.begin(), named_parsers.end(), actual)) { - auto &parser = *this->named_parsers.find(actual)->second; - if (parser(ctx, i, out)) return true; + auto parser = 
parsers.find(this->named_parsers.find(actual)->second); + out["$_name"] = parser->first; + if (parser->second(ctx, i, out)) return true; else throw message_t::error("Unexpected construct specifier.", h.res_loc()); } } @@ -59,25 +100,11 @@ bool group_parser_t::parse(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { unordered_map errors; for (auto parser : parsers) { - if ((*parser)(ctx, i, out)) return true; + out["$_name"] = parser.first; + if ((*parser.second)(ctx, i, out)) return true; } stringstream m; return false; } - -group_parser_t &group_parser_t::add(const parser_t &parser) { - parsers.push_back(&parser); - return *this; -} -group_parser_t &group_parser_t::add(const parser_t &parser, const lang::namespace_name_t &name) { - if (name.empty()) throw "Name can't be empty."s; - if (std::find(parsers.begin(), parsers.end(), &parser) != parsers.end()) { - throw "Parser '" + name.to_string() + "' already in group."; - } - - named_parsers[name] = &parser; - - return *this; -} diff --git a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc index e44eaed..a1119e0 100644 --- a/src/compiler/treeifier/ast/parsers/identifier.cc +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -1,21 +1,15 @@ #include "compiler/treeifier/ast/helper.hh" -class identifier_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); +bool ast::parse_identifier(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); - if (h.ended()) return false; + if (h.ended()) return false; - if (h.curr().is_identifier()) { - auto loc = h.loc(); - out["location"] = conv::loc_to_map(loc); - out["content"] = h.curr().identifier(); - return h.submit(); - } - else return false; + if (h.curr().is_identifier()) { + auto loc = h.loc(); + out["location"] = conv::loc_to_map(loc); + out["content"] = h.curr().identifier(); + return h.submit(); } - - public: 
identifier_parser_t(): parser_t("$_identifier") { } -}; - -const parser_adder_t ppc::comp::tree::ast::identifier_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new identifier_parser_t()); }; + else return false; +} diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc index 576aec1..13431ad 100644 --- a/src/compiler/treeifier/ast/parsers/nmsp.cc +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -1,26 +1,20 @@ #include "compiler/treeifier/ast/helper.hh" -class nmsp_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); +bool ast::parse_nmsp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); - if (h.ended()) return false; + if (h.ended()) return false; - auto &arr = (out["content"] = array_t()).array(); + auto &arr = (out["content"] = array_t()).array(); - if (!h.push_parse("$_identifier", arr)) return false; + if (!h.push_parse(parse_identifier, arr)) return false; - while (true) { - if (h.ended()) break; - if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; - h.force_push_parse("$_identifier", "Expected an identifier.", arr); - } - - out["location"] = conv::loc_to_map(h.res_loc()); - return h.submit(false); + while (true) { + if (h.ended()) break; + if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; + h.force_push_parse(parse_identifier, "Expected an identifier.", arr); } - public: nmsp_parser_t(): parser_t("$_nmsp") { } -}; - -const parser_adder_t ppc::comp::tree::ast::nmsp_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new nmsp_parser_t()); }; + out["location"] = conv::loc_to_map(h.res_loc()); + return h.submit(false); +} diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 9cc580f..4242a05 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -1,51 +1,45 @@ #include "compiler/treeifier/ast/helper.hh" 
-class type_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); +bool ast::parse_type(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); - if (h.ended()) return false; + if (h.ended()) return false; - auto &nmsp = (out["namespace"] = map_t()).map(); - nmsp["$_name"] = "$_nmsp"; - auto &nmsp_content = (out["namespace"].map()["content"] = array_t()).array(); - size_t ptr_n = 0; + auto &nmsp = (out["namespace"] = map_t()).map(); + nmsp["$_name"] = "$_nmsp"; + auto &nmsp_content = (out["namespace"].map()["content"] = array_t()).array(); + size_t ptr_n = 0; - if (!h.push_parse("$_identifier", nmsp_content)) return false; + if (!h.push_parse(parse_identifier, nmsp_content)) return false; - while (true) { - if (h.ended()) break; - if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; - h.advance("Expected an identifier."); - h.force_push_parse("$_identifier", "Expected an identifier.", nmsp_content); - } - - while (!h.ended() && h.curr().is_operator(operator_t::MULTIPLY)) { - ptr_n++; - if (!h.try_advance()) break; - } - - out["location"] = conv::loc_to_map(h.res_loc()); - out["name"] = nmsp_content[nmsp_content.size() - 1]; - out["ptr_n"] = (float)ptr_n; - nmsp_content.pop_back(); - - if (nmsp_content.size() == 0) { - auto loc = h.res_loc(); - loc.length = 1; - nmsp["location"] = conv::loc_to_map(loc); - } - else { - auto loc_1 = conv::map_to_loc(nmsp_content[0].map()["location"].string()); - auto loc_2 = conv::map_to_loc(nmsp_content[nmsp_content.size() - 1].map()["location"].string()); - auto loc = loc_1.intersect(loc_2); - nmsp["location"] = conv::loc_to_map(loc); - } - - return h.submit(false); + while (true) { + if (h.ended()) break; + if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; + h.advance("Expected an identifier."); + h.force_push_parse(parse_identifier, "Expected an identifier.", nmsp_content); } - public: type_parser_t(): parser_t("$_type") { } 
-}; + while (!h.ended() && h.curr().is_operator(operator_t::MULTIPLY)) { + ptr_n++; + if (!h.try_advance()) break; + } -const parser_adder_t ppc::comp::tree::ast::type_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new type_parser_t()); }; + out["location"] = conv::loc_to_map(h.res_loc()); + out["name"] = nmsp_content[nmsp_content.size() - 1]; + out["ptr_n"] = (float)ptr_n; + nmsp_content.pop_back(); + + if (nmsp_content.size() == 0) { + auto loc = h.res_loc(); + loc.length = 1; + nmsp["location"] = conv::loc_to_map(loc); + } + else { + auto loc_1 = conv::map_to_loc(nmsp_content[0].map()["location"].string()); + auto loc_2 = conv::map_to_loc(nmsp_content[nmsp_content.size() - 1].map()["location"].string()); + auto loc = loc_1.intersect(loc_2); + nmsp["location"] = conv::loc_to_map(loc); + } + + return h.submit(false); +} diff --git a/src/compiler/treeifier/ast/parsers/var.cc b/src/compiler/treeifier/ast/parsers/var.cc deleted file mode 100644 index 4d3a8e0..0000000 --- a/src/compiler/treeifier/ast/parsers/var.cc +++ /dev/null @@ -1,19 +0,0 @@ -#include "compiler/treeifier/ast/helper.hh" - -class var_parser_t : public parser_t { - bool parse(ast_ctx_t &ctx, size_t &res_i, map_t &out) const { - tree_helper_t h(ctx, res_i); - - if (h.curr().is_identifier()) { - out["content"] = h.curr().identifier(); - out["location"] = conv::loc_to_map(h.loc()); - return h.submit(true); - } - - return false; - } - - public: var_parser_t(): parser_t("$_var") { } -}; - -const parser_adder_t ppc::comp::tree::ast::var_adder = [](ast_ctx_t &ctx) { ctx.add_parser(new var_parser_t(), "$_exp_val"); }; diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 763aab1..ecda2d1 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -1,4 +1,5 @@ #include +#include #include "compiler/treeifier/tokenizer.hh" #include "compiler/treeifier/lexer.hh" @@ -7,12 +8,12 @@ using namespace messages; using namespace comp::tree; using 
namespace std::string_literals; -static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, const lex::token_t &token) { +static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, const lex::token_t &token) { char literal_char = is_char ? '\'' : '"'; bool escaping = false; - std::vector res; + std::vector res; location_t curr_char_loc = token.location; curr_char_loc.length = 1; curr_char_loc.start++; @@ -52,96 +53,99 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, cons if (is_char) throw message_t(message_t::ERROR, "Unterminated char literal.", token.location); else throw message_t(message_t::ERROR, "Unterminated string literal.", token.location); } -static token_t parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { - enum radix_t { - BINARY, - OCTAL, - DECIMAL, - HEXADECIMAL, - } radix; - std::size_t i = 0; +static std::vector parse_bin(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + int last_byte = 0; + int lastbyte_n = 0; + + for (size_t j = data.length() - 1; j >= i; j--) { + if (lastbyte_n == 8) { + lastbyte_n = 0; + res.push_back(last_byte); + last_byte = 0; + } + + last_byte <<= 1; + last_byte |= data[j] - '0'; + lastbyte_n++; + } + + res.push_back(last_byte); + std::reverse(res.begin(), res.end()); + + return res; +} +static std::vector parse_hex(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + + int last_byte = 0; + int lastbyte_n = 0; + + for (size_t j = data.length() - 1; j >= i; j--) { + if (lastbyte_n == 8) { + lastbyte_n = 0; + res.push_back(last_byte); + last_byte = 0; + } + + int digit = data[j] - '0'; + if (data[j] >= 'a' && data[j] <= 'f') digit = data[j] - 'a' + 10; + if (data[j] >= 'A' && data[j] <= 'F') digit = data[j] - 'F' + 10; + + last_byte <<= 4; + last_byte |= digit; + lastbyte_n += 4; + } + + res.push_back(last_byte); + std::reverse(res.begin(), res.end()); + + return res; +} +static std::vector 
parse_oct(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + + int last_byte = 0; + int lastbyte_n = 0; + + for (size_t j = data.length() - 1; j >= i; j--) { + if (lastbyte_n >= 8) { + lastbyte_n = 0; + res.push_back(last_byte); + last_byte >>= 8; + } + + int digit = data[j] - '0'; + + last_byte <<= 3; + last_byte |= digit; + lastbyte_n += 3; + } + + res.push_back(last_byte); + std::reverse(res.begin(), res.end()); + + return res; +} + +static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { switch (token.type) { case lex::token_t::BIN_LITERAL: - i += 2; - radix = BINARY; - break; + return parse_bin(msg_stack, 2, token.data); case lex::token_t::OCT_LITERAL: - i++; - radix = OCTAL; - break; + return parse_oct(msg_stack, 1, token.data); case lex::token_t::DEC_LITERAL: - radix = DECIMAL; - break; + throw "no dec literals lol bozo."s; case lex::token_t::HEX_LITERAL: - i += 2; - radix = HEXADECIMAL; - break; + return parse_hex(msg_stack, 2, token.data); default: throw "WTF r u doing bro?"s; } - - uint64_t res = 0; - - for (; i <= token.data.length() - 1; i++) { - char c = token.data[i]; - int8_t digit; - switch (radix) { - case BINARY: - digit = c - '0'; - res <<= 1; - res |= digit; - break; - case OCTAL: - digit = c - '0'; - if (digit < 0 || digit > 7) { - throw message_t(message_t::ERROR, "Octal literals may contain numbers between 0 and 7.", token.location); - } - res <<= 3; - res |= digit; - break; - case 2: - digit = c - '0'; - res *= 10; - res += digit; - break; - case 3: - if (c >= 'a' && c <= 'f') digit = c - 'a' + 10; - else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10; - else if (c >= '0' && c <= '9') digit = c - '0'; - else throw message_t(message_t::ERROR, "Invalid character '"s + c + "' in hex literal.", token.location); - res <<= 4; - res |= digit; - break; - } - } - - return token_t(res, token.location); } -static token_t parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { - double 
whole = 0, fract = 0; - - char c; - std::size_t i; - - for (i = 0; i < token.data.length() && isdigit(c = token.data[i]); i++) { - if (c == '.') break; - int digit = c - '0'; - whole *= 10; - whole += digit; - } - - if (c == '.') { - i++; - for (; i < token.data.length() && isdigit(c = token.data[i]); i++) { - int digit = c - '0'; - fract += digit; - fract /= 10; - } - } - - return token_t(whole + fract, token.location); +static std::vector parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { + throw "no floats lol bozo"s; } token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { @@ -160,16 +164,13 @@ token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { case lex::token_t::OCT_LITERAL: case lex::token_t::DEC_LITERAL: case lex::token_t::HEX_LITERAL: - return parse_int(msg_stack, in); + return { parse_int(msg_stack, in), in.location }; case lex::token_t::FLOAT_LITERAL: - return parse_float(msg_stack, in); + return { parse_float(msg_stack, in), in.location }; case lex::token_t::STRING_LITERAL: - return { parse_string(msg_stack, false, in) }; - case lex::token_t::CHAR_LITERAL: { - auto str = parse_string(msg_stack, true, in); - if (str.size() != 1) throw message_t(message_t::ERROR, "Char literal must consist of just one character.", in.location); - return str.front(); - } + return { parse_string(msg_stack, false, in), in.location }; + case lex::token_t::CHAR_LITERAL: + return { parse_string(msg_stack, true, in), in.location }; default: throw message_t(message_t::ERROR, "Token type not recognised.", in.location); } diff --git a/src/main/main.cc b/src/main/main.cc index b7d6f08..2f4ec65 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -157,7 +157,7 @@ int main(int argc, const char *argv[]) { try { std::ifstream f { file, std::ios_base::in }; auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); - auto ast = ast_ctx_t::parse(msg_stack, tokens); + auto ast = 
ast_ctx_t::parse(ast::parse_glob, msg_stack, tokens); std::cout << data::json::stringify(ast) << std::endl; } From 28a18feb3fe380f42d1917f2cae550dcb2fdcd5e Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 27 Oct 2022 15:30:59 +0300 Subject: [PATCH 57/74] fix: int literal parsing parses in reverse --- src/compiler/treeifier/tokenizer.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index ecda2d1..fd5fdcd 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -60,7 +60,7 @@ static std::vector parse_bin(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = data.length() - 1; j >= i; j--) { + for (size_t j = 0; j < data.length(); j++) { if (lastbyte_n == 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -83,7 +83,7 @@ static std::vector parse_hex(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = data.length() - 1; j >= i; j--) { + for (size_t j = 0; j < data.length(); j++) { if (lastbyte_n == 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -110,7 +110,7 @@ static std::vector parse_oct(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = data.length() - 1; j >= i; j--) { + for (size_t j = 0; j < data.length(); j++) { if (lastbyte_n >= 8) { lastbyte_n = 0; res.push_back(last_byte); From d15bcc542c027823dda4cb8e61ef6f34746bdc96 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Thu, 27 Oct 2022 15:31:21 +0300 Subject: [PATCH 58/74] fix: type output map's namespace is not null when empty --- src/compiler/treeifier/ast/parsers/type.cc | 32 +++++++--------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/compiler/treeifier/ast/parsers/type.cc 
b/src/compiler/treeifier/ast/parsers/type.cc index 4242a05..483e216 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -5,38 +5,26 @@ bool ast::parse_type(ast_ctx_t &ctx, size_t &res_i, map_t &out) { if (h.ended()) return false; - auto &nmsp = (out["namespace"] = map_t()).map(); - nmsp["$_name"] = "$_nmsp"; - auto &nmsp_content = (out["namespace"].map()["content"] = array_t()).array(); + auto &nmsp = out["namespace"].map({}); size_t ptr_n = 0; - if (!h.push_parse(parse_identifier, nmsp_content)) return false; - - while (true) { - if (h.ended()) break; - if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; - h.advance("Expected an identifier."); - h.force_push_parse(parse_identifier, "Expected an identifier.", nmsp_content); - } + if (!h.parse(parse_nmsp, nmsp)) return false; while (!h.ended() && h.curr().is_operator(operator_t::MULTIPLY)) { ptr_n++; if (!h.try_advance()) break; } - out["location"] = conv::loc_to_map(h.res_loc()); - out["name"] = nmsp_content[nmsp_content.size() - 1]; - out["ptr_n"] = (float)ptr_n; - nmsp_content.pop_back(); + auto &nmsp_arr = nmsp["content"].array(); - if (nmsp_content.size() == 0) { - auto loc = h.res_loc(); - loc.length = 1; - nmsp["location"] = conv::loc_to_map(loc); - } + out["location"] = conv::loc_to_map(h.res_loc()); + out["name"] = nmsp_arr.back(); + out["ptr_n"] = (float)ptr_n; + nmsp_arr.pop_back(); + if (nmsp_arr.empty()) out["namespace"] = null; else { - auto loc_1 = conv::map_to_loc(nmsp_content[0].map()["location"].string()); - auto loc_2 = conv::map_to_loc(nmsp_content[nmsp_content.size() - 1].map()["location"].string()); + auto loc_1 = conv::map_to_loc(nmsp_arr.front().map()["location"].string()); + auto loc_2 = conv::map_to_loc(nmsp_arr.back().map()["location"].string()); auto loc = loc_1.intersect(loc_2); nmsp["location"] = conv::loc_to_map(loc); } From 1f05551be1e502b55e4cbbb3101217558df14263 Mon Sep 17 00:00:00 2001 From: TopchetoEU 
<36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:06:30 +0300 Subject: [PATCH 59/74] fix: end of string doesn't get detected properly --- src/compiler/treeifier/lexer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 4d6e1e3..0be3284 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -132,11 +132,11 @@ static res_t lexlet_operator(char c, std::vector &tok) { else return lexer_none(); }; static res_t lexlet_string(char c, std::vector &tok) { - if (c == '"' && tok.back() != '\\') return lexer_end(token_t::STRING_LITERAL, true); + if ((c == '"') && (tok.back() != '\\')) return lexer_end(token_t::STRING_LITERAL, false).add(); else return lexer_none(); }; static res_t lexlet_char(char c, std::vector &tok) { - if (c == '"' && tok.back() != '\\') return lexer_end(token_t::CHAR_LITERAL, true); + if (c == '\'' && tok.back() != '\\') return lexer_end(token_t::CHAR_LITERAL, false).add(); else return lexer_none(); }; From e510584b987e4e0da89048b2b2b6720f84242cae Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:07:52 +0300 Subject: [PATCH 60/74] fix(group): named parsers are tried in unnamed parsing --- include/compiler/treeifier/ast.hh | 5 +++- src/compiler/treeifier/ast/parsers/group.cc | 30 ++++++++++++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index f589d37..6594c24 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "compiler/treeifier/tokenizer.hh" @@ -22,6 +23,7 @@ namespace ppc::comp::tree::ast { class group_t { private: std::map named_parsers; + std::set unnamed_parsers; std::map parsers; public: group_t &insert(const std::string &name, 
parser_t parser, const std::string &relative_to, bool after); @@ -74,6 +76,7 @@ namespace ppc::comp::tree::ast { loc_namespace_name_t map_to_nmsp(const data::map_t &map); } - parser_func_t parse_glob, parse_nmsp, parse_identifier, parse_type, parse_func, parse_field, parse_exp, parse_stat_exp; + parser_func_t parse_glob, parse_nmsp, parse_identifier, parse_type, parse_exp, parse_stat_exp; + parser_func_t parse_func, parse_field, parse_export; parser_func_t parse_exp_var, parse_exp_str_lit, parse_exp_int_lit, parse_exp_float_lit; } \ No newline at end of file diff --git a/src/compiler/treeifier/ast/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc index fa12cc2..4cad173 100644 --- a/src/compiler/treeifier/ast/parsers/group.cc +++ b/src/compiler/treeifier/ast/parsers/group.cc @@ -4,6 +4,7 @@ #include #include #include +#include using namespace ppc::comp::tree; using namespace ppc::comp::tree::ast; @@ -15,7 +16,7 @@ static bool read_nmsp(ast_ctx_t &ctx, size_t &i, lang::loc_namespace_name_t &nam map_t res; if (!h.parse(parse_nmsp, res)) return false; name = conv::map_to_nmsp(res); - return true; + return h.submit(false); } template static bool resolve_nmsp(ast_ctx_t &ctx, const lang::namespace_name_t &name, T begin, T end, lang::namespace_name_t &actual_name) { @@ -45,14 +46,14 @@ group_t &group_t::insert(const std::string &name, parser_t parser, const std::st throw "The parser '" + name + "' is already in the group."; } - auto it = parsers.find(relative_to); - if (it == parsers.end()) { - throw "The parser '" + relative_to + "' isn't in the group."; + auto it = unnamed_parsers.find(relative_to); + if (it == unnamed_parsers.end()) { + throw "The parser '" + relative_to + "' isn't in the group or isn't unnamed."; } if (after) it++; - parsers.insert(it, { name, parser }); + unnamed_parsers.insert(it, name); return *this; } @@ -73,11 +74,18 @@ group_t &group_t::add_last(const std::string &name, parser_t parser) { } parsers.emplace(name, parser); + 
unnamed_parsers.emplace(name); return *this; } group_t &group_t::add_named(const std::string &name, parser_t parser, const lang::namespace_name_t &identifier) { - add_last(name, parser); + if (parsers.find(name) != parsers.end()) { + throw "The parser '" + name + "' is already in the group."; + } + + parsers.emplace(name, parser); + named_parsers.emplace(identifier, name); + return *this; } @@ -92,16 +100,14 @@ bool group_t::operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { if (resolve_nmsp(ctx, name.strip_location(), named_parsers.begin(), named_parsers.end(), actual)) { auto parser = parsers.find(this->named_parsers.find(actual)->second); out["$_name"] = parser->first; - if (parser->second(ctx, i, out)) return true; + if (h.parse(parser->second, out)) return h.submit(false); else throw message_t::error("Unexpected construct specifier.", h.res_loc()); } } - unordered_map errors; - - for (auto parser : parsers) { - out["$_name"] = parser.first; - if ((*parser.second)(ctx, i, out)) return true; + for (auto name : unnamed_parsers) { + out["$_name"] = name; + if (parsers.at(name)(ctx, i, out)) return true; } stringstream m; From 2a0104808c33f45b1530be48ffda6f8531a976fa Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:08:30 +0300 Subject: [PATCH 61/74] fix: split int and string literals --- include/compiler/treeifier/tokenizer.hh | 17 +++++--- src/compiler/treeifier/tokenizer.cc | 58 +++++++++++++++++++++---- 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index adaece9..c2e40b2 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -79,7 +79,8 @@ namespace ppc::comp::tree { NONE, IDENTIFIER, OPERATOR, - LITERAL, + INT_LITERAL, + STR_LITERAL, } kind; union data_t { std::string *identifier; @@ -91,7 +92,9 @@ namespace ppc::comp::tree { bool 
is_identifier() const { return kind == IDENTIFIER; } bool is_operator() const { return kind == OPERATOR; } - bool is_literal() const { return kind == LITERAL; } + bool is_int_literal() const { return kind == INT_LITERAL; } + bool is_str_literal() const { return kind == STR_LITERAL; } + bool is_literal() const { return is_int_literal() || is_str_literal(); } const auto &identifier() const { if (!is_identifier()) throw std::string { "Token is not an identifier." }; @@ -118,8 +121,8 @@ namespace ppc::comp::tree { kind = OPERATOR; data._operator = op; } - token_t(const std::vector &val, location_t loc = location_t::NONE): location(loc) { - kind = LITERAL; + token_t(const std::vector &val, bool is_str, location_t loc = location_t::NONE): location(loc) { + kind = is_str ? STR_LITERAL : INT_LITERAL; data.literal = new std::vector { val }; } token_t(const token_t &tok): location(tok.location) { @@ -128,14 +131,16 @@ namespace ppc::comp::tree { case NONE: break; case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break; case OPERATOR: data._operator = tok.data._operator; break; - case LITERAL: data.literal = new std::vector { *tok.data.literal }; break; + case STR_LITERAL: + case INT_LITERAL: data.literal = new std::vector { *tok.data.literal }; break; } } ~token_t() { switch (kind) { case IDENTIFIER: delete data.identifier; break; - case LITERAL: delete data.literal; break; + case STR_LITERAL: + case INT_LITERAL: delete data.literal; break; default: break; } } diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index fd5fdcd..5e5d362 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -60,7 +60,7 @@ static std::vector parse_bin(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = 0; j < data.length(); j++) { + for (size_t j = i; j < data.length(); j++) { if (lastbyte_n == 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -83,7 
+83,7 @@ static std::vector parse_hex(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = 0; j < data.length(); j++) { + for (size_t j = i; j < data.length(); j++) { if (lastbyte_n == 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -110,7 +110,7 @@ static std::vector parse_oct(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = 0; j < data.length(); j++) { + for (size_t j = i; j < data.length(); j++) { if (lastbyte_n >= 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -129,6 +129,45 @@ static std::vector parse_oct(msg_stack_t &msg_stack, size_t i, const st return res; } +static void mult_10(std::vector &val) { + std::vector res; + + int carry = 0; + + for (size_t i = 0; i < val.size(); i++) { + carry = val[i] * 10 + carry; + res.push_back(carry); + carry >>= 8; + } + + if (carry != 0) res.push_back(carry); + val = res; +} +static void add_byte(std::vector &a, uint8_t b) { + int carry = b; + + for (size_t i = 0; i < a.size(); i++) { + carry = a[i] + carry; + a[i] = carry; + carry >>= 8; + if (carry == 0) break; + } + + if (carry != 0) a.push_back(carry); +} +static std::vector parse_dec(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + + for (size_t j = i; j < data.length(); j++) { + int digit = data[j] - '0'; + + mult_10(res); + if (res.empty()) res.push_back(digit); + else add_byte(res, digit); + } + + return res; +} static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { switch (token.type) { @@ -137,7 +176,7 @@ static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t case lex::token_t::OCT_LITERAL: return parse_oct(msg_stack, 1, token.data); case lex::token_t::DEC_LITERAL: - throw "no dec literals lol bozo."s; + return parse_dec(msg_stack, 0, token.data); case lex::token_t::HEX_LITERAL: return parse_hex(msg_stack, 2, token.data); default: @@ -161,16 +200,19 @@ token_t 
token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { throw message_t(message_t::ERROR, "Operator not recognised."s, in.location); } case lex::token_t::BIN_LITERAL: + return { parse_bin(msg_stack, 1, in.data), false, in.location }; case lex::token_t::OCT_LITERAL: + return { parse_oct(msg_stack, 1, in.data), false, in.location }; case lex::token_t::DEC_LITERAL: + return { parse_dec(msg_stack, 0, in.data), false, in.location }; case lex::token_t::HEX_LITERAL: - return { parse_int(msg_stack, in), in.location }; + return { parse_hex(msg_stack, 2, in.data), false, in.location }; case lex::token_t::FLOAT_LITERAL: - return { parse_float(msg_stack, in), in.location }; + return { parse_float(msg_stack, in), false, in.location }; case lex::token_t::STRING_LITERAL: - return { parse_string(msg_stack, false, in), in.location }; + return { parse_string(msg_stack, false, in), true, in.location }; case lex::token_t::CHAR_LITERAL: - return { parse_string(msg_stack, true, in), in.location }; + return { parse_string(msg_stack, true, in), false, in.location }; default: throw message_t(message_t::ERROR, "Token type not recognised.", in.location); } From 572e9cb484ea4dcc31fcbf112d432d3767292c53 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:09:46 +0300 Subject: [PATCH 62/74] fix: inporper detection of end of string in char, too --- src/compiler/treeifier/lexer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 0be3284..0127d64 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -136,7 +136,7 @@ static res_t lexlet_string(char c, std::vector &tok) { else return lexer_none(); }; static res_t lexlet_char(char c, std::vector &tok) { - if (c == '\'' && tok.back() != '\\') return lexer_end(token_t::CHAR_LITERAL, false).add(); + if ((c == '\'') && (tok.back() != '\\')) return 
lexer_end(token_t::CHAR_LITERAL, false).add(); else return lexer_none(); }; From 24f312d848e13881a1f946102dfec0d0f2406bef Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:10:10 +0300 Subject: [PATCH 63/74] refactor: remove unnecessary 's' operator --- src/compiler/treeifier/operators.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/treeifier/operators.cc b/src/compiler/treeifier/operators.cc index fce5567..b03bd90 100644 --- a/src/compiler/treeifier/operators.cc +++ b/src/compiler/treeifier/operators.cc @@ -29,5 +29,5 @@ operator_t tree::operator_find(const std::string &raw) { if (op == raw) return (operator_t)i; i++; } - throw "Invalid operator '"s + raw + "' given."; + throw "Invalid operator '" + raw + "' given."; } From 9a63f8720dac0f99a618f295dcfdf6964f30e301 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:10:56 +0300 Subject: [PATCH 64/74] fix: add default values to unassigned variables --- src/compiler/treeifier/ast/parsers/field.cc | 2 +- src/compiler/treeifier/ast/parsers/func.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc index d1f335f..e454884 100644 --- a/src/compiler/treeifier/ast/parsers/field.cc +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -7,7 +7,7 @@ bool ast::parse_field(ast_ctx_t &ctx, size_t &res_i, map_t &out) { if (!h.parse(parse_identifier, out["name"].map({}))) return false; - bool type, defval; + bool type = false, defval = false; h.throw_ended("Expected a colon or an equals sign."); diff --git a/src/compiler/treeifier/ast/parsers/func.cc b/src/compiler/treeifier/ast/parsers/func.cc index ca588a3..1931aa8 100644 --- a/src/compiler/treeifier/ast/parsers/func.cc +++ b/src/compiler/treeifier/ast/parsers/func.cc @@ -7,7 +7,7 @@ static bool parse_arg(ast_ctx_t 
&ctx, size_t &res_i, map_t &out) { if (!h.parse(parse_identifier, out["name"].map({}))) return false; - bool type, defval; + bool type = false, defval = false; h.throw_ended("Expected a colon or an equals sign."); From 90497f601d6423fd0e3195e93895e6fe3c4f21f0 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:11:17 +0300 Subject: [PATCH 65/74] feat: add export def --- include/utils/data.hh | 3 +++ src/compiler/treeifier/ast/ast.cc | 1 + src/compiler/treeifier/ast/parsers/export.cc | 12 ++++++++++++ 3 files changed, 16 insertions(+) create mode 100644 src/compiler/treeifier/ast/parsers/export.cc diff --git a/include/utils/data.hh b/include/utils/data.hh index 0001345..d1ff35a 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -38,6 +38,9 @@ namespace ppc::data { bool is_number() const; bool is_string() const; bool is_bool() const; + bool is_true() { + return is_bool() && boolean(); + } array_t &array(const array_t &arr); map_t &map(const map_t &map); diff --git a/src/compiler/treeifier/ast/ast.cc b/src/compiler/treeifier/ast/ast.cc index 83abea1..13ba48b 100644 --- a/src/compiler/treeifier/ast/ast.cc +++ b/src/compiler/treeifier/ast/ast.cc @@ -20,5 +20,6 @@ ast_ctx_t::ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messa .add_last("$_exp", parse_stat_exp); group("$_def") .add_last("$_func", parse_func) + .add_named("$_export", parse_export, { "export" }) .add_last("$_field", parse_field); } \ No newline at end of file diff --git a/src/compiler/treeifier/ast/parsers/export.cc b/src/compiler/treeifier/ast/parsers/export.cc new file mode 100644 index 0000000..dfd29a8 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/export.cc @@ -0,0 +1,12 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_export(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (out["exported"].is_true()) { + ctx.messages.push(message_t(message_t::WARNING, "Export 
is alredy specified for this definition.", h.prev_loc())); + } + out["exported"] = true; + + return ctx.group("$_def")(ctx, res_i, out); +} From e755850c6fecb2251d62fcc1c03e5d09ad61559e Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:14:58 +0300 Subject: [PATCH 66/74] feat: add string parser --- src/compiler/treeifier/ast/ast.cc | 4 ++-- src/compiler/treeifier/ast/parsers/exp.cc | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/compiler/treeifier/ast/ast.cc b/src/compiler/treeifier/ast/ast.cc index 13ba48b..912ed86 100644 --- a/src/compiler/treeifier/ast/ast.cc +++ b/src/compiler/treeifier/ast/ast.cc @@ -13,9 +13,9 @@ group_t &ast_ctx_t::group(const std::string &name) { ast_ctx_t::ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messages(messages), tokens(tokens) { group("$_exp_val") .add_last("$_var", parse_exp_var) - .add_last("$_int", parse_exp_int_lit); + .add_last("$_int", parse_exp_int_lit) + .add_last("$_string", parse_exp_str_lit); // .add_last("$_float", parse_exp_float_lit) - // .add_last("$_string", parse_exp_str_lit); group("$_stat") .add_last("$_exp", parse_stat_exp); group("$_def") diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index 690eb50..7991e2e 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -159,7 +159,7 @@ bool ast::parse_exp_var(ast_ctx_t &ctx, size_t &res_i, map_t &out) { bool ast::parse_exp_int_lit(ast_ctx_t &ctx, size_t &res_i, map_t &out) { tree_helper_t h(ctx, res_i); - if (h.curr().is_literal()) { + if (h.curr().is_int_literal()) { auto &arr = out["content"].array({}); for (auto b : h.curr().literal()) arr.push_back((float)b); out["location"] = conv::loc_to_map(h.loc()); @@ -168,7 +168,18 @@ bool ast::parse_exp_int_lit(ast_ctx_t &ctx, size_t &res_i, map_t &out) { return false; } +bool ast::parse_exp_str_lit(ast_ctx_t 
&ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + if (h.curr().is_str_literal()) { + auto &arr = out["content"].array({}); + for (auto b : h.curr().literal()) arr.push_back((float)b); + out["location"] = conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; +} bool ast::parse_exp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { tree_helper_t h(ctx, res_i); From 19c8af768b556698bd23044334fcd4a6ab75b2c1 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:38:09 +0300 Subject: [PATCH 67/74] feat: add oct parsing in string parser --- include/utils/message.hh | 5 ++++ src/compiler/treeifier/tokenizer.cc | 43 +++++++++++++++++------------ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/utils/message.hh b/include/utils/message.hh index 75886d8..1d8eaf3 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -42,6 +42,11 @@ namespace ppc::messages { inline auto end() const { return messages.end(); } void push(const message_t &msg) { messages.push_back(msg); } + void err(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::ERROR, msg, loc }); } + void warn(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::WARNING, msg, loc }); } + void suggest(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::SUGGESTION, msg, loc }); } + void info(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::INFO, msg, loc }); } + void debug(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::DEBUG, msg, loc }); } void push(const msg_stack_t &other) { for (const auto &msg : other) push(msg); } diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 5e5d362..e603d86 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -32,7 +32,26 @@ static std::vector 
parse_string(msg_stack_t &msg_stack, bool is_char, c else if (c == 'r') new_c = '\r'; else if (c == 't') new_c = '\t'; else if (c == 'v') new_c = '\v'; - // TODO: Add support for oct, hex and utf8 literals + else if (c >= '0' && c <= '7') { + new_c = 0; + size_t n = 0; + while (c >= '0' && c <= '7') { + new_c <<= 3; + new_c |= c - '0'; + c = token.data[++i]; + n++; + } + if (n > 3) { + location_t loc = curr_char_loc; + loc.code_start--; + loc.start--; + loc.length = n + 1; + msg_stack.warn("Octal escape sequence overflows 255 8-bit limit (3 digits).", loc); + } + curr_char_loc.start += n - 1; + i--; + } + // TODO: Add support for hex and utf8 literals else if (c == literal_char || c == '\\') new_c = c; else { throw message_t(message_t::ERROR, "Unescapable character.", curr_char_loc); @@ -168,21 +187,6 @@ static std::vector parse_dec(msg_stack_t &msg_stack, size_t i, const st return res; } - -static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { - switch (token.type) { - case lex::token_t::BIN_LITERAL: - return parse_bin(msg_stack, 2, token.data); - case lex::token_t::OCT_LITERAL: - return parse_oct(msg_stack, 1, token.data); - case lex::token_t::DEC_LITERAL: - return parse_dec(msg_stack, 0, token.data); - case lex::token_t::HEX_LITERAL: - return parse_hex(msg_stack, 2, token.data); - default: - throw "WTF r u doing bro?"s; - } -} static std::vector parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { throw "no floats lol bozo"s; } @@ -211,8 +215,11 @@ token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { return { parse_float(msg_stack, in), false, in.location }; case lex::token_t::STRING_LITERAL: return { parse_string(msg_stack, false, in), true, in.location }; - case lex::token_t::CHAR_LITERAL: - return { parse_string(msg_stack, true, in), false, in.location }; + case lex::token_t::CHAR_LITERAL: { + auto res = parse_string(msg_stack, true, in); + std::reverse(res.begin(), res.end()); + return { res, 
false, in.location }; + } default: throw message_t(message_t::ERROR, "Token type not recognised.", in.location); } From b8400f0ec34281dc53e4909ed353cdd74c09f65c Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 11:00:29 +0300 Subject: [PATCH 68/74] feat: add cleartmp command to makefile --- Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4002055..33b103a 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,8 @@ else ifeq ($(profile),debug) flags += -g ldflags+= -Wl,-rpath=bin/debug endif -oldbin := bin + +oldbin := $(bin) export bin := $(bin)/$(profile) ifeq ($(os),Windows) @@ -54,6 +55,8 @@ build: version echo Done! clear: if exist $(subst /,\,$(oldbin)) rmdir /s /q $(subst /,\,$(oldbin)) +cleartmp: + if exist $(subst /,\,$(bin)/tmp) rmdir /s /q $(subst /,\,$(bin)/tmp) .ONESHELL: install: build @@ -82,6 +85,9 @@ build: version clear: rm -r $(oldbin) +clear: + rm -r $(bin)/tmp + install: build echo Installing ++C compiler to your system... 
From 1b6d29ff7b3c21ebd9693604409e1e7f6afccc63 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 11:10:48 +0300 Subject: [PATCH 69/74] fix: some issues with missing semicolons at EOF --- src/compiler/treeifier/ast/parsers/exp.cc | 7 +++++-- src/compiler/treeifier/ast/parsers/field.cc | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index 7991e2e..840335c 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -199,7 +199,10 @@ bool ast::parse_exp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { h.advance("Expected a value on the right side of the operator."); continue; } - if (!last_val && h.push_parse(ctx.group("$_exp_val"), res)) last_val = true; + if (!last_val && h.push_parse(ctx.group("$_exp_val"), res)) { + last_val = true; + continue; + } if (h.curr().is_operator()) { auto op = h.curr()._operator(); if (last_val) { @@ -315,7 +318,7 @@ bool ast::parse_exp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { bool ast::parse_stat_exp(ast_ctx_t &ctx, size_t &i, map_t &res) { tree_helper_t h(ctx, i); if (!h.parse(parse_exp, res)) return false; - if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + if (!h.ended() && h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); return h.submit(false); diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc index e454884..a879a1d 100644 --- a/src/compiler/treeifier/ast/parsers/field.cc +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -22,7 +22,7 @@ bool ast::parse_field(ast_ctx_t &ctx, size_t &res_i, map_t &out) { type = true; } - if (h.curr().is_operator(operator_t::SEMICOLON)) { + if (!h.ended() && h.curr().is_operator(operator_t::SEMICOLON)) { if (type || defval) 
return h.submit(); else return h.err("A type or a default value must be specified "); } From e92c2a69c8999659bd662b6b19247225b9e8dc4b Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 12:59:44 +0300 Subject: [PATCH 70/74] feat: add all operators to expression --- include/compiler/treeifier/tokenizer.hh | 4 -- include/utils/location.hh | 3 ++ src/compiler/treeifier/ast/parsers/exp.cc | 49 ++++++++++++++++++++--- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index c2e40b2..c8e2034 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -67,10 +67,6 @@ namespace ppc::comp::tree { BRACE_CLOSE, PAREN_OPEN, PAREN_CLOSE, - - VAL, - REF, - SIZEOF, }; struct token_t { diff --git a/include/utils/location.hh b/include/utils/location.hh index 82fd250..f929edb 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -20,6 +20,9 @@ namespace ppc { operator std::string() const { return to_string(); } std::string to_string() const; location_t intersect(location_t other) const; + static location_t intersect(const location_t &a, const location_t &b) { + return a.intersect(b); + } location_t(); location_t(const location_t &other): location_t(other.filename, other.line, other.start, other.code_start, other.length) { } diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc index 840335c..b6ba5e5 100644 --- a/src/compiler/treeifier/ast/parsers/exp.cc +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -41,14 +41,49 @@ std::map pre_ops { { operator_t::AND, { precedence_t::PREFIX, 1, "reference" } }, }; std::map bin_ops { - { operator_t::ADD, { precedence_t::ADD, 2, "add" } }, - { operator_t::SUBTRACT, { precedence_t::ADD, 2, "subtract" } }, - { operator_t::MULTIPLY, { precedence_t::MULT, 2, "multiply" } }, - { 
operator_t::DIVIDE, { precedence_t::MULT, 2, "divide" } }, - { operator_t::MODULO, { precedence_t::MULT, 2, "modulo" } }, { operator_t::INCREASE, { precedence_t::POSTFIX, 1, "inc_post" } }, { operator_t::DECREASE, { precedence_t::POSTFIX, 1, "dec_post" } }, { (operator_t)-1, sizeof_data }, + + { operator_t::ADD, { precedence_t::ADD, 2, "add" } }, + { operator_t::SUBTRACT, { precedence_t::ADD, 2, "subtract" } }, + + { operator_t::MULTIPLY, { precedence_t::MULT, 2, "multiply" } }, + { operator_t::DIVIDE, { precedence_t::MULT, 2, "divide" } }, + { operator_t::MODULO, { precedence_t::MULT, 2, "modulo" } }, + + { operator_t::SHIFT_LEFT, { precedence_t::SHIFT, 2, "shl" } }, + { operator_t::SHIFT_RIGHT, { precedence_t::SHIFT, 2, "shr" } }, + + { operator_t::LESS_THAN, { precedence_t::COMP, 2, "less" } }, + { operator_t::LESS_THAN_EQUALS, { precedence_t::COMP, 2, "less_eq" } }, + { operator_t::GREATER_THAN, { precedence_t::COMP, 2, "great" } }, + { operator_t::GREATER_THAN_EQUALS, { precedence_t::COMP, 2, "great_eq" } }, + + { operator_t::EQUALS, { precedence_t::EQU, 2, "eq" } }, + { operator_t::NOT_EQUALS, { precedence_t::EQU, 2, "neq" } }, + + { operator_t::AND, { precedence_t::BIN_AND, 2, "great_eq" } }, + { operator_t::OR, { precedence_t::BIN_OR, 2, "great_eq" } }, + { operator_t::XOR, { precedence_t::BIN_XOR, 2, "great_eq" } }, + + { operator_t::DOUBLE_AND, { precedence_t::BOOL_AND, 2, "great_eq" } }, + { operator_t::DOUBLE_OR, { precedence_t::BOOL_OR, 2, "great_eq" } }, + + { operator_t::ASSIGN, { precedence_t::ASSIGN, 2, "assign", true } }, + { operator_t::ASSIGN_ADD, { precedence_t::ASSIGN, 2, "assign_add", true } }, + { operator_t::ASSIGN_SUBTRACT, { precedence_t::ASSIGN, 2, "assign_subtract", true } }, + { operator_t::ASSIGN_MULTIPLY, { precedence_t::ASSIGN, 2, "assign_multiply", true } }, + { operator_t::ASSIGN_DIVIDE, { precedence_t::ASSIGN, 2, "assign_divide", true } }, + { operator_t::ASSIGN_MODULO, { precedence_t::ASSIGN, 2, "assign_modulo", true } }, + { 
operator_t::ASSIGN_SHIFT_LEFT, { precedence_t::ASSIGN, 2, "assign_shl", true } }, + { operator_t::ASSIGN_SHIFT_RIGHT, { precedence_t::ASSIGN, 2, "assign_shr", true } }, + { operator_t::ASSIGN_XOR, { precedence_t::ASSIGN, 2, "assign_xor", true } }, + { operator_t::ASSIGN_AND, { precedence_t::ASSIGN, 2, "assign_and", true } }, + { operator_t::ASSIGN_OR, { precedence_t::ASSIGN, 2, "assign_or", true } }, + { operator_t::ASSIGN_DOUBLE_AND, { precedence_t::ASSIGN, 2, "assign_dand", true } }, + { operator_t::ASSIGN_DOUBLE_OR, { precedence_t::ASSIGN, 2, "assign_dor", true } }, + { operator_t::ASSIGN_NULL_COALESCING, { precedence_t::ASSIGN, 2, "assign_null_coal", true } }, }; map_t op_to_map(located_t op) { @@ -250,6 +285,10 @@ bool ast::parse_exp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { res.pop_back(); h.force_parse(parse_type, "Expected a type.", cast["type"].map({})); + cast["location"] = conv::loc_to_map(location_t::intersect( + conv::map_to_loc(cast["exp"].map()["location"].string()), + conv::map_to_loc(cast["type"].map()["location"].string()) + )); res.push_back(cast); } else if (op == operator_t::DOT || op == operator_t::PTR_MEMBER) { From f58eb14a00d651ab0a88b5f2070fb842796bf443 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 12:59:54 +0300 Subject: [PATCH 71/74] fix: don't output null properties --- src/utils/json.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils/json.cc b/src/utils/json.cc index 1311ec8..828f037 100644 --- a/src/utils/json.cc +++ b/src/utils/json.cc @@ -10,6 +10,7 @@ namespace ppc::data::json { out << '['; for (const auto &el : val.array()) { + if (el.is_null()) continue; if (!first) out << ','; first = false; out << stringify(el); @@ -21,6 +22,7 @@ namespace ppc::data::json { out << '{'; for (const auto &el : val.map()) { + if (el.second.is_null()) continue; if (!first) out << ','; first = false; out << '"' << el.first << '"' << ':' << stringify(el.second); From 
ec2d29ef185264bd86fce6e0d30ec07be9217ee6 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 13:07:27 +0300 Subject: [PATCH 72/74] fix: location of type is incorrect --- src/compiler/treeifier/ast/parsers/type.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc index 483e216..eebcabf 100644 --- a/src/compiler/treeifier/ast/parsers/type.cc +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -17,7 +17,9 @@ bool ast::parse_type(ast_ctx_t &ctx, size_t &res_i, map_t &out) { auto &nmsp_arr = nmsp["content"].array(); + h.i--; out["location"] = conv::loc_to_map(h.res_loc()); + h.i++; out["name"] = nmsp_arr.back(); out["ptr_n"] = (float)ptr_n; nmsp_arr.pop_back(); From 7a98dfa825b4c0f7acdb50663a5385ff7b80fa64 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 14:49:49 +0300 Subject: [PATCH 73/74] feat: add if, else, return, break, continue and compounds stat --- include/compiler/treeifier/ast.hh | 1 + src/compiler/treeifier/ast/ast.cc | 6 ++ src/compiler/treeifier/ast/parsers/stat.cc | 98 ++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 src/compiler/treeifier/ast/parsers/stat.cc diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh index 6594c24..ccfa3a3 100644 --- a/include/compiler/treeifier/ast.hh +++ b/include/compiler/treeifier/ast.hh @@ -78,5 +78,6 @@ namespace ppc::comp::tree::ast { parser_func_t parse_glob, parse_nmsp, parse_identifier, parse_type, parse_exp, parse_stat_exp; parser_func_t parse_func, parse_field, parse_export; + parser_func_t parse_if, parse_while, parse_return, parse_break, parse_continue, parse_stat_comp; parser_func_t parse_exp_var, parse_exp_str_lit, parse_exp_int_lit, parse_exp_float_lit; } \ No newline at end of file diff --git a/src/compiler/treeifier/ast/ast.cc 
b/src/compiler/treeifier/ast/ast.cc index 912ed86..578d987 100644 --- a/src/compiler/treeifier/ast/ast.cc +++ b/src/compiler/treeifier/ast/ast.cc @@ -17,6 +17,12 @@ ast_ctx_t::ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messa .add_last("$_string", parse_exp_str_lit); // .add_last("$_float", parse_exp_float_lit) group("$_stat") + .add_named("$_while", parse_while, { "while" }) + .add_named("$_if", parse_if, { "if" }) + .add_named("$_return", parse_return, { "return" }) + .add_named("$_break", parse_break, { "break" }) + .add_named("$_continue", parse_continue, { "continue" }) + .add_last("$_comp", parse_stat_comp) .add_last("$_exp", parse_stat_exp); group("$_def") .add_last("$_func", parse_func) diff --git a/src/compiler/treeifier/ast/parsers/stat.cc b/src/compiler/treeifier/ast/parsers/stat.cc new file mode 100644 index 0000000..4548797 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/stat.cc @@ -0,0 +1,98 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_if(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + h.throw_ended(); + if (!h.curr("Expected open parens after if keyword.").is_operator(operator_t::PAREN_OPEN)) { + throw message_t::error("Expected open parens after if keyword.", h.loc(1)); + } + + h.advance("Expected an expression."); + h.force_parse(parse_exp, "Expected an expression.", out["condition"].map({})); + + if (!h.curr("Expected closed parens.").is_operator(operator_t::PAREN_CLOSE)) { + throw message_t::error("Expected closed parens.", h.loc(1)); + } + + h.advance("Expected a statement."); + h.force_parse(ctx.group("$_stat"), "Expected a statement.", out["if"].map({})); + + if (h.ended() || !h.curr().is_identifier("else")) return h.submit(false); + + h.advance("Expected a statement."); + h.force_parse(ctx.group("$_stat"), "Expected a statement.", out["else"].map({})); + + return h.submit(false); +} + +bool ast::parse_while(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t 
h(ctx, res_i); + + h.throw_ended(); + if (!h.curr("Expected open parens after while keyword.").is_operator(operator_t::PAREN_OPEN)) { + throw message_t::error("Expected open parens after while keyword.", h.loc(1)); + } + + h.advance("Expected an expression."); + h.force_parse(parse_exp, "Expected an expression.", out["condition"].map({})); + + if (!h.curr("Expected closed parens.").is_operator(operator_t::PAREN_CLOSE)) { + throw message_t::error("Expected closed parens.", h.loc(1)); + } + + h.advance("Expected a statement."); + h.force_parse(ctx.group("$_stat"), "Expected a statement.", out["while"].map({})); + + return h.submit(false); +} + +bool ast::parse_return(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + h.advance("Expected an expression."); + h.force_parse(parse_exp, "Expected an expression.", out["condition"].map({})); + + if (!h.curr("Expected a semicolon.").is_operator(operator_t::SEMICOLON)) { + throw message_t::error("Expected a semicolon.", h.loc(1)); + } + + return h.submit(true); +} + +bool ast::parse_break(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (!h.curr("Expected a semicolon.").is_operator(operator_t::SEMICOLON)) { + throw message_t::error("Expected a semicolon.", h.loc(1)); + } + + return h.submit(true); +} + +bool ast::parse_continue(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (!h.curr("Expected a semicolon.").is_operator(operator_t::SEMICOLON)) { + throw message_t::error("Expected a semicolon.", h.loc(1)); + } + + return h.submit(true); +} + +bool ast::parse_stat_comp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + if (!h.curr().is_operator(operator_t::BRACE_OPEN)) return false; + h.advance("Expected a statement or a closing brace."); + + auto &content = out["content"].array({}); + + while (!h.curr().is_operator(operator_t::BRACE_CLOSE)) { + h.throw_ended("Expected a 
statement or a closing brace."); + h.push_parse(ctx.group("$_stat"), content); + } + + return h.submit(true); +} \ No newline at end of file From 7725949c2292cf936040fffedb9b07f87fe43c96 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 14:55:24 +0300 Subject: [PATCH 74/74] fix: linux issues (of course) --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 33b103a..1cf1ca9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ export MAKEFLAGS += --silent -r -j export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow -DPROFILE_$(profile) -fdiagnostics-color=always -export ldflags=-L$(bin)/$(profile) +export ldflags=-L$(bin)/$(profile) -Wl,-rpath=bin/$(profile) export lib=ppc$(version-major)- export profile=release @@ -28,7 +28,6 @@ ifeq ($(profile),release) flags += -O3 else ifeq ($(profile),debug) flags += -g -ldflags+= -Wl,-rpath=bin/debug endif oldbin := $(bin)