diff --git a/.gitignore b/.gitignore index 7e4fcb7..df9f664 100644 --- a/.gitignore +++ b/.gitignore @@ -18,11 +18,12 @@ !src/*/**/*.cc !src/*/**/*.h !src/*/**/*.hh -!src/*/proj.txt +!src/*.proj !src/lsproj.cc !scripts !scripts/common.mak +!scripts/lsproj.mak !scripts/install.bat !scripts/uninstall.bat diff --git a/Makefile b/Makefile index c5fc586..1cf1ca9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -export MAKEFLAGS += --silent -r -export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow -export ldflags=-L$(bin)/$(profile) +export MAKEFLAGS += --silent -r -j +export flags=-std=c++17 -Wall -Wno-main -Wno-trigraphs -Wno-missing-braces -Wno-stringop-overflow -DPROFILE_$(profile) -fdiagnostics-color=always +export ldflags=-L$(bin)/$(profile) -Wl,-rpath=bin/$(profile) export lib=ppc$(version-major)- export profile=release @@ -28,9 +28,9 @@ ifeq ($(profile),release) flags += -O3 else ifeq ($(profile),debug) flags += -g -ldflags+= -Wl,-rpath=bin/debug endif -oldbin := bin + +oldbin := $(bin) export bin := $(bin)/$(profile) ifeq ($(os),Windows) @@ -51,9 +51,11 @@ build: version make -f scripts/common.mak if exist "$(subst /,\,$(bin)\$(output).exe)" del "$(subst /,\,$(bin)\$(output).exe)" mklink /H "$(subst /,\,$(bin)\$(output).exe)" "$(subst /,\,$(binary))" > NUL - + echo Done! clear: if exist $(subst /,\,$(oldbin)) rmdir /s /q $(subst /,\,$(oldbin)) +cleartmp: + if exist $(subst /,\,$(bin)/tmp) rmdir /s /q $(subst /,\,$(bin)/tmp) .ONESHELL: install: build @@ -82,6 +84,9 @@ build: version clear: rm -r $(oldbin) +cleartmp: + rm -r $(bin)/tmp + install: build echo Installing ++C compiler to your system... 
diff --git a/include/compiler/treeifier/ast.hh b/include/compiler/treeifier/ast.hh new file mode 100644 index 0000000..ccfa3a3 --- /dev/null +++ b/include/compiler/treeifier/ast.hh @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "compiler/treeifier/tokenizer.hh" +#include "utils/data.hh" +#include "lang/common.hh" + +using namespace std::string_literals; +using namespace ppc; +using namespace ppc::lang; +using namespace ppc::messages; + +namespace ppc::comp::tree::ast { + struct ast_ctx_t; + using parser_func_t = bool (ast_ctx_t &ctx, size_t &res_i, data::map_t &out); + using parser_t = parser_func_t*; + + class group_t { + private: + std::map named_parsers; + std::set unnamed_parsers; + std::map parsers; + public: + group_t &insert(const std::string &name, parser_t parser, const std::string &relative_to, bool after); + group_t &add_last(const std::string &name, parser_t parser); + group_t &replace(const std::string &name, parser_t parser); + group_t &add_named(const std::string &name, parser_t parser, const lang::namespace_name_t &identifier); + + bool operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const; + }; + + struct ast_ctx_t { + private: + std::unordered_map groups; + public: + msg_stack_t &messages; + std::vector &tokens; + std::set imports; + loc_namespace_name_t nmsp; + + ast_ctx_t &operator=(const ast_ctx_t &other) = delete; + + template + bool parse(const T &parser, size_t &i, data::map_t &out) { + return parser(*this, i, out); + } + + group_t &group(const std::string &name); + + template + static data::map_t parse(const T &glob, msg_stack_t &messages, std::vector &tokens) { + ast_ctx_t ctx(messages, tokens); + data::map_t res; + size_t i = 0; + + if (!ctx.parse(glob, i, res)) throw message_t::error("Failed to compile."); + return res; + } + + ast_ctx_t(msg_stack_t &messages, std::vector &tokens); + }; + + namespace conv { + data::map_t identifier_to_map(const located_t &loc); + located_t 
map_to_identifier(const data::map_t &map); + + data::string_t loc_to_map(const location_t &loc); + location_t map_to_loc(const data::string_t &map); + + data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp); + loc_namespace_name_t map_to_nmsp(const data::map_t &map); + } + + parser_func_t parse_glob, parse_nmsp, parse_identifier, parse_type, parse_exp, parse_stat_exp; + parser_func_t parse_func, parse_field, parse_export; + parser_func_t parse_if, parse_while, parse_return, parse_break, parse_continue, parse_stat_comp; + parser_func_t parse_exp_var, parse_exp_str_lit, parse_exp_int_lit, parse_exp_float_lit; +} \ No newline at end of file diff --git a/include/compiler/treeifier/ast/helper.hh b/include/compiler/treeifier/ast/helper.hh new file mode 100644 index 0000000..971c562 --- /dev/null +++ b/include/compiler/treeifier/ast/helper.hh @@ -0,0 +1,157 @@ +#include "compiler/treeifier/ast.hh" + +using namespace ppc; +using namespace ppc::lang; +using namespace ppc::data; +using namespace ppc::comp::tree; +using namespace ppc::comp::tree::ast; + +namespace ppc::comp::tree::ast { + struct tree_helper_t { + private: + ast_ctx_t &ctx; + size_t &res_i; + + public: + size_t i; + + void throw_ended() { + if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end.", loc()); + } + void throw_ended(const std::string &reason) { + if (ended()) throw messages::message_t(message_t::ERROR, "Unexpected end: " + reason, loc()); + } + + location_t loc(size_t n) { + location_t res = prev_loc(); + res.start += res.length; + res.code_start += res.length; + res.length = n; + return res; + } + location_t prev_loc() { + auto prev_i = i; + if (i > 0) i--; + auto res = loc(); + i = prev_i; + return res; + } + location_t next_loc(size_t n = 1) { + location_t res = loc(); + res.start += res.length; + res.code_start += res.length; + res.length = n; + return res; + } + location_t loc() { + if (ended()) { + if (i == 0) return location_t::NONE; + + location_t loc = ctx.tokens[i - 
1].location; + + loc.start += loc.length; + loc.code_start += loc.length; + loc.length = 1; + + return loc; + } + else return curr().location; + } + + location_t res_loc() { + if (res_i >= ctx.tokens.size()) return loc(); + else return ctx.tokens[res_i].location.intersect(loc()); + } + + bool err(std::string message) { + throw message_t::error(message, loc()); + } + bool err(std::string message, size_t n) { + throw message_t::error(message, loc(n)); + } + + bool submit(bool inc_i = true) { + res_i = (i += inc_i); + return true; + } + + bool ended() { + return i == ctx.tokens.size(); + } + + token_t &curr(const std::string &reason) { + throw_ended(reason); + return ctx.tokens[i]; + } + token_t &curr() { + throw_ended(); + return ctx.tokens[i]; + } + + bool try_advance() { + if (ended()) return false; + i++; + return !ended(); + } + void advance() { + throw_ended(); + i++; + throw_ended(); + } + void advance(const std::string &reason) { + throw_ended(reason); + i++; + throw_ended(reason); + } + + template + bool push_parse(const T &parser, data::array_t &out) { + data::map_t res; + if (parse(parser, res)) { + out.push_back(res); + return true; + } + else return false; + } + + template + bool parse(const T &parser, data::map_t &out) { + return ctx.parse(parser, i, out); + } + + template + void force_push_parse(const T &parser, std::string message, data::array_t &out) { + throw_ended(message); + bool success; + + try { + success = push_parse(parser, out); + } + catch (const message_t &msg) { + ctx.messages.push(msg); + success = false; + } + + if (!success) err(message); + } + template + void force_parse(const T &parser, std::string message, data::map_t &out) { + throw_ended(message); + bool success; + + try { + success = parse(parser, out); + } + catch (const message_t &msg) { + ctx.messages.push(msg); + success = false; + } + + if (!success) err(message); + } + + tree_helper_t(ast_ctx_t &ctx, size_t &i): ctx(ctx), res_i(i) { + this->i = i; + } + }; +} \ No newline at 
end of file diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index f889d05..c8e2034 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -4,8 +4,10 @@ #include "utils/message.hh" #include "compiler/treeifier/lexer.hh" -namespace ppc::comp::tree::tok { +namespace ppc::comp::tree { enum operator_t { + NONE, + LESS_THAN, GREATER_THAN, LESS_THAN_EQUALS, @@ -55,6 +57,7 @@ namespace ppc::comp::tree::tok { COMMA, SEMICOLON, COLON, + DOUBLE_COLON, LAMBDA, @@ -64,10 +67,6 @@ namespace ppc::comp::tree::tok { BRACE_CLOSE, PAREN_OPEN, PAREN_CLOSE, - - VAL, - REF, - SIZEOF, }; struct token_t { @@ -76,111 +75,73 @@ namespace ppc::comp::tree::tok { NONE, IDENTIFIER, OPERATOR, - INT, - FLOAT, - CHAR, - STRING, + INT_LITERAL, + STR_LITERAL, } kind; union data_t { std::string *identifier; operator_t _operator; - std::uint64_t int_literal; - double float_literal; - char char_literal; - std::vector *string_literal; + std::vector *literal; } data; public: ppc::location_t location; - bool is_identifier() { return kind == IDENTIFIER; } - bool is_operator() { return kind == OPERATOR; } - bool is_int_lit() { return kind == INT; } - bool is_float_lit() { return kind == FLOAT; } - bool is_char_lit() { return kind == CHAR; } - bool is_string_lit() { return kind == STRING; } + bool is_identifier() const { return kind == IDENTIFIER; } + bool is_operator() const { return kind == OPERATOR; } + bool is_int_literal() const { return kind == INT_LITERAL; } + bool is_str_literal() const { return kind == STR_LITERAL; } + bool is_literal() const { return is_int_literal() || is_str_literal(); } - const auto &identifier() { + const auto &identifier() const { if (!is_identifier()) throw std::string { "Token is not an identifier." }; else return *data.identifier; } - auto _operator() { + auto _operator() const { if (!is_operator()) throw std::string { "Token is not an operator." 
}; else return data._operator; } - auto int_lit() { - if (!is_int_lit()) throw std::string { "Token is not an int literal." }; - else return data.int_literal; - } - auto float_lit() { - if (!is_float_lit()) throw std::string { "Token is not a float literal." }; - else return data.float_literal; - } - auto char_lit() { - if (!is_char_lit()) throw std::string { "Token is not a char literal." }; - else return data.char_literal; - } - const auto &string_lit() { - if (!is_string_lit()) throw std::string { "Token is not a string literal." }; - else return *data.string_literal; + const auto &literal() const { + if (!is_literal()) throw std::string { "Token is not a literal." }; + else return *data.literal; } - bool is_operator(operator_t op) { return is_operator() && _operator() == op; } - bool is_identifier(std::string &&val) { return is_identifier() && identifier() == val; } + bool is_operator(operator_t op) const { return is_operator() && _operator() == op; } + bool is_identifier(const std::string &val) const { return is_identifier() && identifier() == val; } token_t() { kind = NONE; } - token_t(const std::string &identifier, location_t loc = NO_LOCATION) { + token_t(const std::string &identifier, location_t loc = location_t::NONE): location(loc) { kind = IDENTIFIER; data.identifier = new std::string { identifier }; - location = loc; } - token_t(operator_t op, location_t loc = NO_LOCATION) { + token_t(operator_t op, location_t loc = location_t::NONE): location(loc) { kind = OPERATOR; data._operator = op; - location = loc; } - token_t(std::uint64_t val, location_t loc = NO_LOCATION) { - kind = INT; - data.int_literal = val; - location = loc; + token_t(const std::vector &val, bool is_str, location_t loc = location_t::NONE): location(loc) { + kind = is_str ? 
STR_LITERAL : INT_LITERAL; + data.literal = new std::vector { val }; } - token_t(double val, location_t loc = NO_LOCATION) { - kind = FLOAT; - data.float_literal = val; - location = loc; - } - token_t(char c, location_t loc = NO_LOCATION) { - kind = CHAR; - data.char_literal = c; - location = loc; - } - token_t(const std::vector &val, location_t loc = NO_LOCATION) { - kind = STRING; - data.string_literal = new std::vector { val }; - location = loc; - } - token_t(const token_t &tok) { + token_t(const token_t &tok): location(tok.location) { kind = tok.kind; switch (kind) { case NONE: break; case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break; case OPERATOR: data._operator = tok.data._operator; break; - case INT: data.int_literal = tok.data.int_literal; break; - case FLOAT: data.float_literal = tok.data.float_literal; break; - case CHAR: data.char_literal = tok.data.char_literal; break; - case STRING: data.string_literal = new std::vector { *tok.data.string_literal }; break; + case STR_LITERAL: + case INT_LITERAL: data.literal = new std::vector { *tok.data.literal }; break; } - location = tok.location; } ~token_t() { switch (kind) { case IDENTIFIER: delete data.identifier; break; - case STRING: delete data.string_literal; break; + case STR_LITERAL: + case INT_LITERAL: delete data.literal; break; default: break; } } - static tok::token_t parse(messages::msg_stack_t &msg_stack, lex::token_t token); + static token_t parse(messages::msg_stack_t &msg_stack, lex::token_t token); static std::vector parse_many(messages::msg_stack_t &msg_stack, std::vector tokens); }; diff --git a/include/lang/common.hh b/include/lang/common.hh index 5e2fba2..6d193c8 100644 --- a/include/lang/common.hh +++ b/include/lang/common.hh @@ -4,16 +4,74 @@ #include "utils/location.hh" namespace ppc::lang { - struct namespace_name_t { - std::vector segments; - ppc::location_t location; + template + struct located_t : T { + location_t location; - bool operator ==(const 
namespace_name_t &other); + located_t(location_t loc, const T &val): T(val), location(loc) { } + located_t(const T &val): T(val), location(location_t::NONE) { } + located_t() { } + }; + template + struct slocated_t { + T value; + location_t location; + + bool operator ==(const slocated_t &other) { + return value == other.value && location == other.location; + } + bool operator !=(const slocated_t &other) { + return !(*this == other); + } + + slocated_t(location_t loc, const T &val): value(val), location(loc) { } + slocated_t(const T &val): value(val), location(location_t::NONE) { } + slocated_t() { } + }; + + struct namespace_name_t : public std::vector { + using base = std::vector; + + int compare(const namespace_name_t &other) const; + + bool operator==(const namespace_name_t &other) const { return compare(other) == 0; } + bool operator!=(const namespace_name_t &other) const { return compare(other) != 0; } + bool operator<(const namespace_name_t &other) const { return compare(other) < 0; } + bool operator<=(const namespace_name_t &other) const { return compare(other) <= 0; } + bool operator>(const namespace_name_t &other) const { return compare(other) > 0; } + bool operator>=(const namespace_name_t &other) const { return compare(other) >= 0; } + + operator std::string() const { return to_string(); } + std::string to_string() const; + + namespace_name_t() { } + namespace_name_t(std::initializer_list segments): base(segments.begin(), segments.end()) { } + }; + + struct loc_namespace_name_t : public std::vector> { + using base = std::vector>; + + int compare(const loc_namespace_name_t &other) const; + + bool operator==(const loc_namespace_name_t &other) const { return compare(other) == 0; } + bool operator!=(const loc_namespace_name_t &other) const { return compare(other) != 0; } + bool operator<(const loc_namespace_name_t &other) const { return compare(other) < 0; } + bool operator<=(const loc_namespace_name_t &other) const { return compare(other) <= 0; } + bool 
operator>(const loc_namespace_name_t &other) const { return compare(other) > 0; } + bool operator>=(const loc_namespace_name_t &other) const { return compare(other) >= 0; } + + namespace_name_t strip_location() const; + + operator std::string() const { return to_string(); } + std::string to_string() const; + + loc_namespace_name_t() { } + loc_namespace_name_t(std::initializer_list> segments): base(segments.begin(), segments.end()) { } }; bool is_identifier_valid(messages::msg_stack_t &msg_stack, ppc::location_t location, const std::string &name); inline bool is_identifier_valid(const std::string &name) { - messages::msg_stack_t ms { }; + messages::msg_stack_t ms; return is_identifier_valid(ms, { }, name); } } diff --git a/include/lang/version.hh b/include/lang/version.hh index 6fd01fc..efa577b 100644 --- a/include/lang/version.hh +++ b/include/lang/version.hh @@ -33,8 +33,8 @@ namespace ppc { bool operator ==(version_t other) const; inline bool operator !=(version_t other) const { return !(*this == other); } - version_t(uint16_t major, uint16_t minor, uint32_t revision) : major { major }, minor { minor }, revision { revision } { } - version_t(uint16_t major, uint16_t minor) : version_t { major, minor, -1u } { } - version_t(uint16_t major) : version_t { major, -1u, -1u } { } + version_t(uint16_t major, uint16_t minor, uint32_t revision) : major(major), minor(minor), revision(revision) { } + version_t(uint16_t major, uint16_t minor) : version_t(major, minor, -1) { } + version_t(uint16_t major) : version_t(major, -1, -1) { } }; } diff --git a/include/utils/data.hh b/include/utils/data.hh index 2b4201b..d1ff35a 100644 --- a/include/utils/data.hh +++ b/include/utils/data.hh @@ -38,12 +38,21 @@ namespace ppc::data { bool is_number() const; bool is_string() const; bool is_bool() const; + bool is_true() { + return is_bool() && boolean(); + } - bool array(array_t &out) const; - bool map(map_t &out) const; - bool number(number_t &out) const; - bool string(string_t &out) 
const; - bool boolean(bool_t &out) const; + array_t &array(const array_t &arr); + map_t &map(const map_t &map); + number_t &number(number_t num); + string_t &string(const string_t &str); + bool_t &boolean(bool_t bl); + + array_t &array(); + map_t &map(); + number_t &number(); + string_t &string(); + bool_t &boolean(); const array_t &array() const; const map_t &map() const; @@ -51,51 +60,57 @@ namespace ppc::data { const string_t &string() const; bool_t boolean() const; - // value_t &operator=(const value_t &other); + value_t &operator=(const value_t &other); + value_t &operator=(const char *other); ~value_t(); value_t(); value_t(const array_t &val); value_t(const map_t &val); + value_t(std::initializer_list> map); value_t(number_t val); value_t(const string_t &val); + value_t(const char *val); value_t(bool_t val); value_t(const value_t &other); + }; + static const value_t null{}; class map_t { private: std::unordered_map values; public: - value_t &operator [](std::string name) { - if (values.find(name) == values.end()) { - values.emplace(name, value_t { }); + value_t &operator[](std::string name){ + auto res = values.find(name); + if (res == values.end()) { + res = values.emplace(name, value_t()).first; } + return res->second; + } + const value_t &operator [](std::string name) const { + auto res = values.find(name); + if (res == values.end()) throw "The map doesn't contain a key '" + name + "'."; + return res->second; + } - return values[name]; + bool has(std::string key) const { + return values.find(key) != values.end(); } std::size_t size() const { return values.size(); } auto begin() const { return values.begin(); } auto end() const { return values.end(); } + + map_t() { } + map_t(std::initializer_list> vals) { + for (const auto &pair : vals) { + values.insert(pair); + } + } }; - class array_t { - private: - std::vector values; - public: - value_t &operator [](std::size_t i) { return values[i]; } - - auto begin() { return values.begin(); } - auto end() { return 
values.end(); } - - void push(const value_t &val) { values.push_back(val); } - void insert(const value_t &val, std::size_t i = 0) { values.insert(begin() + i, val); } - void pop() { values.pop_back(); } - void remove(std::size_t i = 0) { values.erase(begin() + i); } - - std::size_t size() const { return values.size(); } - }; + class array_t : public std::vector { }; } \ No newline at end of file diff --git a/include/utils/json.hh b/include/utils/json.hh new file mode 100644 index 0000000..a67ac1a --- /dev/null +++ b/include/utils/json.hh @@ -0,0 +1,11 @@ +#pragma once + +#include +#include +#include +#include +#include "utils/data.hh" + +namespace ppc::data::json { + std::string stringify(const data::value_t &map); +} \ No newline at end of file diff --git a/include/utils/location.hh b/include/utils/location.hh index e7266aa..f929edb 100644 --- a/include/utils/location.hh +++ b/include/utils/location.hh @@ -4,24 +4,34 @@ namespace ppc { struct location_t { + static const location_t NONE; + std::size_t line; std::size_t start; std::size_t length; std::size_t code_start; std::string filename; + bool operator==(const location_t &other) const; + bool operator !=(const location_t &other) const { + return !(*this == other); + } + + operator std::string() const { return to_string(); } std::string to_string() const; location_t intersect(location_t other) const; + static location_t intersect(const location_t &a, const location_t &b) { + return a.intersect(b); + } location_t(); - location_t(std::string filename); + location_t(const location_t &other): location_t(other.filename, other.line, other.start, other.code_start, other.length) { } + location_t(const std::string &filename); location_t(std::size_t line, std::size_t start); - location_t(std::string filename, std::size_t line, std::size_t start); + location_t(const std::string &filename, std::size_t line, std::size_t start); location_t(std::size_t line, std::size_t start, std::size_t code_start); - location_t(std::string 
filename, std::size_t line, std::size_t start, std::size_t code_start); + location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start); location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); - location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); + location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length); }; - - static const location_t NO_LOCATION = { }; } \ No newline at end of file diff --git a/include/utils/message.hh b/include/utils/message.hh index 90439bd..1d8eaf3 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -14,11 +14,21 @@ namespace ppc::messages { WARNING, ERROR, } level; - location_t location; std::string content; + location_t location; + + message_t(level_t level, const std::string &content, location_t loc = location_t::NONE) : + level(level), + content(content), + location(loc) { } + message_t() : message_t(DEBUG, "") { } + + operator std::string() const { return to_string(); } std::string to_string() const; bool is_severe() const; + + static message_t error(const std::string &message, location_t loc = location_t::NONE) { return message_t(ERROR, message, loc); } }; struct msg_stack_t { @@ -28,7 +38,19 @@ namespace ppc::messages { inline auto begin() { return messages.begin(); } inline auto end() { return messages.end(); } + inline auto begin() const { return messages.begin(); } + inline auto end() const { return messages.end(); } + void push(const message_t &msg) { messages.push_back(msg); } + void err(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::ERROR, msg, loc }); } + void warn(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::WARNING, msg, loc }); } + void suggest(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::SUGGESTION, msg, loc 
}); } + void info(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::INFO, msg, loc }); } + void debug(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::DEBUG, msg, loc }); } + void push(const msg_stack_t &other) { + for (const auto &msg : other) push(msg); + } + const message_t &peek() { return messages.back(); } void clear() { messages.clear(); } bool is_failed() const; diff --git a/scripts/common.mak b/scripts/common.mak index 91e3562..c9091f3 100644 --- a/scripts/common.mak +++ b/scripts/common.mak @@ -1,14 +1,18 @@ -$(shell $(call mkdir,$(bin))) -$(shell $(CXX) $(src)/lsproj.cc -o $(bin)/lsproj$(exe)) +export lsproj = $(bin)/lsproj$(exe) +export flags += "-I$(inc)" -D$(OS) -DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) -rwildcard=$(foreach d,$(wildcard $(1:=/*)),$(call rwildcard,$d,$2) $(filter $(subst *,%,$2),$d)) +$(shell make -f scripts/lsproj.mak lsproj=$(lsproj) src=$(src) $(lsproj)) + +rwildcard=$(foreach d, $(wildcard $(1:=/*)),\ + $(call rwildcard,$d,$2)\ + $(filter $(subst *,%,$2),$d)\ +) uniq=$(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) -modoutput=$(shell ./$(bin)/lsproj$(exe) $(src) $1 output) +modoutput=$(shell ./$(lsproj) $(src) $1 output) deps=$(strip \ - $(foreach dep, $(shell ./$(bin)/lsproj$(exe) $(src) $1 deps),\ - $(if $(wildcard src/$(dep)),\ - $(dep),\ + $(foreach dep, $(shell ./$(lsproj) $(src) $1 deps),\ + $(if $(wildcard src/$(dep)), $(dep),\ $(error The module '$(dep)' (dependency of '$1') doesn't exist)\ )\ )\ @@ -28,29 +32,27 @@ lrdeps=$(foreach dep,$(call rdeps,$1),-l$(lib)$(call modoutput,$(dep))) modules = $(patsubst $(src)/%/,$(bin)/lib$(lib)%$(so),$(filter-out $(src)/$(mainmodule)/,$(wildcard $(src)/*/))) sources = $(call rwildcard,$(src)/$1,*.cc) -headers = $(call rwildcard,$(inc),*.h) -binaries = $(patsubst $(src)/%.cc,$(bin)/%.o,$(call sources,$1)) - -flags += "-I$(inc)" -D$(OS) 
-DPPC_VERSION_MAJOR=$(version-major) -DPPC_VERSION_MINOR=$(version-minor) -DPPC_VERSION_BUILD=$(version-build) +headers = $(call rwildcard,$(inc),*.hh) +binaries = $(patsubst $(src)/%.cc,$(bin)/tmp/%.o,$(call sources,$1)) .PHONY: build +.PRECIOUS: $(bin)/tmp/%.o build: $(binary) .SECONDEXPANSION: -$(binary): $$(call frdeps,$(mainmodule)) $$(call sources,$$*) +$(binary): $$(call frdeps,$(mainmodule)) $$(call binaries,$(mainmodule)) $(call mkdir,$(dir $@)) + $(CXX) $(flags) $(call binaries,$(mainmodule)) -o $@ $(ldflags) $(call ldeps,$(mainmodule)) -L$(bin) "-I$(inc)" echo Compiling executable '$(notdir $(binary))'... - $(CXX) $(flags) $(call sources,$(mainmodule)) -o $@ $(ldflags) $(call ldeps,$(mainmodule)) -L$(bin) "-I$(inc)" - $(call rmdir,$(bin)/lsproj$(exe)) .SECONDEXPANSION: -$(bin)/lib$(lib)%$(so): $$(call sources,$$*) $(headers) +$(bin)/lib$(lib)%$(so): $$(call frdeps,$$*) $$(call binaries,$$*) $(call mkdir,$(bin)) + $(CXX) -shared -fPIC $(flags) $(call binaries,$*) -o $@ $(ldflags) $(call ldeps,$*) -L$(bin) "-I$(inc)" echo Compiling library '$(notdir $@)'... - $(CXX) -shared -fPIC $(flags) $(call sources,$*) -o $@ $(ldflags) $(call ldeps,$*) -L$(bin) "-I$(inc)" -# $(bin)/%.o: $(src)/%.cc $(headers) -# echo - Compiling '$*.cc'... -# $(call mkdir,$(dir $@)) -# $(CXX) -fPIC -c $(flags) $< -o $@ +$(bin)/tmp/%.o: $(src)/%.cc $(headers) + $(call mkdir,$(dir $@)) + $(CXX) -fPIC -c $(flags) $< -o $@ + echo - Compiling '$*.cc'... 
diff --git a/scripts/lsproj.mak b/scripts/lsproj.mak new file mode 100644 index 0000000..6d49d5a --- /dev/null +++ b/scripts/lsproj.mak @@ -0,0 +1,3 @@ +$(lsproj): $(src)/lsproj.cc + $(call mkdir,$(dir $@)) + $(CXX) $^ -o $@ \ No newline at end of file diff --git a/src/compiler.proj b/src/compiler.proj new file mode 100644 index 0000000..6fa4f42 --- /dev/null +++ b/src/compiler.proj @@ -0,0 +1,2 @@ +compiler +utils, lang \ No newline at end of file diff --git a/src/compiler/proj.txt b/src/compiler/proj.txt deleted file mode 100644 index c0def46..0000000 --- a/src/compiler/proj.txt +++ /dev/null @@ -1,2 +0,0 @@ -compiler -utils \ No newline at end of file diff --git a/src/compiler/treeifier/ast/ast.cc b/src/compiler/treeifier/ast/ast.cc new file mode 100644 index 0000000..578d987 --- /dev/null +++ b/src/compiler/treeifier/ast/ast.cc @@ -0,0 +1,31 @@ +#include "compiler/treeifier/ast.hh" + +using namespace ppc; +using namespace ppc::data; +using namespace ppc::lang; +using namespace ppc::comp::tree::ast; + +group_t &ast_ctx_t::group(const std::string &name) { + if (groups.find(name) == groups.end()) return groups[name] = { }; + else return groups[name]; +} + +ast_ctx_t::ast_ctx_t(msg_stack_t &messages, std::vector &tokens): messages(messages), tokens(tokens) { + group("$_exp_val") + .add_last("$_var", parse_exp_var) + .add_last("$_int", parse_exp_int_lit) + .add_last("$_string", parse_exp_str_lit); + // .add_last("$_float", parse_exp_float_lit) + group("$_stat") + .add_named("$_while", parse_while, { "while" }) + .add_named("$_if", parse_if, { "if" }) + .add_named("$_return", parse_return, { "return" }) + .add_named("$_break", parse_break, { "break" }) + .add_named("$_continue", parse_continue, { "continue" }) + .add_last("$_comp", parse_stat_comp) + .add_last("$_exp", parse_stat_exp); + group("$_def") + .add_last("$_func", parse_func) + .add_named("$_export", parse_export, { "export" }) + .add_last("$_field", parse_field); +} \ No newline at end of file diff --git 
a/src/compiler/treeifier/ast/conv.cc b/src/compiler/treeifier/ast/conv.cc new file mode 100644 index 0000000..f3a688d --- /dev/null +++ b/src/compiler/treeifier/ast/conv.cc @@ -0,0 +1,70 @@ +#include +#include "compiler/treeifier/ast.hh" + +namespace ppc::comp::tree::ast::conv { + data::map_t identifier_to_map(const located_t &loc) { + return { + { "location", conv::loc_to_map(loc.location) }, + { "content", loc }, + { "$_name", "$_identifier" }, + }; + } + located_t map_to_identifier(const data::map_t &map) { + return { conv::map_to_loc(map["location"].string()), map["content"].string() }; + } + + data::string_t loc_to_map(const location_t &loc) { + std::stringstream res; + res << loc.filename << ':' << loc.line + 1 << ':' << loc.start + 1 << ':' << loc.code_start + 1 << ':' << loc.length + 1; + return res.str(); + } + location_t map_to_loc(const data::string_t &map) { + std::stringstream res; + res.str(map); + + std::string filename; + std::string line; + std::string start; + std::string code_start; + std::string length; + + std::getline(res, filename, ':'); + std::getline(res, line, ':'); + std::getline(res, start, ':'); + std::getline(res, code_start, ':'); + std::getline(res, length, ':'); + + return { filename, std::stoull(line) - 1, std::stoull(start) - 1, std::stoull(code_start) - 1, std::stoull(length) - 1 }; + } + + data::map_t nmsp_to_map(const loc_namespace_name_t &nmsp) { + data::map_t res; + + auto &arr = res["content"].array({}); + + for (const auto &segment : nmsp) { + arr.push_back({ + { "location", loc_to_map(segment.location) }, + { "content", segment }, + { "$_name", "$_nmsp" }, + }); + } + + return res; + } + loc_namespace_name_t map_to_nmsp(const data::map_t &map) { + loc_namespace_name_t res; + + for (const auto &segment : map["content"].array()) { + try { + auto val = map_to_identifier(segment.map()); + res.push_back(val); + } + catch (const message_t &) { + throw "'content' of a namespace map must contain only identifiers."; + } + } + + 
return res; + } +} diff --git a/src/compiler/treeifier/ast/parsers.cc b/src/compiler/treeifier/ast/parsers.cc new file mode 100644 index 0000000..e69de29 diff --git a/src/compiler/treeifier/ast/parsers/exp.cc b/src/compiler/treeifier/ast/parsers/exp.cc new file mode 100644 index 0000000..b6ba5e5 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/exp.cc @@ -0,0 +1,364 @@ +#include "compiler/treeifier/ast/helper.hh" +#include +#include + +enum precedence_t { + NONE, + POSTFIX, + PREFIX, + MULT, + ADD, + SHIFT, + COMP, + EQU, + BIN_AND, + BIN_XOR, + BIN_OR, + BOOL_AND, + BOOL_OR, + TERNARY, + ASSIGN, + PAREN, + CALL_START, +}; + +struct op_data_t { + precedence_t precedence; + size_t op_n; + std::string name; + bool assoc; +}; + +op_data_t sizeof_data { precedence_t::PREFIX, 1, "sizeof", true }; + +std::map pre_ops { + { operator_t::INCREASE, { precedence_t::PREFIX, 1, "inc_pre" } }, + { operator_t::DECREASE, { precedence_t::PREFIX, 1, "dec_pre" } }, + { operator_t::ADD, { precedence_t::PREFIX, 1, "positive" } }, + { operator_t::SUBTRACT, { precedence_t::PREFIX, 1, "negative" } }, + { operator_t::BITWISE_NEGATIVE, { precedence_t::PREFIX, 1, "flip" } }, + { operator_t::MULTIPLY, { precedence_t::PREFIX, 1, "dereference" } }, + { operator_t::AND, { precedence_t::PREFIX, 1, "reference" } }, +}; +std::map bin_ops { + { operator_t::INCREASE, { precedence_t::POSTFIX, 1, "inc_post" } }, + { operator_t::DECREASE, { precedence_t::POSTFIX, 1, "dec_post" } }, + { (operator_t)-1, sizeof_data }, + + { operator_t::ADD, { precedence_t::ADD, 2, "add" } }, + { operator_t::SUBTRACT, { precedence_t::ADD, 2, "subtract" } }, + + { operator_t::MULTIPLY, { precedence_t::MULT, 2, "multiply" } }, + { operator_t::DIVIDE, { precedence_t::MULT, 2, "divide" } }, + { operator_t::MODULO, { precedence_t::MULT, 2, "modulo" } }, + + { operator_t::SHIFT_LEFT, { precedence_t::SHIFT, 2, "shl" } }, + { operator_t::SHIFT_RIGHT, { precedence_t::SHIFT, 2, "shr" } }, + + { operator_t::LESS_THAN, { 
precedence_t::COMP, 2, "less" } }, + { operator_t::LESS_THAN_EQUALS, { precedence_t::COMP, 2, "less_eq" } }, + { operator_t::GREATER_THAN, { precedence_t::COMP, 2, "great" } }, + { operator_t::GREATER_THAN_EQUALS, { precedence_t::COMP, 2, "great_eq" } }, + + { operator_t::EQUALS, { precedence_t::EQU, 2, "eq" } }, + { operator_t::NOT_EQUALS, { precedence_t::EQU, 2, "neq" } }, + + { operator_t::AND, { precedence_t::BIN_AND, 2, "great_eq" } }, + { operator_t::OR, { precedence_t::BIN_OR, 2, "great_eq" } }, + { operator_t::XOR, { precedence_t::BIN_XOR, 2, "great_eq" } }, + + { operator_t::DOUBLE_AND, { precedence_t::BOOL_AND, 2, "great_eq" } }, + { operator_t::DOUBLE_OR, { precedence_t::BOOL_OR, 2, "great_eq" } }, + + { operator_t::ASSIGN, { precedence_t::ASSIGN, 2, "assign", true } }, + { operator_t::ASSIGN_ADD, { precedence_t::ASSIGN, 2, "assign_add", true } }, + { operator_t::ASSIGN_SUBTRACT, { precedence_t::ASSIGN, 2, "assign_subtract", true } }, + { operator_t::ASSIGN_MULTIPLY, { precedence_t::ASSIGN, 2, "assign_multiply", true } }, + { operator_t::ASSIGN_DIVIDE, { precedence_t::ASSIGN, 2, "assign_divide", true } }, + { operator_t::ASSIGN_MODULO, { precedence_t::ASSIGN, 2, "assign_modulo", true } }, + { operator_t::ASSIGN_SHIFT_LEFT, { precedence_t::ASSIGN, 2, "assign_shl", true } }, + { operator_t::ASSIGN_SHIFT_RIGHT, { precedence_t::ASSIGN, 2, "assign_shr", true } }, + { operator_t::ASSIGN_XOR, { precedence_t::ASSIGN, 2, "assign_xor", true } }, + { operator_t::ASSIGN_AND, { precedence_t::ASSIGN, 2, "assign_and", true } }, + { operator_t::ASSIGN_OR, { precedence_t::ASSIGN, 2, "assign_or", true } }, + { operator_t::ASSIGN_DOUBLE_AND, { precedence_t::ASSIGN, 2, "assign_dand", true } }, + { operator_t::ASSIGN_DOUBLE_OR, { precedence_t::ASSIGN, 2, "assign_dor", true } }, + { operator_t::ASSIGN_NULL_COALESCING, { precedence_t::ASSIGN, 2, "assign_null_coal", true } }, +}; + +map_t op_to_map(located_t op) { + return { + { "$_name", "$_operator" }, + { "ops", array_t() }, 
+ { "location", conv::loc_to_map(op.location) }, + { "op", op.name }, + }; +} + +bool pop(std::vector> &op_stack, array_t &res) { + if (op_stack.empty()) return false; + + auto map = op_to_map(op_stack.back()); + auto op_n = op_stack.back().op_n; + auto loc = op_stack.back().location; + op_stack.pop_back(); + + if (res.size() < op_n) return false; + + auto &ops = map["ops"].array(); + + + for (size_t i = 0; i < op_n; i++) { + ops.push_back(res.back()); + loc = loc.intersect(conv::map_to_loc(res.back().map()["location"].string())); + res.pop_back(); + } + + map["location"] = conv::loc_to_map(loc); + + std::reverse(ops.begin(), ops.end()); + res.push_back(map); + + return true; +} +bool pop_paren(std::vector> &op_stack, array_t &res) { + bool has_paren = false; + for (const auto &op : op_stack) { + if (op.precedence == precedence_t::PAREN) { + has_paren = true; + break; + } + } + if (!has_paren) return false; + + while (true) { + if (op_stack.back().precedence == precedence_t::PAREN) break; + if (!pop(op_stack, res)) return false; + } + + op_stack.pop_back(); + return true; +} +bool pop_call(size_t n, location_t loc, std::vector> &op_stack, array_t &res) { + map_t call = { + { "$_name", "$_call" }, + }; + + array_t &args = call["args"].array({}); + + while (true) { + if (op_stack.back().precedence == precedence_t::CALL_START) break; + if (!pop(op_stack, res)) return false; + } + loc = loc.intersect(op_stack.back().location); + op_stack.pop_back(); + call["location"] = conv::loc_to_map(loc); + + for (size_t i = 0; i <= n; i++) { + args.push_back(res.back()); + res.pop_back(); + } + + std::reverse(args.begin(), args.end()); + + call["func"] = res.back(); + res.pop_back(); + res.push_back(call); + + return true; +} +bool pop_until(const op_data_t &data, tree_helper_t &h, std::vector> &op_stack, array_t &res) { + while (!op_stack.empty()) { + auto &back_data = op_stack.back(); + if (data.assoc ? 
+ back_data.precedence >= data.precedence : + back_data.precedence > data.precedence + ) break; + + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + } + return true; +} + +bool ast::parse_exp_var(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.curr().is_identifier()) { + out["content"] = h.curr().identifier(); + out["location"] = conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; +} +bool ast::parse_exp_int_lit(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.curr().is_int_literal()) { + auto &arr = out["content"].array({}); + for (auto b : h.curr().literal()) arr.push_back((float)b); + out["location"] = conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; +} +bool ast::parse_exp_str_lit(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.curr().is_str_literal()) { + auto &arr = out["content"].array({}); + for (auto b : h.curr().literal()) arr.push_back((float)b); + out["location"] = conv::loc_to_map(h.loc()); + return h.submit(true); + } + + return false; +} + +bool ast::parse_exp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + bool last_val = false; + map_t val; + std::vector> op_stack; + std::vector call_args_n; + auto res = array_t(); + + + while (true) { + if (h.ended()) break; + + if (!last_val && h.curr().is_identifier("sizeof")) { + op_stack.push_back({ h.loc(), sizeof_data }); + h.advance("Expected a value on the right side of the operator."); + continue; + } + if (!last_val && h.push_parse(ctx.group("$_exp_val"), res)) { + last_val = true; + continue; + } + if (h.curr().is_operator()) { + auto op = h.curr()._operator(); + if (last_val) { + if (op == operator_t::PAREN_OPEN) { + h.advance("Expected an argument."); + call_args_n.push_back(0); + op_stack.push_back({ h.loc(), { precedence_t::CALL_START } }); + last_val = 
false; + } + else if (op == operator_t::COMMA) { + if (call_args_n.size() == 0) break; + h.advance("Expected an argument."); + + pop_until({ .precedence = precedence_t::CALL_START, .assoc = true }, h, op_stack, res); + call_args_n.back()++; + last_val = false; + } + else if (op == operator_t::PAREN_CLOSE) { + bool is_call = false, is_paren = false; + + for (auto i = op_stack.rbegin(); i != op_stack.rend(); i++) { + if (i->precedence == precedence_t::PAREN) { + is_paren = true; + break; + } + else if (i->precedence == precedence_t::CALL_START) { + is_call = true; + break; + } + } + + if (is_call) pop_call(call_args_n.back(), h.loc(), op_stack, res); + else if (is_paren) pop_paren(op_stack, res); + else break; + + if (!h.try_advance()) break; + } + else if (op == operator_t::COLON) { + h.advance("Expected a type."); + pop_until({ .precedence = precedence_t::PREFIX, .assoc = true }, h, op_stack, res); + map_t cast = { + { "$_name", "$_cast" }, + { "exp", res.back() }, + }; + + res.pop_back(); + h.force_parse(parse_type, "Expected a type.", cast["type"].map({})); + cast["location"] = conv::loc_to_map(location_t::intersect( + conv::map_to_loc(cast["exp"].map()["location"].string()), + conv::map_to_loc(cast["type"].map()["location"].string()) + )); + res.push_back(cast); + } + else if (op == operator_t::DOT || op == operator_t::PTR_MEMBER) { + h.advance("Expected an identifier."); + pop_until({ .precedence = precedence_t::POSTFIX, .assoc = true }, h, op_stack, res); + + map_t member_access = { + { "exp", res.back() }, + { "is_ptr", op == operator_t::PTR_MEMBER }, + }; + h.force_parse(parse_identifier, "Expected an identifier.", member_access["name"].map({})); + member_access["location"] = conv::loc_to_map( + conv::map_to_loc(member_access["name"].map()["location"].string()).intersect( + conv::map_to_loc(res.back().map()["location"].string()) + ) + ); + res.pop_back(); + res.push_back(member_access); + } + else if (bin_ops.find(op) != bin_ops.end()) { + auto data = 
bin_ops[op]; + pop_until(data, h, op_stack, res); + op_stack.push_back({ h.loc(), data }); + + if (data.op_n == 1) { + last_val = true; + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + if (h.try_advance()) break; + } + else { + last_val = false; + h.advance("Expected a value on the right side of the operator."); + } + } + else break; + } + else { + if (op == operator_t::PAREN_OPEN) { + op_stack.push_back({ h.loc(), { precedence_t::PAREN } }); + h.advance("Expected a value."); + last_val = false; + } + else if (pre_ops.find(op) != pre_ops.end()) { + op_stack.push_back({ h.loc(), pre_ops[op] }); + h.advance("Expected a value on the right side of the operator."); + } + else break; + } + continue; + } + else break; + } + + if (res.size() == 0) return false; + + while (!op_stack.empty()) { + if (op_stack.back().precedence == precedence_t::PAREN) throw message_t::error("Unclosed paren.", op_stack.back().location); + if (op_stack.back().precedence == precedence_t::CALL_START) throw message_t::error("Unclosed call.", op_stack.back().location); + if (!pop(op_stack, res)) return h.err("Expected an expression on the right side of this operator."); + } + + out = res.front().map(); + + return h.submit(false); +} +bool ast::parse_stat_exp(ast_ctx_t &ctx, size_t &i, map_t &res) { + tree_helper_t h(ctx, i); + if (!h.parse(parse_exp, res)) return false; + if (!h.ended() && h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); +} diff --git a/src/compiler/treeifier/ast/parsers/export.cc b/src/compiler/treeifier/ast/parsers/export.cc new file mode 100644 index 0000000..dfd29a8 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/export.cc @@ -0,0 +1,12 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_export(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if 
(out["exported"].is_true()) { + ctx.messages.push(message_t(message_t::WARNING, "Export is alredy specified for this definition.", h.prev_loc())); + } + out["exported"] = true; + + return ctx.group("$_def")(ctx, res_i, out); +} diff --git a/src/compiler/treeifier/ast/parsers/field.cc b/src/compiler/treeifier/ast/parsers/field.cc new file mode 100644 index 0000000..a879a1d --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/field.cc @@ -0,0 +1,36 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_field(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse(parse_identifier, out["name"].map({}))) return false; + + bool type = false, defval = false; + + h.throw_ended("Expected a colon or an equals sign."); + + if (h.curr().is_operator(operator_t::COLON)) { + h.advance(); + h.force_parse(parse_type, "Expected a type.", out["type"].map({})); + type = true; + } + if (h.curr().is_operator(operator_t::ASSIGN)) { + h.advance(); + h.force_parse(parse_exp, "Expected an expression.", out["value"].map({})); + type = true; + } + + if (!h.ended() && h.curr().is_operator(operator_t::SEMICOLON)) { + if (type || defval) return h.submit(); + else return h.err("A type or a default value must be specified "); + } + else if (type || defval) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + else return false; + + return h.submit(true); +} diff --git a/src/compiler/treeifier/ast/parsers/func.cc b/src/compiler/treeifier/ast/parsers/func.cc new file mode 100644 index 0000000..1931aa8 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/func.cc @@ -0,0 +1,88 @@ +#include "compiler/treeifier/ast/helper.hh" + +static bool parse_arg(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse(parse_identifier, out["name"].map({}))) return false; + + bool type = false, defval = false; 
+ + h.throw_ended("Expected a colon or an equals sign."); + + if (h.curr().is_operator(operator_t::COLON)) { + h.advance(); + h.force_parse(parse_type, "Expected a type.", out["type"].map({})); + type = true; + } + if (h.curr().is_operator(operator_t::ASSIGN)) { + h.advance(); + h.force_parse(parse_exp, "Expected an expression.", out["value"].map({})); + type = true; + } + + if (!type && !defval) { + ctx.messages.push(message_t::error("Expected a type or a default value.", h.loc(1))); + } + + return h.submit(false); +} + +bool ast::parse_func(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (!h.parse(parse_identifier, out["name"].map({}))) return false; + if (h.ended()) return false; + if (!h.curr().is_operator(operator_t::PAREN_OPEN)) return false; + h.advance("Expected a closing paren or a parameter."); + + auto ¶ms = out["params"].array({}); + auto &content = out["content"].array({}); + + while (true) { + if (h.curr().is_operator(operator_t::PAREN_CLOSE)) { + h.advance("Expected a function body."); + break; + } + h.force_push_parse(parse_arg, "Expected a parameter.", params); + if (h.curr().is_operator(operator_t::COMMA)) { + h.advance("Expected a parameter."); + } + } + + if (h.curr().is_operator(operator_t::COLON)) { + h.advance("Expected a type."); + h.force_parse(parse_type, "Expected a type", out["type"].map({})); + } + + if (h.curr().is_operator(operator_t::SEMICOLON)) return h.submit(true); + else if (h.curr().is_operator(operator_t::LAMBDA)) { + h.advance("Expected an expression."); + map_t exp; + h.force_parse(parse_exp, "Expected an expression.", exp); + content.push_back({ + { "$_name", "$_return" }, + { "content", exp }, + }); + return h.submit(false); + } + else if (h.curr().is_operator(operator_t::BRACE_OPEN)) { + h.advance("Expected a statement."); + while (true) { + if (h.curr().is_operator(operator_t::BRACE_CLOSE)) { + return h.submit(true); + } + + 
h.force_push_parse(ctx.group("$_stat"), "Expected an expression.", content); + } + } + else { + ctx.messages.push(message_t::error("Expected a semicolon, brace open or a lambda operator.", h.loc(1))); + return h.submit(false); + } + + return h.submit(true); +} diff --git a/src/compiler/treeifier/ast/parsers/glob.cc b/src/compiler/treeifier/ast/parsers/glob.cc new file mode 100644 index 0000000..2b8021a --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/glob.cc @@ -0,0 +1,69 @@ +#include "compiler/treeifier/ast.hh" +#include "compiler/treeifier/ast/helper.hh" +// #include "./type.cc" + +using namespace ppc::comp::tree::ast; + +static bool nmsp_def(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) { + tree_helper_t h(ctx, res_i); + if (h.ended()) return false; + + if (!h.curr().is_identifier("namespace")) return false; + h.advance("Expected a namespace"); + h.force_parse(parse_nmsp, "Expected a namespace.", res); + if (!h.curr().is_operator(operator_t::SEMICOLON)) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + return h.submit(true); +} +static bool import(ast_ctx_t &ctx, size_t &res_i, data::map_t &res) { + tree_helper_t h(ctx, res_i); + if (h.ended()) return false; + + if (!h.curr().is_identifier("import")) return false; + h.advance("Expected a namespace"); + h.force_parse(parse_nmsp, "Expected a namespace.", res); + if (!h.curr().is_operator(operator_t::SEMICOLON)) { + ctx.messages.push(message_t::error("Expected a semicolon.", h.loc(1))); + return h.submit(false); + } + + return h.submit(true); +} + +bool ast::parse_glob(ast_ctx_t &ctx, size_t &res_i, data::map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return true; + if (h.parse(nmsp_def, out["namespace"].map({}))) { + ctx.nmsp = conv::map_to_nmsp(out["namespace"].map()); + } + else { + out["namespace"] = data::null; + } + + auto &imports = out["imports"].array({}); + auto &contents = out["content"].array({}); + + while (true) { + 
map_t map; + if (!h.parse(import, map)) break; + imports.push_back(map); + auto nmsp = conv::map_to_nmsp(map); + + if (!ctx.imports.emplace(nmsp).second) h.err("The namespace '" + nmsp.to_string() + "' is already imported."); + } + + while (true) { + if (h.ended()) break; + if (!h.push_parse(ctx.group("$_def"), contents)) { + ctx.messages.push(message_t::error("Invalid token.", h.loc())); + h.i++; + } + } + + if (!h.ended()) h.err("Invalid token."); + + return h.submit(); +} diff --git a/src/compiler/treeifier/ast/parsers/group.cc b/src/compiler/treeifier/ast/parsers/group.cc new file mode 100644 index 0000000..4cad173 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/group.cc @@ -0,0 +1,116 @@ +#include "compiler/treeifier/ast.hh" +#include "compiler/treeifier/tokenizer.hh" +#include "compiler/treeifier/ast/helper.hh" +#include +#include +#include +#include + +using namespace ppc::comp::tree; +using namespace ppc::comp::tree::ast; +using namespace std::string_literals; +using namespace std; + +static bool read_nmsp(ast_ctx_t &ctx, size_t &i, lang::loc_namespace_name_t &name) { + tree_helper_t h(ctx, i); + map_t res; + if (!h.parse(parse_nmsp, res)) return false; + name = conv::map_to_nmsp(res); + return h.submit(false); +} +template +static bool resolve_nmsp(ast_ctx_t &ctx, const lang::namespace_name_t &name, T begin, T end, lang::namespace_name_t &actual_name) { + for (auto it = begin; it != end; it++) { + const namespace_name_t &curr = it->first; + if (curr == name) { + actual_name = name; + return true; + } + } + for (const auto &import : ctx.imports) { + auto new_name = name; + new_name.insert(new_name.begin(), import.begin(), import.end()); + for (auto it = begin; it != end; it++) { + const namespace_name_t &curr = it->first; + if (curr == new_name) { + actual_name = name; + return true; + } + } + } + return false; +} + +group_t &group_t::insert(const std::string &name, parser_t parser, const std::string &relative_to, bool after) { + if 
(parsers.find(name) != parsers.end()) { + throw "The parser '" + name + "' is already in the group."; + } + + auto it = unnamed_parsers.find(relative_to); + if (it == unnamed_parsers.end()) { + throw "The parser '" + relative_to + "' isn't in the group or isn't unnamed."; + } + + if (after) it++; + + unnamed_parsers.insert(it, name); + + return *this; +} +group_t &group_t::replace(const std::string &name, parser_t parser) { + auto it = parsers.find(name); + + if (parsers.find(name) == parsers.end()) { + throw "The parser '" + name + "' isn't in the group."; + } + + it->second = parser; + + return *this; +} +group_t &group_t::add_last(const std::string &name, parser_t parser) { + if (parsers.find(name) != parsers.end()) { + throw "The parser '" + name + "' is already in the group."; + } + + parsers.emplace(name, parser); + unnamed_parsers.emplace(name); + + return *this; +} +group_t &group_t::add_named(const std::string &name, parser_t parser, const lang::namespace_name_t &identifier) { + if (parsers.find(name) != parsers.end()) { + throw "The parser '" + name + "' is already in the group."; + } + + parsers.emplace(name, parser); + named_parsers.emplace(identifier, name); + + return *this; +} + +bool group_t::operator()(ast_ctx_t &ctx, size_t &i, data::map_t &out) const { + tree_helper_t h(ctx, i); + + if (h.ended()) return false; + + loc_namespace_name_t name; + if (read_nmsp(ctx, h.i, name)) { + namespace_name_t actual; + if (resolve_nmsp(ctx, name.strip_location(), named_parsers.begin(), named_parsers.end(), actual)) { + auto parser = parsers.find(this->named_parsers.find(actual)->second); + out["$_name"] = parser->first; + if (h.parse(parser->second, out)) return h.submit(false); + else throw message_t::error("Unexpected construct specifier.", h.res_loc()); + } + } + + for (auto name : unnamed_parsers) { + out["$_name"] = name; + if (parsers.at(name)(ctx, i, out)) return true; + } + + stringstream m; + + return false; +} diff --git 
a/src/compiler/treeifier/ast/parsers/identifier.cc b/src/compiler/treeifier/ast/parsers/identifier.cc new file mode 100644 index 0000000..a1119e0 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/identifier.cc @@ -0,0 +1,15 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_identifier(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + if (h.curr().is_identifier()) { + auto loc = h.loc(); + out["location"] = conv::loc_to_map(loc); + out["content"] = h.curr().identifier(); + return h.submit(); + } + else return false; +} diff --git a/src/compiler/treeifier/ast/parsers/nmsp.cc b/src/compiler/treeifier/ast/parsers/nmsp.cc new file mode 100644 index 0000000..13431ad --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/nmsp.cc @@ -0,0 +1,20 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_nmsp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + auto &arr = (out["content"] = array_t()).array(); + + if (!h.push_parse(parse_identifier, arr)) return false; + + while (true) { + if (h.ended()) break; + if (!h.curr().is_operator(operator_t::DOUBLE_COLON)) break; + h.force_push_parse(parse_identifier, "Expected an identifier.", arr); + } + + out["location"] = conv::loc_to_map(h.res_loc()); + return h.submit(false); +} diff --git a/src/compiler/treeifier/ast/parsers/stat.cc b/src/compiler/treeifier/ast/parsers/stat.cc new file mode 100644 index 0000000..4548797 --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/stat.cc @@ -0,0 +1,98 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_if(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + h.throw_ended(); + if (!h.curr("Expected open parens after if keyword.").is_operator(operator_t::PAREN_OPEN)) { + throw message_t::error("Expected open parens after if keyword.", h.loc(1)); + } + + h.advance("Expected an expression."); + 
h.force_parse(parse_exp, "Expected an expression.", out["condition"].map({})); + + if (!h.curr("Expected closed parens.").is_operator(operator_t::PAREN_CLOSE)) { + throw message_t::error("Expected closed parens.", h.loc(1)); + } + + h.advance("Expected a statement."); + h.force_parse(ctx.group("$_stat"), "Expected a statement.", out["if"].map({})); + + if (h.ended() || !h.curr().is_identifier("else")) return h.submit(false); + + h.advance("Expected a statement."); + h.force_parse(ctx.group("$_stat"), "Expected a statement.", out["else"].map({})); + + return h.submit(false); +} + +bool ast::parse_while(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + h.throw_ended(); + if (!h.curr("Expected open parens after while keyword.").is_operator(operator_t::PAREN_OPEN)) { + throw message_t::error("Expected open parens after while keyword.", h.loc(1)); + } + + h.advance("Expected an expression."); + h.force_parse(parse_exp, "Expected an expression.", out["condition"].map({})); + + if (!h.curr("Expected closed parens.").is_operator(operator_t::PAREN_CLOSE)) { + throw message_t::error("Expected closed parens.", h.loc(1)); + } + + h.advance("Expected a statement."); + h.force_parse(ctx.group("$_stat"), "Expected a statement.", out["while"].map({})); + + return h.submit(false); +} + +bool ast::parse_return(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + h.advance("Expected an expression."); + h.force_parse(parse_exp, "Expected an expression.", out["condition"].map({})); + + if (!h.curr("Expected a semicolon.").is_operator(operator_t::SEMICOLON)) { + throw message_t::error("Expected a semicolon.", h.loc(1)); + } + + return h.submit(true); +} + +bool ast::parse_break(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (!h.curr("Expected a semicolon.").is_operator(operator_t::SEMICOLON)) { + throw message_t::error("Expected a semicolon.", h.loc(1)); + } + + return h.submit(true); +} + +bool 
ast::parse_continue(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (!h.curr("Expected a semicolon.").is_operator(operator_t::SEMICOLON)) { + throw message_t::error("Expected a semicolon.", h.loc(1)); + } + + return h.submit(true); +} + +bool ast::parse_stat_comp(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + if (!h.curr().is_operator(operator_t::BRACE_OPEN)) return false; + h.advance("Expected a statement or a closing brace."); + + auto &content = out["content"].array({}); + + while (!h.curr().is_operator(operator_t::BRACE_CLOSE)) { + h.throw_ended("Expected a statement or a closing brace."); + h.push_parse(ctx.group("$_stat"), content); + } + + return h.submit(true); +} \ No newline at end of file diff --git a/src/compiler/treeifier/ast/parsers/type.cc b/src/compiler/treeifier/ast/parsers/type.cc new file mode 100644 index 0000000..eebcabf --- /dev/null +++ b/src/compiler/treeifier/ast/parsers/type.cc @@ -0,0 +1,35 @@ +#include "compiler/treeifier/ast/helper.hh" + +bool ast::parse_type(ast_ctx_t &ctx, size_t &res_i, map_t &out) { + tree_helper_t h(ctx, res_i); + + if (h.ended()) return false; + + auto &nmsp = out["namespace"].map({}); + size_t ptr_n = 0; + + if (!h.parse(parse_nmsp, nmsp)) return false; + + while (!h.ended() && h.curr().is_operator(operator_t::MULTIPLY)) { + ptr_n++; + if (!h.try_advance()) break; + } + + auto &nmsp_arr = nmsp["content"].array(); + + h.i--; + out["location"] = conv::loc_to_map(h.res_loc()); + h.i++; + out["name"] = nmsp_arr.back(); + out["ptr_n"] = (float)ptr_n; + nmsp_arr.pop_back(); + if (nmsp_arr.empty()) out["namespace"] = null; + else { + auto loc_1 = conv::map_to_loc(nmsp_arr.front().map()["location"].string()); + auto loc_2 = conv::map_to_loc(nmsp_arr.back().map()["location"].string()); + auto loc = loc_1.intersect(loc_2); + nmsp["location"] = conv::loc_to_map(loc); + } + + return h.submit(false); +} diff --git 
a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 9783e90..0127d64 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -3,392 +3,210 @@ #include "utils/message.hh" using namespace ppc; -using namespace comp::tree::lex; +using namespace ppc::messages; +using namespace ppc::comp::tree::lex; -struct lexlet_t { - bool(*is_valid)(char curr); - struct process_res_t { - bool ended; - bool repeat; - bool dont_add; - const lexlet_t *new_parselet; - bool has_message; - messages::message_t msg; - }; - process_res_t (*process)(char curr); +struct res_t; +using lexlet_t = res_t (*)(char c, std::vector &tok); + +struct res_t { + lexlet_t new_parselet; token_t::kind_t type; + bool _repeat; + bool _add; + + res_t add(bool val = true) { + this->_add = val; + return *this; + } + res_t repeat(bool val = true) { + this->_repeat = val; + return *this; + } }; -using process_res_t = lexlet_t::process_res_t; -extern const lexlet_t LEXLET_DEFAULT; -extern const lexlet_t LEXLET_IDENTIFIER; -extern const lexlet_t LEXLET_OPERATOR; -extern const lexlet_t LEXLET_ZERO; -extern const lexlet_t LEXLET_FLOAT; -extern const lexlet_t LEXLET_BIN; -extern const lexlet_t LEXLET_OCT; -extern const lexlet_t LEXLET_DEC; -extern const lexlet_t LEXLET_HEX; -extern const lexlet_t LEXLET_STRING_LITERAL; -extern const lexlet_t LEXLET_CHAR_LITERAL; -extern const lexlet_t LEXLET_COMMENT; -extern const lexlet_t LEXLET_MULTICOMMENT; - -static bool is_digit(char c) { - return c >= '0' && c <= '9'; -} -static bool is_oct(char c) { +static inline bool isoct(char c) { return c >= '0' && c <= '7'; } -static bool is_hex(char c) { - return is_digit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' || c <= 'f'); +static inline bool is_any(char c, std::string chars) { + auto res = chars.find(c) != std::string::npos; + return res; } -static bool is_lower(char c) { - return c >= 'a' && c <= 'z'; -} -static bool is_upper(char c) { - return c >= 'A' && c <= 'Z'; -} -static bool 
is_letter(char c) { - return is_lower(c) || is_upper(c); -} -static bool is_alphanumeric(char c) { - return is_letter(c) || is_digit(c); -} -static bool is_any(char c, std::string chars) { - return chars.find(c) != -1u; +static inline bool is_operator(char c) { + return is_any(c, "=!<>+-*/%&|^?:,.(){}[];~"); } -static process_res_t lexer_switch(const lexlet_t *lexlet) { +static res_t lexlet_default(char c, std::vector &tok); + +static res_t lexer_switch(lexlet_t lexlet, bool repeat = false) { return { - .ended = false, - .repeat = false, .new_parselet = lexlet, + ._repeat = repeat, }; } -static process_res_t lexer_repeat_switch(const lexlet_t *lexlet) { - return (process_res_t) { - .ended = false, - .repeat = true, - .new_parselet = lexlet, +static res_t lexer_end(token_t::kind_t type, bool repeat = true) { + return { + .new_parselet = lexlet_default, + .type = type, + ._repeat = repeat }; } -static process_res_t lexer_end() { - return (process_res_t) { - .ended = true, - .repeat = true, - .new_parselet = nullptr, - }; -} -static process_res_t lexer_none() { - return (process_res_t) { - .ended = false, - .repeat = false, - .new_parselet = nullptr, - }; -} -static process_res_t default_process(char curr) { - if (LEXLET_STRING_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_STRING_LITERAL); - if (LEXLET_CHAR_LITERAL.is_valid(curr)) return lexer_switch(&LEXLET_CHAR_LITERAL); - if (LEXLET_OPERATOR.is_valid(curr)) return lexer_switch(&LEXLET_OPERATOR); - if (LEXLET_ZERO.is_valid(curr)) return lexer_switch(&LEXLET_ZERO); - if (LEXLET_DEC.is_valid(curr)) return lexer_switch(&LEXLET_DEC); - if (LEXLET_FLOAT.is_valid(curr)) return lexer_switch(&LEXLET_FLOAT); - if (LEXLET_IDENTIFIER.is_valid(curr)) return lexer_switch(&LEXLET_IDENTIFIER); - else return (process_res_t) { - .ended = true, - .repeat = false, - .new_parselet = nullptr, - }; +static res_t lexer_none() { + return { ._add = true }; } -static bool identifier_is_valid(char curr) { - return is_letter(curr) || 
curr == '_' || curr == '@' || curr == '$'; -} -static process_res_t identifier_process(char curr) { - bool valid = (is_alphanumeric(curr) || curr == '_' || curr == '@' || curr == '$'); - return (process_res_t) { - .ended = !valid, - .repeat = !valid, - .new_parselet = &LEXLET_IDENTIFIER, - }; -} - -static bool last_escape = false; -static bool literal_ended = false; - -static bool string_is_valid(char curr) { - last_escape = false; - literal_ended = false; - return curr == '"'; -} -static process_res_t string_process(char curr) { - if (last_escape) { - last_escape = false; - return lexer_none(); - } - - if (curr == '\\') { - last_escape = true; - } - else if (curr == '"') { - literal_ended = true; - } - else if (literal_ended) return lexer_end(); - return lexer_none(); -} - -static bool char_is_valid(char curr) { - last_escape = false; - literal_ended = false; - return curr == '\''; -} -static process_res_t char_process(char curr) { - if (last_escape) { - last_escape = false; - return lexer_none(); - } - - if (curr == '\\') { - last_escape = true; - } - else if (curr == '\'') { - literal_ended = true; - } - else if (literal_ended) return lexer_end(); - return lexer_none(); -} - -static char first_op; -static int op_i = 0; - -static bool operator_is_valid(char curr) { - if (is_any(curr, "=!<>+-*/%&|^?:,.(){}[];")) { - first_op = curr; - op_i = 1; - return true; - } - else return false; -} -static process_res_t operator_process(char curr) { - bool failed = true; - if (first_op == curr && op_i == 1 && is_any(curr, "+-&|?<>")) failed = false; - if (curr == '=') { - if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; - if (op_i == 2 && is_any(first_op, "<>?")) failed = false; - } - if (first_op == '-' && curr == '>' && op_i == 1) failed = false; - - if (first_op == '/' && op_i == 1) { - if (curr == '/') return lexer_switch(&LEXLET_COMMENT); - else if (curr == '*') return lexer_switch(&LEXLET_MULTICOMMENT); - } - - op_i++; - - if (failed) return lexer_end(); 
+static res_t lexlet_identifier(char c, std::vector &tok) { + if (is_operator(c) || isspace(c)) return lexer_end(token_t::IDENTIFIER); else return lexer_none(); -} +}; +static res_t lexlet_hex(char c, std::vector &tok) { + if (isxdigit(c)) return lexer_none(); + else return lexer_end(token_t::HEX_LITERAL); +}; +static res_t lexlet_bin(char c, std::vector &tok) { + if (is_any(c, "01")) return lexer_none(); + else if (isdigit(c)) throw message_t::error("A binary literal may only contain zeroes and ones."); + else return lexer_end(token_t::BIN_LITERAL); +}; +static res_t lexlet_oct(char c, std::vector &tok) { + if (isoct(c)) return lexer_none(); + else if (isdigit(c)) throw message_t::error("An octal literal may only contain octal digits."); + else return lexer_end(token_t::OCT_LITERAL); +}; +static res_t lexlet_float(char c, std::vector &tok) { + if (isdigit(c)) return lexer_none(); + else return lexer_end(token_t::FLOAT_LITERAL); +}; +static res_t lexlet_dec(char c, std::vector &tok) { + if (isdigit(c)) return lexer_none(); + else if (c == '.') return lexer_switch(lexlet_float); + else return lexer_end(token_t::DEC_LITERAL); +}; -static bool zero_is_valid(char curr) { - return curr == '0'; -} -static process_res_t zero_process(char curr) { - if (curr == '.') return lexer_switch(&LEXLET_FLOAT); - else if (curr == 'b') return lexer_switch(&LEXLET_BIN); - else if (curr == 'x') return lexer_switch(&LEXLET_HEX); - else if (is_digit(curr)) return lexer_repeat_switch(&LEXLET_OCT); - else return lexer_end(); -} - -static bool dec_is_valid(char curr) { - return is_digit(curr); -} -static process_res_t dec_process(char curr) { - if (is_digit(curr)) return lexer_none(); - else if (curr == '.') return lexer_switch(&LEXLET_FLOAT); - else return lexer_end(); -} - -static bool only_dot = false; - -static bool float_is_valid(char curr) { - return only_dot = curr == '.'; -} -static process_res_t float_process(char curr) { - if (is_digit(curr)) { - only_dot = false; - return 
lexer_none(); +static res_t lexlet_zero(char c, std::vector &tok) { + if (c == '.') return lexer_switch(lexlet_float).add(); + else if (c == 'b') return lexer_switch(lexlet_bin).add(); + else if (c == 'x') return lexer_switch(lexlet_hex).add(); + else if (isdigit(c)) return lexer_switch(lexlet_oct, true); + else return lexer_end(token_t::DEC_LITERAL); +}; +static res_t lexlet_comment(char c, std::vector &tok) { + tok.clear(); + if (c == '\n') return lexer_switch(lexlet_default); + else return lexer_none().add(false); +}; +static res_t lexlet_multicomment(char c, std::vector &tok) { + if (c == '/' && tok.size() && tok.back() == '*') { + tok.clear(); + return lexer_switch(lexlet_default); } - else return lexer_end(); -} -static process_res_t hex_process(char curr) { - if (is_hex(curr)) return lexer_none(); - else return lexer_end(); -} -static process_res_t bin_process(char curr) { - if (curr == '0' || curr == '1') return lexer_none(); - else if (is_digit(curr)) - throw messages::message_t { messages::message_t::ERROR, NO_LOCATION, "A binary literal may only contain zeroes and ones." }; - else return lexer_end(); -} -static process_res_t oct_process(char curr) { - if (is_oct(curr)) return lexer_none(); - else if (is_digit(curr)) - throw messages::message_t { messages::message_t::ERROR, NO_LOCATION, "An octal literal may only contain octal digits." 
}; - else return lexer_end(); -} + return lexer_none(); +}; +static res_t lexlet_operator(char c, std::vector &tok) { + bool failed = false; -static process_res_t comment_process(char curr) { - if (curr == '\n') return lexer_end(); - else return (process_res_t) { - .ended = false, - .dont_add = true, - }; -} -static bool last_star = false; + if (tok.size() > 0) { + failed = true; + char first_op = tok[0]; + size_t op_i = tok.size(); -static process_res_t multicomment_process(char curr) { - if (curr == '/' && last_star) { - last_star = false; - return { - .ended = true, - .repeat = false, - .new_parselet = nullptr, - }; + if (first_op == '.' && isdigit(c)) return lexer_switch(lexlet_float).add(); + + if (first_op == c && op_i == 1 && is_any(c, ":+-&|?<>")) failed = false; + if (c == '=') { + if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; + if (op_i == 2 && is_any(first_op, "<>?")) failed = false; + } + if (first_op == '-' && c == '>' && op_i == 1) failed = false; + + if (first_op == '/' && op_i == 1) { + if (c == '/') return lexer_switch(lexlet_comment); + else if (c == '*') return lexer_switch(lexlet_multicomment); + } } - if (curr == '*') last_star = true; - - return { - .ended = false, - .dont_add = true, - }; -} -const lexlet_t LEXLET_DEFAULT = (lexlet_t) { - .process = default_process, - .type = token_t::NONE, + if (failed) return lexer_end(token_t::OPERATOR); + else return lexer_none(); }; -const lexlet_t LEXLET_IDENTIFIER = (lexlet_t) { - .is_valid = identifier_is_valid, - .process = identifier_process, - .type = token_t::IDENTIFIER, +static res_t lexlet_string(char c, std::vector &tok) { + if ((c == '"') && (tok.back() != '\\')) return lexer_end(token_t::STRING_LITERAL, false).add(); + else return lexer_none(); }; -const lexlet_t LEXLET_ZERO = (lexlet_t) { - .is_valid = zero_is_valid, - .process = zero_process, - .type = token_t::DEC_LITERAL, -}; -const lexlet_t LEXLET_DEC = (lexlet_t) { - .is_valid = dec_is_valid, - .process = dec_process, 
- .type = token_t::DEC_LITERAL, -}; -const lexlet_t LEXLET_HEX = (lexlet_t) { - .process = hex_process, - .type = token_t::HEX_LITERAL, -}; -const lexlet_t LEXLET_BIN = (lexlet_t) { - .process = bin_process, - .type = token_t::BIN_LITERAL, -}; -const lexlet_t LEXLET_OCT = (lexlet_t) { - .process = oct_process, - .type = token_t::OCT_LITERAL, -}; -const lexlet_t LEXLET_FLOAT = (lexlet_t) { - .is_valid = float_is_valid, - .process = float_process, - .type = token_t::FLOAT_LITERAL, -}; -const lexlet_t LEXLET_OPERATOR = (lexlet_t) { - .is_valid = operator_is_valid, - .process = operator_process, - .type = token_t::OPERATOR, -}; -const lexlet_t LEXLET_STRING_LITERAL = (lexlet_t) { - .is_valid = string_is_valid, - .process = string_process, - .type = token_t::STRING_LITERAL, -}; -const lexlet_t LEXLET_CHAR_LITERAL = (lexlet_t) { - .is_valid = char_is_valid, - .process = char_process, - .type = token_t::CHAR_LITERAL, -}; -const lexlet_t LEXLET_COMMENT = { - .is_valid = nullptr, - .process = comment_process, - .type = token_t::NONE, -}; -const lexlet_t LEXLET_MULTICOMMENT = { - .is_valid = nullptr, - .process = multicomment_process, - .type = token_t::NONE, +static res_t lexlet_char(char c, std::vector &tok) { + if ((c == '\'') && (tok.back() != '\\')) return lexer_end(token_t::CHAR_LITERAL, false).add(); + else return lexer_none(); }; -std::vector token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, const std::string &filename, const std::string &src) { +static res_t lexlet_default(char c, std::vector &tok) { + if (c == '"') return lexer_switch(lexlet_string).add(); + if (c == '\'') return lexer_switch(lexlet_char).add(); + if (c == '0') return lexer_switch(lexlet_zero).add(); + if (is_operator(c)) return lexer_switch(lexlet_operator).add(); + if (isdigit(c)) return lexer_switch(lexlet_dec).add(); + if (isspace(c)) { + tok.clear(); + return lexer_none().add(false); + } + return lexer_switch(lexlet_identifier).add(); +}; + +std::vector 
token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, const std::string &filename, const std::string &_src) { + auto src = _src + '\n'; std::vector tokens; std::vector curr_token; - lexlet_t curr = LEXLET_DEFAULT; - std::size_t start = 0, line = 0, curr_start = 0, curr_line = 0, length = 0, i = 0; + lexlet_t curr = lexlet_default; + std::size_t start = 0, line = 0, curr_start = 0, curr_line = 0, i = 0; - while (src[i]) { + while (i < src.size()) { char c = src[i]; try { - process_res_t res = curr.process(c); - if (i == 0) res.repeat = false; - if (res.has_message) throw res.msg; + res_t res = curr(c, curr_token); + if (i == 0) res._repeat = false; - if (res.ended) { - if (curr.type) { - location_t loc = { filename, line, start, i - length, length }; - tokens.push_back({ curr.type, { curr_token.begin(), curr_token.end() }, loc }); - curr_token.clear(); - } - - length = 0; - - curr = LEXLET_DEFAULT; + if (res._add) { + curr_token.push_back(c); } - else { - if (res.new_parselet) { - if (!curr.type) { - start = curr_start; - line = curr_line; - } - curr = *res.new_parselet; - } - if (!res.dont_add) { - curr_token.push_back(c); - length++; + if (res.type) { + size_t len = curr_token.size(); + location_t loc(filename, line, start, i - len, len); + tokens.push_back({ res.type, { curr_token.begin(), curr_token.end() }, loc }); + curr_token.clear(); + } + if (res.new_parselet) { + if (curr == lexlet_default && res.new_parselet != lexlet_default) { + start = curr_start; + line = curr_line; } + curr = res.new_parselet; } - if (!res.repeat) { - curr_start++; - if (c == '\n') { - curr_line++; - curr_start = 0; - } + if (!res._repeat) { i++; + curr_start++; + if (i == src.size()) break; + if (c == '\n') { + curr_start = 0; + curr_line++; + } } } catch (const messages::message_t &msg) { - throw messages::message_t { msg.level, { filename, line, start, i - length, length }, msg.content }; + throw message_t(msg.level, msg.content, location_t(filename, line, start, i - 
curr_token.size(), curr_token.size())); } } - location_t loc = { filename, line, start, i - length, length }; - if (curr.type) { - tokens.push_back(token_t { - curr.type, std::string { curr_token.begin(), curr_token.end() }, - { filename, line, start, i - length, length } - }); - } + curr_start--; + + if (curr_token.size()) curr_token.pop_back(); + + if (curr == lexlet_string) + throw message_t::error("Unclosed string literal.", location_t(filename, line, start, i - curr_token.size(), curr_token.size())); + if (curr == lexlet_char) + throw message_t::error("Unclosed char literal.", location_t(filename, line, start, i - curr_token.size(), curr_token.size())); + if (curr != lexlet_default) throw message_t::error("Unexpected end.", location_t(filename, curr_line, curr_start, i, 1)); return tokens; } diff --git a/src/compiler/treeifier/operators.cc b/src/compiler/treeifier/operators.cc index 4d560c4..b03bd90 100644 --- a/src/compiler/treeifier/operators.cc +++ b/src/compiler/treeifier/operators.cc @@ -2,10 +2,12 @@ #include "compiler/treeifier/tokenizer.hh" using namespace ppc::comp::tree; +using namespace ppc::comp; using namespace std::string_literals; std::vector operators = { + "(none)", "<", ">", "<=", ">=", "==", "!=", "&&", "||", "<<", ">>", "^", "&", "|", "!", "~", "++", "--", @@ -17,16 +19,15 @@ std::vector operators = { "[", "]", "{", "}", "(", ")" }; - -const std::string &tok::operator_stringify(tok::operator_t kw) { - if (kw < 0 || kw >= operators.size()) throw "Invalid operator ID given."s; - return operators[kw]; +const std::string &tree::operator_stringify(operator_t op) { + if (op < 0 || op >= operators.size()) throw "Invalid operator ID given."s; + return operators[op]; } -tok::operator_t tok::operator_find(const std::string &raw) { +operator_t tree::operator_find(const std::string &raw) { std::size_t i = 0; for (const auto &op : operators) { - if (op == raw) return (tok::operator_t)i; + if (op == raw) return (operator_t)i; i++; } - throw "Invalid 
operator '"s + raw + "' given."; + throw "Invalid operator '" + raw + "' given."; } diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 6230af2..e603d86 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -1,4 +1,5 @@ #include +#include #include "compiler/treeifier/tokenizer.hh" #include "compiler/treeifier/lexer.hh" @@ -7,12 +8,12 @@ using namespace messages; using namespace comp::tree; using namespace std::string_literals; -static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex::token_t token) { +static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, const lex::token_t &token) { char literal_char = is_char ? '\'' : '"'; bool escaping = false; - std::vector res { }; + std::vector res; location_t curr_char_loc = token.location; curr_char_loc.length = 1; curr_char_loc.start++; @@ -31,10 +32,29 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: else if (c == 'r') new_c = '\r'; else if (c == 't') new_c = '\t'; else if (c == 'v') new_c = '\v'; - // TODO: Add support for oct, hex and utf8 literals + else if (c >= '0' && c <= '7') { + new_c = 0; + size_t n = 0; + while (c >= '0' && c <= '7') { + new_c <<= 3; + new_c |= c - '0'; + c = token.data[++i]; + n++; + } + if (n > 3) { + location_t loc = curr_char_loc; + loc.code_start--; + loc.start--; + loc.length = n + 1; + msg_stack.warn("Octal escape sequence overflows 255 8-bit limit (3 digits).", loc); + } + curr_char_loc.start += n - 1; + i--; + } + // TODO: Add support for hex and utf8 literals else if (c == literal_char || c == '\\') new_c = c; else { - throw message_t { message_t::ERROR, curr_char_loc, "Unescapable character." 
}; + throw message_t(message_t::ERROR, "Unescapable character.", curr_char_loc); } res.push_back(new_c); escaping = false; @@ -49,138 +69,166 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: if (c == '\n') break; } - if (is_char) throw message_t { message_t::ERROR, token.location, "Unterminated char literal." }; - else throw message_t { message_t::ERROR, token.location, "Unterminated string literal." }; + if (is_char) throw message_t(message_t::ERROR, "Unterminated char literal.", token.location); + else throw message_t(message_t::ERROR, "Unterminated string literal.", token.location); } -static tok::token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { - enum radix_t { - BINARY, - OCTAL, - DECIMAL, - HEXADECIMAL, - } radix; - std::size_t i = 0; +static std::vector parse_bin(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; - switch (token.type) { - case lex::token_t::BIN_LITERAL: - i += 2; - radix = BINARY; - break; - case lex::token_t::OCT_LITERAL: - i++; - radix = OCTAL; - break; - case lex::token_t::DEC_LITERAL: - radix = DECIMAL; - break; - case lex::token_t::HEX_LITERAL: - i += 2; - radix = HEXADECIMAL; - break; - default: - throw "WTF r u doing bro?"s; - } + int last_byte = 0; + int lastbyte_n = 0; - std::size_t j = token.data.length() - 1; - - uint64_t res = 0; - - for (; i <= j; i++) { - char c = token.data[i]; - int8_t digit; - switch (radix) { - case BINARY: - digit = c - '0'; - res <<= 1; - res |= digit; - break; - case OCTAL: - digit = c - '0'; - if (digit < 0 || digit > 7) { - throw message_t { message_t::ERROR, token.location, "Octal literals may contain numbers between 0 and 7." 
}; - } - res <<= 3; - res |= digit; - break; - case 2: - digit = c - '0'; - res *= 10; - res += digit; - break; - case 3: - if (c >= 'a' && c <= 'f') digit = c - 'a' + 9; - else if (c >= 'A' && c <= 'F') digit = c - 'A' + 9; - else if (c >= '0' && c <= '9') digit = c - '0'; - else throw message_t { message_t::ERROR, token.location, "Invalid character '"s + c + "' in hex literal." }; - res <<= 4; - res |= digit; - break; + for (size_t j = i; j < data.length(); j++) { + if (lastbyte_n == 8) { + lastbyte_n = 0; + res.push_back(last_byte); + last_byte = 0; } + + last_byte <<= 1; + last_byte |= data[j] - '0'; + lastbyte_n++; } - return tok::token_t { res, token.location }; + res.push_back(last_byte); + std::reverse(res.begin(), res.end()); + + return res; } -static tok::token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { - double whole = 0, fract = 0; +static std::vector parse_hex(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; - char c; - std::size_t i; + int last_byte = 0; + int lastbyte_n = 0; - for (i = 0; i < token.data.length() && ((c = token.data[i]) > '0' && c < '9'); i++) { - if (c == '.') break; - int digit = c - '0'; - whole *= 10; - whole += digit; - } - - if (c == '.') { - i++; - for (; i < token.data.length() && ((c = token.data[i]) > '0' && c < '9'); i++) { - int digit = c - '0'; - fract += digit; - fract /= 10; + for (size_t j = i; j < data.length(); j++) { + if (lastbyte_n == 8) { + lastbyte_n = 0; + res.push_back(last_byte); + last_byte = 0; } + + int digit = data[j] - '0'; + if (data[j] >= 'a' && data[j] <= 'f') digit = data[j] - 'a' + 10; + if (data[j] >= 'A' && data[j] <= 'F') digit = data[j] - 'F' + 10; + + last_byte <<= 4; + last_byte |= digit; + lastbyte_n += 4; } - return tok::token_t { whole + fract, token.location }; -} + res.push_back(last_byte); + std::reverse(res.begin(), res.end()); -tok::token_t tok::token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { - switch (in.type) { - 
case lex::token_t::IDENTIFIER: - return tok::token_t { in.data, in.location }; - case lex::token_t::OPERATOR: - try { - auto op = tok::operator_find(in.data); - return token_t { op, in.location }; - } - catch (std::string &err) { - throw message_t { message_t::ERROR, in.location, "Operator not recognised."s }; - } - case lex::token_t::BIN_LITERAL: - case lex::token_t::OCT_LITERAL: - case lex::token_t::DEC_LITERAL: - case lex::token_t::HEX_LITERAL: - return parse_int(msg_stack, in); - case lex::token_t::FLOAT_LITERAL: - return parse_float(msg_stack, in); - case lex::token_t::STRING_LITERAL: - return { parse_string(msg_stack, false, in) }; - case lex::token_t::CHAR_LITERAL: { - auto str = parse_string(msg_stack, true, in); - if (str.size() != 1) throw message_t { message_t::ERROR, in.location, "Char literal must consist of just one character." }; - return str.front(); + return res; +} +static std::vector parse_oct(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + + int last_byte = 0; + int lastbyte_n = 0; + + for (size_t j = i; j < data.length(); j++) { + if (lastbyte_n >= 8) { + lastbyte_n = 0; + res.push_back(last_byte); + last_byte >>= 8; } - default: - throw message_t { message_t::ERROR, in.location, "Token type not recognised." 
}; - } -} -std::vector tok::token_t::parse_many(messages::msg_stack_t &msg_stack, std::vector tokens) { - std::vector res; - for (auto &tok : tokens) { - res.push_back(tok::token_t::parse(msg_stack, tok)); + int digit = data[j] - '0'; + + last_byte <<= 3; + last_byte |= digit; + lastbyte_n += 3; + } + + res.push_back(last_byte); + std::reverse(res.begin(), res.end()); + + return res; +} +static void mult_10(std::vector &val) { + std::vector res; + + int carry = 0; + + for (size_t i = 0; i < val.size(); i++) { + carry = val[i] * 10 + carry; + res.push_back(carry); + carry >>= 8; + } + + if (carry != 0) res.push_back(carry); + val = res; +} +static void add_byte(std::vector &a, uint8_t b) { + int carry = b; + + for (size_t i = 0; i < a.size(); i++) { + carry = a[i] + carry; + a[i] = carry; + carry >>= 8; + if (carry == 0) break; + } + + if (carry != 0) a.push_back(carry); +} +static std::vector parse_dec(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + + for (size_t j = i; j < data.length(); j++) { + int digit = data[j] - '0'; + + mult_10(res); + if (res.empty()) res.push_back(digit); + else add_byte(res, digit); + } + + return res; +} +static std::vector parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { + throw "no floats lol bozo"s; +} + +token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { + switch (in.type) { + case lex::token_t::IDENTIFIER: + return token_t(in.data, in.location); + case lex::token_t::OPERATOR: + try { + auto op = operator_find(in.data); + return token_t(op, in.location); + } + catch (std::string &err) { + throw message_t(message_t::ERROR, "Operator not recognised."s, in.location); + } + case lex::token_t::BIN_LITERAL: + return { parse_bin(msg_stack, 1, in.data), false, in.location }; + case lex::token_t::OCT_LITERAL: + return { parse_oct(msg_stack, 1, in.data), false, in.location }; + case lex::token_t::DEC_LITERAL: + return { parse_dec(msg_stack, 0, in.data), false, 
in.location }; + case lex::token_t::HEX_LITERAL: + return { parse_hex(msg_stack, 2, in.data), false, in.location }; + case lex::token_t::FLOAT_LITERAL: + return { parse_float(msg_stack, in), false, in.location }; + case lex::token_t::STRING_LITERAL: + return { parse_string(msg_stack, false, in), true, in.location }; + case lex::token_t::CHAR_LITERAL: { + auto res = parse_string(msg_stack, true, in); + std::reverse(res.begin(), res.end()); + return { res, false, in.location }; + } + default: + throw message_t(message_t::ERROR, "Token type not recognised.", in.location); + } +} +std::vector token_t::parse_many(messages::msg_stack_t &msg_stack, std::vector tokens) { + std::vector res; + + for (auto &tok : tokens) { + res.push_back(token_t::parse(msg_stack, tok)); } return res; diff --git a/src/lang/proj.txt b/src/lang.proj similarity index 50% rename from src/lang/proj.txt rename to src/lang.proj index e45037f..3151a2d 100644 --- a/src/lang/proj.txt +++ b/src/lang.proj @@ -1,2 +1,2 @@ lang -core \ No newline at end of file +utils \ No newline at end of file diff --git a/src/lang/common.cc b/src/lang/common.cc new file mode 100644 index 0000000..bf4bd70 --- /dev/null +++ b/src/lang/common.cc @@ -0,0 +1,60 @@ + +#include +#include "lang/common.hh" + +namespace ppc::lang { + std::string loc_namespace_name_t::to_string() const { + std::stringstream res; + + for (size_t i = 0; i < size(); i++) { + if (i != 0) res << "::"; + res << (*this)[i]; + } + + return res.str(); + } + std::string namespace_name_t::to_string() const { + std::stringstream res; + + for (size_t i = 0; i < size(); i++) { + if (i != 0) res << "::"; + res << (*this)[i]; + } + + return res.str(); + } + + int namespace_name_t::compare(const namespace_name_t &b) const { + const auto &a = *this; + for (size_t i = 0; i < a.size() && i < b.size(); i++) { + auto cmp = a[i].compare(b[i]); + if (cmp != 0) return cmp; + } + + if (a.size() > b.size()) return 1; + else if (a.size() == b.size()) return 0; + else return 
-1; + } + int loc_namespace_name_t::compare(const loc_namespace_name_t &b) const { + const auto &a = *this; + for (size_t i = 0; i < a.size() && i < b.size(); i++) { + auto cmp = a[i].compare(b[i]); + if (cmp != 0) return cmp; + } + + if (a.size() > b.size()) return 1; + else if (a.size() == b.size()) return 0; + else return -1; + } + + namespace_name_t loc_namespace_name_t::strip_location() const { + namespace_name_t res; + + for (const auto &el : *this) { + res.push_back(el); + } + + return res; + } +} + diff --git a/src/lang/version.cc b/src/lang/version.cc index 21c2918..a7f0642 100644 --- a/src/lang/version.cc +++ b/src/lang/version.cc @@ -4,14 +4,14 @@ using namespace ppc; bool version_t::operator==(version_t other) const { bool major_same = major == other.major; - bool minor_same = minor == -1 || other.minor == -1 || minor == other.minor; - bool revision_same = revision == -1 || other.revision == -1 || revision == other.revision; + bool minor_same = minor == -1u || other.minor == -1 || minor == other.minor; + bool revision_same = revision == -1u || other.revision == -1u || revision == other.revision; return major_same && minor_same && revision_same; } bool version_t::is_compliant(version_t other) const { bool major_compliant = major == other.major; - bool minor_compliant = minor == -1 || other.minor == -1 || minor <= other.minor; + bool minor_compliant = minor == -1u || other.minor == -1u || minor <= other.minor; return major_compliant && minor_compliant; } diff --git a/src/lsproj.cc b/src/lsproj.cc index 8df6e4e..e133779 100644 --- a/src/lsproj.cc +++ b/src/lsproj.cc @@ -15,11 +15,11 @@ std::string read_str(std::istream &f, const std::string &skip_chars, const std:: while (true) { c = f.get(); auto a = end_chars.find(c); - if (c == -1 || a != -1ull) { + if (c == -1 || a != std::string::npos) { end_char = c; return ""; } - if ((a = skip_chars.find(c)) == -1ull) { + if ((a = skip_chars.find(c)) == std::string::npos) { f.unget(); break; } @@ -27,7 +27,7 @@ 
std::string read_str(std::istream &f, const std::string &skip_chars, const std:: while (true) { c = f.get(); - if (c == -1 || end_chars.find(c) != -1ull) { + if (c == -1 || end_chars.find(c) != std::string::npos) { end_char = c; break; } @@ -35,7 +35,7 @@ std::string read_str(std::istream &f, const std::string &skip_chars, const std:: res.push_back(c); } while (true) { - if (skip_chars.find(res.back()) != -1ull) res.pop_back(); + if (skip_chars.find(res.back()) != std::string::npos) res.pop_back(); else break; } @@ -63,14 +63,14 @@ project_t read_project(std::istream &f) { }; } - if (name.find(',') != -1ull || name.find(' ') != -1ull) { + if (name.find(',') != std::string::npos || name.find(' ') != std::string::npos) { throw (std::string)"The name of a project may not contain spaces or commas."; } while (true) { std::string dep = read_str(f, " \v\t\r\n", ",\n", end_ch); - if (dep.find(' ') != -1ull) { + if (dep.find(' ') != std::string::npos) { throw (std::string)"The name of a dependency may not contain spaces."; } @@ -101,7 +101,7 @@ int main(int argc, const char* argv[]) { throw (std::string)"Incorrect usage. 
Syntax: [src-dir] [project-name] [output|deps]."; } - std::string proj_path = (std::string)argv[0] + "/" + argv[1] + "/proj.txt"; + std::string proj_path = (std::string)argv[0] + "/" + argv[1] + ".proj"; proj_name = argv[1]; std::ifstream f { proj_path, std::ios_base::in }; diff --git a/src/main/proj.txt b/src/main.proj similarity index 95% rename from src/main/proj.txt rename to src/main.proj index 23e5243..dd1bfcc 100644 --- a/src/main/proj.txt +++ b/src/main.proj @@ -1,2 +1,2 @@ -main +main utils, compiler \ No newline at end of file diff --git a/src/main/main.cc b/src/main/main.cc index 871054b..2f4ec65 100644 --- a/src/main/main.cc +++ b/src/main/main.cc @@ -22,14 +22,17 @@ #include #include "utils/threading.hh" #include "utils/strings.hh" +#include "utils/json.hh" #include "compiler/treeifier/lexer.hh" #include "compiler/treeifier/tokenizer.hh" +#include "compiler/treeifier/ast.hh" #include "./opions.hh" using std::cout; using std::size_t; using namespace ppc; using namespace ppc::comp::tree; +using namespace ppc::comp::tree::ast; void add_flags(options::parser_t &parser) { parser.add_flag({ @@ -120,7 +123,7 @@ void add_flags(options::parser_t &parser) { .description = "Prints a 'what?' type of message (you'll see)", .match_type = options::MATCH_PREFIX, .execute = [](options::parser_t &parser, const std::string &option, ppc::messages::msg_stack_t &global_stack) { - global_stack.push({ (messages::message_t::level_t)69, NO_LOCATION, "IDK LOL." 
}); + global_stack.push(messages::message_t((messages::message_t::level_t)69, "IDK LOL.")); } }); } @@ -139,37 +142,48 @@ int main(int argc, const char *argv[]) { std::vector files; messages::msg_stack_t msg_stack; - options::parser_t parser; - data::map_t conf; - add_flags(parser); + try { + options::parser_t parser; + data::map_t conf; + add_flags(parser); - for (const auto &arg : args) { - if (!parser.parse(arg, msg_stack, conf)) { - files.push_back(arg); - } - } - - for (const auto &file : files) { - std::ifstream f { file, std::ios_base::in }; - try { - auto res = tok::token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); - - for (auto tok : res) { - if (tok.is_identifier()) std::cout << "Identifier: \t" << tok.identifier(); - if (tok.is_operator()) std::cout << "Operator: \t" << tok::operator_stringify(tok._operator()); - if (tok.is_float_lit()) std::cout << "Float: \t" << tok.float_lit(); - if (tok.is_int_lit()) std::cout << "Int: \t" << tok.int_lit(); - if (tok.is_char_lit()) std::cout << "Char: \t" << tok.char_lit(); - if (tok.is_string_lit()) std::cout << "String: \t" << std::string { tok.string_lit().begin(), tok.string_lit().end() }; - std::cout << std::endl; + for (const auto &arg : args) { + if (!parser.parse(arg, msg_stack, conf)) { + files.push_back(arg); } } - catch (const messages::message_t &msg) { - msg_stack.push(msg); + + for (const auto &file : files) { + try { + std::ifstream f { file, std::ios_base::in }; + auto tokens = token_t::parse_many(msg_stack, lex::token_t::parse_file(msg_stack, file, f)); + auto ast = ast_ctx_t::parse(ast::parse_glob, msg_stack, tokens); + + std::cout << data::json::stringify(ast) << std::endl; + } + catch (const messages::message_t &msg) { + msg_stack.push(msg); + } } } + catch (const messages::message_t &msg) { + msg_stack.push(msg); + } + #ifndef PROFILE_debug + catch (const std::string &msg) { + msg_stack.push(message_t::error(msg)); + } + catch (...) 
{ + std::cout << std::endl; + msg_stack.push(message_t::error("A fatal error occurred.")); + } + #endif msg_stack.print(std::cout, messages::message_t::DEBUG, true); + #ifdef PROFILE_debug + system("pause"); + #endif + return 0; -} \ No newline at end of file +} diff --git a/src/main/options.cc b/src/main/options.cc index d5b46c2..9328a1c 100644 --- a/src/main/options.cc +++ b/src/main/options.cc @@ -6,7 +6,7 @@ using namespace ppc; bool check_shorthand(std::string &option, const options::flag_t &flag) { if (option.size() < 2 || option[0] != '-') return false; - if (option.size() == 2 && std::string { flag.shorthands }.find(option[1]) != -1u) { + if (option.size() == 2 && std::string { flag.shorthands }.find(option[1]) != std::string::npos) { option = ""; return true; } diff --git a/src/utils/proj.txt b/src/utils.proj similarity index 100% rename from src/utils/proj.txt rename to src/utils.proj diff --git a/src/utils/data.cc b/src/utils/data.cc index e2801f4..f83e097 100644 --- a/src/utils/data.cc +++ b/src/utils/data.cc @@ -1,135 +1,176 @@ #include "utils/data.hh" -bool ppc::data::value_t::is_null() const { - return type == type_t::Null; -} -bool ppc::data::value_t::is_map() const { - return type == type_t::Map; -} -bool ppc::data::value_t::is_array() const { - return type == type_t::Arr; -} -bool ppc::data::value_t::is_number() const { - return type == type_t::Num; -} -bool ppc::data::value_t::is_string() const { - return type == type_t::Str; -} -bool ppc::data::value_t::is_bool() const { - return type == type_t::Bool; -} +namespace ppc::data { + bool value_t::is_null() const { + return type == type_t::Null; + } + bool value_t::is_map() const { + return type == type_t::Map; + } + bool value_t::is_array() const { + return type == type_t::Arr; + } + bool value_t::is_number() const { + return type == type_t::Num; + } + bool value_t::is_string() const { + return type == type_t::Str; + } + bool value_t::is_bool() const { + return type == type_t::Bool; + } -bool 
ppc::data::value_t::array(ppc::data::array_t &out) const { - if (is_array()) { - out = *val.arr; - return true; + array_t &value_t::array(const array_t &val) { + *this = val; + return *this->val.arr; } - return false; -} -bool ppc::data::value_t::map(ppc::data::map_t &out) const { - if (is_map()) { - out = *val.map; - return true; + map_t &value_t::map(const map_t &val) { + *this = val; + return *this->val.map; } - return false; -} -bool ppc::data::value_t::number(ppc::data::number_t &out) const { - if (is_number()) { - out = val.num; - return true; + number_t &value_t::number(number_t val) { + *this = val; + return this->val.num; } - return false; -} -bool ppc::data::value_t::string(ppc::data::string_t &out) const { - if (is_string()) { - out = *val.str; - return true; + string_t &value_t::string(const string_t &val) { + *this = val; + return *this->val.str; } - return false; -} -bool ppc::data::value_t::boolean(ppc::data::bool_t &out) const { - if (is_bool()) { - out = val.bl; - return true; + bool_t &value_t::boolean(bool_t val) { + *this = val; + return this->val.bl; } - return false; -} -const ppc::data::array_t &ppc::data::value_t::array() const { - if (is_array()) return *val.arr; - else throw (std::string)"The value isn't an array."; -} -const ppc::data::map_t &ppc::data::value_t::map() const { - if (is_map()) return *val.map; - else throw (std::string)"The value isn't a map."; -} -ppc::data::number_t ppc::data::value_t::number() const { - if (is_number()) return val.num; - else throw (std::string)"The value isn't a number."; -} -const ppc::data::string_t &ppc::data::value_t::string() const { - if (is_string()) return *val.str; - else throw (std::string)"The value isn't a string."; -} -ppc::data::bool_t ppc::data::value_t::boolean() const { - if (is_bool()) return val.bl; - else throw (std::string)"The value isn't a bool."; -} - -ppc::data::value_t::value_t() { - this->type = type_t::Null; -} -ppc::data::value_t::value_t(const ppc::data::array_t &val) { - 
this->type = type_t::Arr; - this->val.arr = new array_t { val }; -} -ppc::data::value_t::value_t(const ppc::data::map_t &val) { - this->type = type_t::Map; - this->val.map = new map_t { val }; -} -ppc::data::value_t::value_t(const ppc::data::string_t &val) { - this->type = type_t::Str; - this->val.str = new string_t { val }; -} -ppc::data::value_t::value_t(ppc::data::bool_t val) { - this->type = type_t::Bool; - this->val.bl = val; -} -ppc::data::value_t::value_t(ppc::data::number_t val) { - this->type = type_t::Num; - this->val.num = val; -} -ppc::data::value_t::value_t(const ppc::data::value_t &other) { - type = other.type; - switch (other.type) { - case type_t::Map: - val.map = new map_t { *other.val.map }; - break; - case type_t::Arr: - val.arr = new array_t { *other.val.arr }; - break; - case type_t::Str: - val.str = new string_t { *other.val.str }; - break; - default: - val = other.val; - break; + array_t &value_t::array() { + if (is_array()) return *val.arr; + else throw (std::string)"The value isn't an array."; } -} -ppc::data::value_t::~value_t() { - switch (type) { - case type_t::Map: - delete val.map; - break; - case type_t::Arr: - delete val.arr; - break; - case type_t::Str: - delete val.str; - break; - default: - break; + map_t &value_t::map() { + if (is_map()) return *val.map; + else throw (std::string)"The value isn't a map."; + } + number_t &value_t::number() { + if (is_number()) return val.num; + else throw (std::string)"The value isn't a number."; + } + string_t &value_t::string() { + if (is_string()) return *val.str; + else throw (std::string)"The value isn't a string."; + } + bool_t &value_t::boolean() { + if (is_bool()) return val.bl; + else throw (std::string)"The value isn't a bool."; } -} + + const array_t &value_t::array() const { + if (is_array()) return *val.arr; + else throw (std::string)"The value isn't an array."; + } + const map_t &value_t::map() const { + if (is_map()) return *val.map; + else throw (std::string)"The value isn't a 
map."; + } + number_t value_t::number() const { + if (is_number()) return val.num; + else throw (std::string)"The value isn't a number."; + } + const string_t &value_t::string() const { + if (is_string()) return *val.str; + else throw (std::string)"The value isn't a string."; + } + bool_t value_t::boolean() const { + if (is_bool()) return val.bl; + else throw (std::string)"The value isn't a bool."; + } + + value_t::value_t() { + this->type = type_t::Null; + } + value_t::value_t(const array_t &val) { + this->type = type_t::Arr; + this->val.arr = new array_t(val); + } + value_t::value_t(const map_t &val) { + this->type = type_t::Map; + this->val.map = new map_t(val); + } + value_t::value_t(const string_t &val) { + this->type = type_t::Str; + this->val.str = new string_t(val); + } + value_t::value_t(const char *val) { + this->type = type_t::Str; + this->val.str = new string_t(val); + } + value_t::value_t(bool_t val) { + this->type = type_t::Bool; + this->val.bl = val; + } + value_t::value_t(number_t val) { + this->type = type_t::Num; + this->val.num = val; + } + value_t::value_t(const value_t &other) { + type = other.type; + switch (other.type) { + case type_t::Map: + val.map = new map_t(*other.val.map); + break; + case type_t::Arr: + val.arr = new array_t(*other.val.arr); + break; + case type_t::Str: + val.str = new string_t(*other.val.str); + break; + default: + val = other.val; + break; + } + } + + value_t::~value_t() { + switch (type) { + case type_t::Map: + delete val.map; + break; + case type_t::Arr: + delete val.arr; + break; + case type_t::Str: + delete val.str; + break; + default: + break; + } + } + + value_t::value_t(std::initializer_list> map): + value_t(map_t(map)) { } + + value_t &value_t::operator=(const value_t &other) { + type = other.type; + switch (other.type) { + case type_t::Map: + val.map = new map_t(*other.val.map); + break; + case type_t::Arr: + val.arr = new array_t(*other.val.arr); + break; + case type_t::Str: + val.str = new 
string_t(*other.val.str); + break; + default: + val = other.val; + break; + } + return *this; + } + value_t &value_t::operator=(const char *other) { + type = type_t::Str; + val.str = new string_t(other); + return *this; + } + +} diff --git a/src/utils/json.cc b/src/utils/json.cc new file mode 100644 index 0000000..828f037 --- /dev/null +++ b/src/utils/json.cc @@ -0,0 +1,49 @@ +#include "utils/json.hh" +#include + +namespace ppc::data::json { + std::string stringify(const data::value_t &val) { + std::stringstream out; + bool first = true; + + if (val.is_array()) { + out << '['; + + for (const auto &el : val.array()) { + if (el.is_null()) continue; + if (!first) out << ','; + first = false; + out << stringify(el); + } + + out << ']'; + } + else if (val.is_map()) { + out << '{'; + + for (const auto &el : val.map()) { + if (el.second.is_null()) continue; + if (!first) out << ','; + first = false; + out << '"' << el.first << '"' << ':' << stringify(el.second); + } + + out << '}'; + } + else if (val.is_bool()) { + if (val.boolean()) out << "true"; + else out << "false"; + } + else if (val.is_null()) { + out << "null"; + } + else if (val.is_number()) { + out << val.number(); + } + else if (val.is_string()) { + out << '"' << val.string() << '"'; + } + + return out.str(); + } +} \ No newline at end of file diff --git a/src/utils/location.cc b/src/utils/location.cc index b5e29aa..9169ddd 100644 --- a/src/utils/location.cc +++ b/src/utils/location.cc @@ -2,6 +2,7 @@ #include using namespace ppc; +using namespace std::string_literals; std::string location_t::to_string() const { std::stringstream res; @@ -11,18 +12,18 @@ std::string location_t::to_string() const { res << filename; written_anything = true; } - if (line != -1u) { + if (line + 1 != 0) { if (written_anything) res << ':'; res << line + 1; written_anything = true; } - if (start != -1u) { + if (start + 1 != 0) { if (written_anything) res << ':'; res << start + 1; written_anything = true; } - if (length != -1u) { - if 
(written_anything) res << '(' << length + 1 << ')'; + if (length + 1 != 0) { + if (written_anything) res << '(' << length << ')'; written_anything = true; } @@ -40,11 +41,7 @@ location_t location_t::intersect(location_t other) const { if (a.start == -1u || b.start == -1u) return { }; - if (a.start > b.start) { - location_t c = a; - a = b; - b = c; - } + if (a.start > b.start) return other.intersect(*this); fix_location(a); fix_location(b); @@ -59,59 +56,39 @@ location_t location_t::intersect(location_t other) const { return a; } -location_t::location_t() { - this->line = -1; - this->start = -1; - this->length = -1; - this->code_start = -1; - this->filename = ""; +bool location_t::operator==(const location_t &other) const { + if (this->filename != other.filename) return false; + if (this->line != other.line) return false; + if (this->start != other.start) return false; + if (this->length != other.length) return false; + if (this->code_start != other.code_start) return false; + + return true; } -location_t::location_t(std::string filename) { - this->line = -1; - this->start = -1; - this->length = -1; - this->code_start = -1; - this->filename = filename; -} -location_t::location_t(std::size_t line, std::size_t start) { - this->line = line; - this->start = start; - this->length = -1; - this->code_start = -1; - this->filename = ""; -} -location_t::location_t(std::string filename, std::size_t line, std::size_t start) { - this->line = line; - this->start = start; - this->length = -1; - this->code_start = -1; - this->filename = filename; -} -location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start) { - this->line = line; - this->start = start; - this->length = -1; + + + +std::string empty = ""; + +location_t::location_t(): + location_t(empty, -1, -1, -1, -1) { } +location_t::location_t(const std::string &filename): + location_t(filename, -1, -1, -1, -1) { } +location_t::location_t(std::size_t line, std::size_t start): + location_t(empty, line, 
start, -1, -1) { } +location_t::location_t(const std::string &filename, std::size_t line, std::size_t start): + location_t(filename, line, start, -1, -1) { } +location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start): + location_t(empty, line, start, code_start, -1) { } +location_t::location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start): + location_t(filename, line, start, code_start, -1) { } +location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length): + location_t(empty, line, start, code_start, length) { } +location_t::location_t(const std::string &filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length): filename(filename) { + this->length = length; this->code_start = code_start; - this->filename = ""; -} -location_t::location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start) { this->line = line; this->start = start; - this->length = -1; - this->code_start = code_start; - this->filename = filename; -} -location_t::location_t(std::size_t line, std::size_t start, std::size_t code_start, std::size_t length) { - this->line = line; - this->start = start; - this->length = line; - this->code_start = code_start; - this->filename = ""; -} -location_t::location_t(std::string filename, std::size_t line, std::size_t start, std::size_t code_start, std::size_t length) { - this->line = line; - this->start = start; - this->length = line; - this->code_start = code_start; - this->filename = filename; } + +const location_t location_t::NONE = { }; diff --git a/src/utils/message.cc b/src/utils/message.cc index 8b49f57..61959ea 100644 --- a/src/utils/message.cc +++ b/src/utils/message.cc @@ -4,68 +4,70 @@ using namespace ppc; -std::string messages::message_t::to_string() const { - std::string loc_readable = location.to_string(); - std::string level_readable; +namespace ppc::messages { + std::string 
message_t::to_string() const { + std::string loc_readable = location.to_string(); + std::string level_readable; - switch (level) { - case messages::message_t::DEBUG: level_readable = "debug"; break; - case messages::message_t::SUGGESTION: level_readable = "suggestion"; break; - case messages::message_t::INFO: level_readable = "info"; break; - case messages::message_t::WARNING: level_readable = "warning"; break; - case messages::message_t::ERROR: level_readable = "error"; break; - default: level_readable = "what?"; break; - } - - std::stringstream res { }; - - if (loc_readable.length()) res << loc_readable << ": "; - res << level_readable << ": " << content; - - return res.str(); -} -bool messages::message_t::is_severe() const { - return level > messages::message_t::WARNING; -} - -bool messages::msg_stack_t::is_failed() const { - for (const auto &msg : messages) { - if (msg.is_severe()) return true; - } - return false; -} -void messages::msg_stack_t::print(std::ostream &output, messages::message_t::level_t threshold, bool color_output) const { - if (!messages.size()) return; - - for (const auto &msg : messages) { - if (msg.level < threshold) continue; - - std::string loc_readable = msg.location.to_string(); - - switch (msg.level) { - case messages::message_t::DEBUG: - output << (color_output ? "\e[38;5;8mdebug: " : "debug: "); - break; - case messages::message_t::SUGGESTION: - output << (color_output ? "\e[38;5;45msuggestion: " : "suggestion: "); - break; - case messages::message_t::INFO: - output << (color_output ? "\e[38;5;33minfo: ": "info: "); - break; - case messages::message_t::WARNING: - output << (color_output ? "\e[38;5;214mwarning: " : "warning: "); - break; - case messages::message_t::ERROR: - output << (color_output ? "\e[38;5;196merror: " : "error: "); - break; - default: - output << (color_output ? 
"\e[38;5;196mw\e[38;5;226mh\e[38;5;118ma\e[38;5;162mt\e[38;5;129m?\e[0m: " : "what?: "); - break; + switch (level) { + case message_t::DEBUG: level_readable = "debug"; break; + case message_t::SUGGESTION: level_readable = "suggestion"; break; + case message_t::INFO: level_readable = "info"; break; + case message_t::WARNING: level_readable = "warning"; break; + case message_t::ERROR: level_readable = "error"; break; + default: level_readable = "what?"; break; } - if (loc_readable.length()) output << loc_readable << ": "; - output << msg.content; - if (color_output) output << "\e[0m"; - output << std::endl; + std::stringstream res { }; + + if (loc_readable.length()) res << loc_readable << ": "; + res << level_readable << ": " << content; + + return res.str(); } -} + bool message_t::is_severe() const { + return level > message_t::WARNING; + } + + bool msg_stack_t::is_failed() const { + for (const auto &msg : messages) { + if (msg.is_severe()) return true; + } + return false; + } + void msg_stack_t::print(std::ostream &output, message_t::level_t threshold, bool color_output) const { + if (!messages.size()) return; + + for (const auto &msg : messages) { + if (msg.level < threshold) continue; + + std::string loc_readable = msg.location.to_string(); + + switch (msg.level) { + case message_t::DEBUG: + output << (color_output ? "\e[38;5;8mdebug: " : "debug: "); + break; + case message_t::SUGGESTION: + output << (color_output ? "\e[38;5;45msuggestion: " : "suggestion: "); + break; + case message_t::INFO: + output << (color_output ? "\e[38;5;33minfo: ": "info: "); + break; + case message_t::WARNING: + output << (color_output ? "\e[38;5;214mwarning: " : "warning: "); + break; + case message_t::ERROR: + output << (color_output ? "\e[38;5;196merror: " : "error: "); + break; + default: + output << (color_output ? 
"\e[38;5;196mw\e[38;5;226mh\e[38;5;118ma\e[38;5;162mt\e[38;5;129m?\e[0m: " : "what?: "); + break; + } + + if (loc_readable.length()) output << loc_readable << ": "; + output << msg.content; + if (color_output) output << "\e[0m"; + output << std::endl; + } + } +} \ No newline at end of file diff --git a/src/utils/strings.cc b/src/utils/strings.cc index 4982718..1482a81 100644 --- a/src/utils/strings.cc +++ b/src/utils/strings.cc @@ -8,7 +8,7 @@ std::vector str::split(const std::string &splittable, std::initiali std::vector res; for (char c : splittable) { - if (std::string { splitters }.find(c) == -1u) { + if (std::string { splitters }.find(c) == std::string::npos) { buff << c; } else { @@ -29,10 +29,10 @@ std::vector str::split(const std::string &splittable, std::initiali std::string str::trim(std::string splittable, std::initializer_list splitters) { auto split = std::string { splitters }; - while (!splittable.empty() && split.find(splittable[0]) != -1u) { + while (!splittable.empty() && split.find(splittable[0]) != std::string::npos) { splittable = splittable.substr(1); } - while (!splittable.empty() && split.find(splittable[splittable.length() - 1]) != -1u) { + while (!splittable.empty() && split.find(splittable[splittable.length() - 1]) != std::string::npos) { splittable = splittable.substr(0, splittable.length() - 1); }