From 2a0104808c33f45b1530be48ffda6f8531a976fa Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:08:30 +0300 Subject: [PATCH] fix: split int and string literals --- include/compiler/treeifier/tokenizer.hh | 17 +++++--- src/compiler/treeifier/tokenizer.cc | 58 +++++++++++++++++++++---- 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/include/compiler/treeifier/tokenizer.hh b/include/compiler/treeifier/tokenizer.hh index adaece9..c2e40b2 100644 --- a/include/compiler/treeifier/tokenizer.hh +++ b/include/compiler/treeifier/tokenizer.hh @@ -79,7 +79,8 @@ namespace ppc::comp::tree { NONE, IDENTIFIER, OPERATOR, - LITERAL, + INT_LITERAL, + STR_LITERAL, } kind; union data_t { std::string *identifier; @@ -91,7 +92,9 @@ namespace ppc::comp::tree { bool is_identifier() const { return kind == IDENTIFIER; } bool is_operator() const { return kind == OPERATOR; } - bool is_literal() const { return kind == LITERAL; } + bool is_int_literal() const { return kind == INT_LITERAL; } + bool is_str_literal() const { return kind == STR_LITERAL; } + bool is_literal() const { return is_int_literal() || is_str_literal(); } const auto &identifier() const { if (!is_identifier()) throw std::string { "Token is not an identifier." }; @@ -118,8 +121,8 @@ namespace ppc::comp::tree { kind = OPERATOR; data._operator = op; } - token_t(const std::vector &val, location_t loc = location_t::NONE): location(loc) { - kind = LITERAL; + token_t(const std::vector &val, bool is_str, location_t loc = location_t::NONE): location(loc) { + kind = is_str ? STR_LITERAL : INT_LITERAL; data.literal = new std::vector { val }; } token_t(const token_t &tok): location(tok.location) { @@ -128,14 +131,16 @@ namespace ppc::comp::tree { case NONE: break; case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break; case OPERATOR: data._operator = tok.data._operator; break; - case LITERAL: data.literal = new std::vector { *tok.data.literal }; break; + case STR_LITERAL: + case INT_LITERAL: data.literal = new std::vector { *tok.data.literal }; break; } } ~token_t() { switch (kind) { case IDENTIFIER: delete data.identifier; break; - case LITERAL: delete data.literal; break; + case STR_LITERAL: + case INT_LITERAL: delete data.literal; break; default: break; } } diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index fd5fdcd..5e5d362 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -60,7 +60,7 @@ static std::vector parse_bin(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = 0; j < data.length(); j++) { + for (size_t j = i; j < data.length(); j++) { if (lastbyte_n == 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -83,7 +83,7 @@ static std::vector parse_hex(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = 0; j < data.length(); j++) { + for (size_t j = i; j < data.length(); j++) { if (lastbyte_n == 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -110,7 +110,7 @@ static std::vector parse_oct(msg_stack_t &msg_stack, size_t i, const st int last_byte = 0; int lastbyte_n = 0; - for (size_t j = 0; j < data.length(); j++) { + for (size_t j = i; j < data.length(); j++) { if (lastbyte_n >= 8) { lastbyte_n = 0; res.push_back(last_byte); @@ -129,6 +129,45 @@ static std::vector parse_oct(msg_stack_t &msg_stack, size_t i, const st return res; } +static void mult_10(std::vector &val) { + std::vector res; + + int carry = 0; + + for (size_t i = 0; i < val.size(); i++) { + carry = val[i] * 10 + carry; + res.push_back(carry); + carry >>= 8; + } + + if (carry != 0) res.push_back(carry); + val = res; +} +static void add_byte(std::vector &a, uint8_t b) { + int carry = b; + + for (size_t i = 0; i < a.size(); i++) { + carry = a[i] + carry; + a[i] = carry; + carry >>= 8; + if (carry == 0) break; + } + + if (carry != 0) a.push_back(carry); +} +static std::vector parse_dec(msg_stack_t &msg_stack, size_t i, const std::string &data) { + std::vector res; + + for (size_t j = i; j < data.length(); j++) { + int digit = data[j] - '0'; + + mult_10(res); + if (res.empty()) res.push_back(digit); + else add_byte(res, digit); + } + + return res; +} static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { switch (token.type) { @@ -137,7 +176,7 @@ static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t case lex::token_t::OCT_LITERAL: return parse_oct(msg_stack, 1, token.data); case lex::token_t::DEC_LITERAL: - throw "no dec literals lol bozo."s; + return parse_dec(msg_stack, 0, token.data); case lex::token_t::HEX_LITERAL: return parse_hex(msg_stack, 2, token.data); default: @@ -161,16 +200,19 @@ token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { throw message_t(message_t::ERROR, "Operator not recognised."s, in.location); } case lex::token_t::BIN_LITERAL: + return { parse_bin(msg_stack, 1, in.data), false, in.location }; case lex::token_t::OCT_LITERAL: + return { parse_oct(msg_stack, 1, in.data), false, in.location }; case lex::token_t::DEC_LITERAL: + return { parse_dec(msg_stack, 0, in.data), false, in.location }; case lex::token_t::HEX_LITERAL: - return { parse_int(msg_stack, in), in.location }; + return { parse_hex(msg_stack, 2, in.data), false, in.location }; case lex::token_t::FLOAT_LITERAL: - return { parse_float(msg_stack, in), in.location }; + return { parse_float(msg_stack, in), false, in.location }; case lex::token_t::STRING_LITERAL: - return { parse_string(msg_stack, false, in), in.location }; + return { parse_string(msg_stack, false, in), true, in.location }; case lex::token_t::CHAR_LITERAL: - return { parse_string(msg_stack, true, in), in.location }; + return { parse_string(msg_stack, true, in), false, in.location }; default: throw message_t(message_t::ERROR, "Token type not recognised.", in.location); }