fix: split int and string literals

TopchetoEU 2022-10-28 10:08:30 +03:00
parent e510584b98
commit 2a0104808c
2 changed files with 61 additions and 14 deletions


@@ -79,7 +79,8 @@ namespace ppc::comp::tree {
             NONE,
             IDENTIFIER,
             OPERATOR,
-            LITERAL,
+            INT_LITERAL,
+            STR_LITERAL,
         } kind;
         union data_t {
             std::string *identifier;
@@ -91,7 +92,9 @@ namespace ppc::comp::tree {
         bool is_identifier() const { return kind == IDENTIFIER; }
         bool is_operator() const { return kind == OPERATOR; }
-        bool is_literal() const { return kind == LITERAL; }
+        bool is_int_literal() const { return kind == INT_LITERAL; }
+        bool is_str_literal() const { return kind == STR_LITERAL; }
+        bool is_literal() const { return is_int_literal() || is_str_literal(); }
         const auto &identifier() const {
             if (!is_identifier()) throw std::string { "Token is not an identifier." };
@@ -118,8 +121,8 @@ namespace ppc::comp::tree {
             kind = OPERATOR;
             data._operator = op;
         }
-        token_t(const std::vector<uint8_t> &val, location_t loc = location_t::NONE): location(loc) {
-            kind = LITERAL;
+        token_t(const std::vector<uint8_t> &val, bool is_str, location_t loc = location_t::NONE): location(loc) {
+            kind = is_str ? STR_LITERAL : INT_LITERAL;
             data.literal = new std::vector<uint8_t> { val };
         }
         token_t(const token_t &tok): location(tok.location) {
@@ -128,14 +131,16 @@ namespace ppc::comp::tree {
                 case NONE: break;
                 case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break;
                 case OPERATOR: data._operator = tok.data._operator; break;
-                case LITERAL: data.literal = new std::vector<uint8_t> { *tok.data.literal }; break;
+                case STR_LITERAL:
+                case INT_LITERAL: data.literal = new std::vector<uint8_t> { *tok.data.literal }; break;
            }
        }
        ~token_t() {
            switch (kind) {
                case IDENTIFIER: delete data.identifier; break;
-                case LITERAL: delete data.literal; break;
+                case STR_LITERAL:
+                case INT_LITERAL: delete data.literal; break;
                default: break;
            }
        }
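For context on the header change above: the single LITERAL kind is split into INT_LITERAL and STR_LITERAL, the byte-vector constructor now takes an is_str flag to pick between them, and is_literal() remains as a convenience covering both. A minimal, self-contained sketch of how calling code can use the new predicates; this is a simplified stand-in with a plain member instead of the real union/pointer data, written only for illustration:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Simplified stand-in for ppc::comp::tree's token_t, illustration only.
    struct token_t {
        enum kind_t { NONE, IDENTIFIER, OPERATOR, INT_LITERAL, STR_LITERAL } kind = NONE;
        std::vector<uint8_t> literal;

        token_t(const std::vector<uint8_t> &val, bool is_str):
            kind(is_str ? STR_LITERAL : INT_LITERAL), literal(val) { }

        bool is_int_literal() const { return kind == INT_LITERAL; }
        bool is_str_literal() const { return kind == STR_LITERAL; }
        bool is_literal() const { return is_int_literal() || is_str_literal(); }
    };

    int main() {
        token_t num { { 0x2A }, false };      // integer literal 42
        token_t str { { 'h', 'i' }, true };   // string literal "hi"

        std::cout << num.is_int_literal() << str.is_str_literal() << '\n'; // 11
        std::cout << num.is_literal() << str.is_literal() << '\n';         // 11
    }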


@@ -60,7 +60,7 @@ static std::vector<uint8_t> parse_bin(msg_stack_t &msg_stack, size_t i, const std::string &data) {
     int last_byte = 0;
     int lastbyte_n = 0;
-    for (size_t j = 0; j < data.length(); j++) {
+    for (size_t j = i; j < data.length(); j++) {
         if (lastbyte_n == 8) {
             lastbyte_n = 0;
             res.push_back(last_byte);
@@ -83,7+83,7 @@ static std::vector<uint8_t> parse_hex(msg_stack_t &msg_stack, size_t i, const std::string &data) {
     int last_byte = 0;
     int lastbyte_n = 0;
-    for (size_t j = 0; j < data.length(); j++) {
+    for (size_t j = i; j < data.length(); j++) {
         if (lastbyte_n == 8) {
             lastbyte_n = 0;
             res.push_back(last_byte);
@@ -110,7 +110,7 @@ static std::vector<uint8_t> parse_oct(msg_stack_t &msg_stack, size_t i, const std::string &data) {
     int last_byte = 0;
     int lastbyte_n = 0;
-    for (size_t j = 0; j < data.length(); j++) {
+    for (size_t j = i; j < data.length(); j++) {
         if (lastbyte_n >= 8) {
             lastbyte_n = 0;
             res.push_back(last_byte);
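A side note on the three loop changes above: the scan now starts at j = i rather than j = 0, so the radix prefix the caller skips by passing i (2 for hex literals, 1 for the others) is no longer folded into the parsed digits. A rough standalone sketch of the idea, assuming a conventional "0x" prefix; the real parse_hex also accumulates into last_byte/lastbyte_n and reports errors through msg_stack, which is omitted here:

    #include <cstddef>
    #include <iostream>
    #include <string>

    // Hypothetical sketch, not the project's parse_hex.
    static unsigned parse_hex_sketch(std::size_t i, const std::string &data) {
        unsigned value = 0;
        for (std::size_t j = i; j < data.length(); j++) { // j = i skips the prefix
            char c = data[j];
            unsigned digit = c <= '9' ? unsigned(c - '0') : unsigned((c | 0x20) - 'a' + 10);
            value = (value << 4) | digit;
        }
        return value;
    }

    int main() {
        std::cout << parse_hex_sketch(2, "0x2a") << '\n'; // 42; "0x" is not treated as digits
    }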
@@ -129,6 +129,45 @@ static std::vector<uint8_t> parse_oct(msg_stack_t &msg_stack, size_t i, const std::string &data) {
     return res;
 }
+
+static void mult_10(std::vector<uint8_t> &val) {
+    std::vector<uint8_t> res;
+    int carry = 0;
+    for (size_t i = 0; i < val.size(); i++) {
+        carry = val[i] * 10 + carry;
+        res.push_back(carry);
+        carry >>= 8;
+    }
+    if (carry != 0) res.push_back(carry);
+    val = res;
+}
+
+static void add_byte(std::vector<uint8_t> &a, uint8_t b) {
+    int carry = b;
+    for (size_t i = 0; i < a.size(); i++) {
+        carry = a[i] + carry;
+        a[i] = carry;
+        carry >>= 8;
+        if (carry == 0) break;
+    }
+    if (carry != 0) a.push_back(carry);
+}
+
+static std::vector<uint8_t> parse_dec(msg_stack_t &msg_stack, size_t i, const std::string &data) {
+    std::vector<uint8_t> res;
+    for (size_t j = i; j < data.length(); j++) {
+        int digit = data[j] - '0';
+        mult_10(res);
+        if (res.empty()) res.push_back(digit);
+        else add_byte(res, digit);
+    }
+    return res;
+}
+
 static std::vector<uint8_t> parse_int(msg_stack_t &msg_stack, const lex::token_t &token) {
     switch (token.type) {
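The new parse_dec builds the value as a little-endian base-256 byte vector: for every decimal digit it first multiplies the accumulator by 10 (mult_10) and then adds the digit (add_byte), carrying overflow into the next byte. A small worked check that reuses the two helpers exactly as added above (only the includes and main are mine): parsing "300" should give the bytes 0x2C 0x01, i.e. 44 + 1*256 = 300.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Helpers copied from this commit's parse_dec machinery.
    static void mult_10(std::vector<uint8_t> &val) {
        std::vector<uint8_t> res;
        int carry = 0;
        for (size_t i = 0; i < val.size(); i++) {
            carry = val[i] * 10 + carry;
            res.push_back(carry); // keeps the low byte; the rest stays in carry
            carry >>= 8;
        }
        if (carry != 0) res.push_back(carry);
        val = res;
    }

    static void add_byte(std::vector<uint8_t> &a, uint8_t b) {
        int carry = b;
        for (size_t i = 0; i < a.size(); i++) {
            carry = a[i] + carry;
            a[i] = carry;
            carry >>= 8;
            if (carry == 0) break;
        }
        if (carry != 0) a.push_back(carry);
    }

    int main() {
        std::vector<uint8_t> acc;
        for (char c : std::string { "300" }) {
            mult_10(acc);
            if (acc.empty()) acc.push_back(c - '0');
            else add_byte(acc, c - '0');
        }
        for (uint8_t byte : acc) std::printf("0x%02X ", (unsigned)byte); // 0x2C 0x01
        std::printf("\n");
    }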
@@ -137,7 +176,7 @@ static std::vector<uint8_t> parse_int(msg_stack_t &msg_stack, const lex::token_t &token) {
         case lex::token_t::OCT_LITERAL:
             return parse_oct(msg_stack, 1, token.data);
         case lex::token_t::DEC_LITERAL:
-            throw "no dec literals lol bozo."s;
+            return parse_dec(msg_stack, 0, token.data);
         case lex::token_t::HEX_LITERAL:
             return parse_hex(msg_stack, 2, token.data);
         default:
@@ -161,16 +200,19 @@ token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) {
             throw message_t(message_t::ERROR, "Operator not recognised."s, in.location);
         }
         case lex::token_t::BIN_LITERAL:
+            return { parse_bin(msg_stack, 1, in.data), false, in.location };
         case lex::token_t::OCT_LITERAL:
+            return { parse_oct(msg_stack, 1, in.data), false, in.location };
         case lex::token_t::DEC_LITERAL:
+            return { parse_dec(msg_stack, 0, in.data), false, in.location };
         case lex::token_t::HEX_LITERAL:
-            return { parse_int(msg_stack, in), in.location };
+            return { parse_hex(msg_stack, 2, in.data), false, in.location };
         case lex::token_t::FLOAT_LITERAL:
-            return { parse_float(msg_stack, in), in.location };
+            return { parse_float(msg_stack, in), false, in.location };
         case lex::token_t::STRING_LITERAL:
-            return { parse_string(msg_stack, false, in), in.location };
+            return { parse_string(msg_stack, false, in), true, in.location };
         case lex::token_t::CHAR_LITERAL:
-            return { parse_string(msg_stack, true, in), in.location };
+            return { parse_string(msg_stack, true, in), false, in.location };
         default:
             throw message_t(message_t::ERROR, "Token type not recognised.", in.location);
     }