From 19c8af768b556698bd23044334fcd4a6ab75b2c1 Mon Sep 17 00:00:00 2001
From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com>
Date: Fri, 28 Oct 2022 10:38:09 +0300
Subject: [PATCH] feat: add octal escape parsing in string parser

---
 include/utils/message.hh            |  5 ++++
 src/compiler/treeifier/tokenizer.cc | 43 +++++++++++++++++------------
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/include/utils/message.hh b/include/utils/message.hh
index 75886d8..1d8eaf3 100644
--- a/include/utils/message.hh
+++ b/include/utils/message.hh
@@ -42,6 +42,11 @@ namespace ppc::messages {
         inline auto end() const { return messages.end(); }
 
         void push(const message_t &msg) { messages.push_back(msg); }
+        void err(const std::string &msg, location_t loc = location_t::NONE) { push(message_t(message_t::ERROR, msg, loc)); } // queue an ERROR-level message
+        void warn(const std::string &msg, location_t loc = location_t::NONE) { push(message_t(message_t::WARNING, msg, loc)); } // queue a WARNING-level message
+        void suggest(const std::string &msg, location_t loc = location_t::NONE) { push(message_t(message_t::SUGGESTION, msg, loc)); } // queue a SUGGESTION-level message
+        void info(const std::string &msg, location_t loc = location_t::NONE) { push(message_t(message_t::INFO, msg, loc)); } // queue an INFO-level message
+        void debug(const std::string &msg, location_t loc = location_t::NONE) { push(message_t(message_t::DEBUG, msg, loc)); } // queue a DEBUG-level message
         void push(const msg_stack_t &other) {
             for (const auto &msg : other) push(msg);
         }
diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc
index 5e5d362..e603d86 100644
--- a/src/compiler/treeifier/tokenizer.cc
+++ b/src/compiler/treeifier/tokenizer.cc
@@ -32,7 +32,26 @@ static std::vector<uint8_t> parse_string(msg_stack_t &msg_stack, bool is_char, c
             else if (c == 'r') new_c = '\r';
             else if (c == 't') new_c = '\t';
             else if (c == 'v') new_c = '\v';
-            // TODO: Add support for oct, hex and utf8 literals
+            else if (c >= '0' && c <= '7') {
+                // Octal escape: fold in every consecutive digit; warn only when the value really exceeds one byte.
+                new_c = 0;
+                size_t n = 0; // number of octal digits consumed
+                bool overflow = false;
+                for (; c >= '0' && c <= '7'; c = token.data[++i], n++) {
+                    overflow |= (new_c & 0xE0) != 0; // one of the top three bits is about to be shifted past bit 7
+                    new_c = (new_c << 3) | (c - '0');
+                }
+                if (overflow) {
+                    location_t loc = curr_char_loc;
+                    loc.code_start--; // presumably widens the span to cover the backslash too -- confirm
+                    loc.start--;
+                    loc.length = n + 1;
+                    msg_stack.warn("Octal escape sequence overflows the 8-bit limit (255).", loc);
+                }
+                curr_char_loc.start += n - 1;
+                i--; // undo the final look-ahead so the first non-digit is not skipped (enclosing loop not visible here)
+            }
+            // TODO: Add support for hex and utf8 literals
             else if (c == literal_char || c == '\\') new_c = c;
             else {
                 throw message_t(message_t::ERROR, "Unescapable character.", curr_char_loc);
@@ -168,21 +187,6 @@ static std::vector<uint8_t> parse_dec(msg_stack_t &msg_stack, size_t i, const st
 
     return res;
 }
-
-static std::vector<uint8_t> parse_int(msg_stack_t &msg_stack, const lex::token_t &token) {
-    switch (token.type) {
-        case lex::token_t::BIN_LITERAL:
-            return parse_bin(msg_stack, 2, token.data);
-        case lex::token_t::OCT_LITERAL:
-            return parse_oct(msg_stack, 1, token.data);
-        case lex::token_t::DEC_LITERAL:
-            return parse_dec(msg_stack, 0, token.data);
-        case lex::token_t::HEX_LITERAL:
-            return parse_hex(msg_stack, 2, token.data);
-        default:
-            throw "WTF r u doing bro?"s;
-    }
-}
 static std::vector<uint8_t> parse_float(msg_stack_t &msg_stack, const lex::token_t &token) {
     throw "no floats lol bozo"s;
 }
@@ -211,8 +215,11 @@ token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) {
             return { parse_float(msg_stack, in), false, in.location };
         case lex::token_t::STRING_LITERAL:
             return { parse_string(msg_stack, false, in), true, in.location };
-        case lex::token_t::CHAR_LITERAL:
-            return { parse_string(msg_stack, true, in), false, in.location };
+        case lex::token_t::CHAR_LITERAL: {
+            // The token stores the parsed bytes in reverse order (presumably least-significant byte first -- confirm).
+            const auto bytes = parse_string(msg_stack, true, in);
+            return { std::vector<uint8_t>(bytes.rbegin(), bytes.rend()), false, in.location };
+        }
         default:
             throw message_t(message_t::ERROR, "Token type not recognised.", in.location);
     }