From 19c8af768b556698bd23044334fcd4a6ab75b2c1 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:38:09 +0300 Subject: [PATCH] feat: add oct parsing in string parser --- include/utils/message.hh | 5 ++++ src/compiler/treeifier/tokenizer.cc | 43 +++++++++++++++++------------ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/utils/message.hh b/include/utils/message.hh index 75886d8..1d8eaf3 100644 --- a/include/utils/message.hh +++ b/include/utils/message.hh @@ -42,6 +42,11 @@ namespace ppc::messages { inline auto end() const { return messages.end(); } void push(const message_t &msg) { messages.push_back(msg); } + void err(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::ERROR, msg, loc }); } + void warn(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::WARNING, msg, loc }); } + void suggest(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::SUGGESTION, msg, loc }); } + void info(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::INFO, msg, loc }); } + void debug(const std::string &msg, location_t loc = location_t::NONE) { push({ message_t::DEBUG, msg, loc }); } void push(const msg_stack_t &other) { for (const auto &msg : other) push(msg); } diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 5e5d362..e603d86 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -32,7 +32,26 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, c else if (c == 'r') new_c = '\r'; else if (c == 't') new_c = '\t'; else if (c == 'v') new_c = '\v'; - // TODO: Add support for oct, hex and utf8 literals + else if (c >= '0' && c <= '7') { + new_c = 0; + size_t n = 0; + while (c >= '0' && c <= '7') { + new_c <<= 3; + new_c |= c - '0'; + c = token.data[++i]; + n++; + } + if (n > 3) { + location_t loc = curr_char_loc; + loc.code_start--; + loc.start--; + loc.length = n + 1; + msg_stack.warn("Octal escape sequence overflows 255 8-bit limit (3 digits).", loc); + } + curr_char_loc.start += n - 1; + i--; + } + // TODO: Add support for hex and utf8 literals else if (c == literal_char || c == '\\') new_c = c; else { throw message_t(message_t::ERROR, "Unescapable character.", curr_char_loc); @@ -168,21 +187,6 @@ static std::vector parse_dec(msg_stack_t &msg_stack, size_t i, const st return res; } - -static std::vector parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { - switch (token.type) { - case lex::token_t::BIN_LITERAL: - return parse_bin(msg_stack, 2, token.data); - case lex::token_t::OCT_LITERAL: - return parse_oct(msg_stack, 1, token.data); - case lex::token_t::DEC_LITERAL: - return parse_dec(msg_stack, 0, token.data); - case lex::token_t::HEX_LITERAL: - return parse_hex(msg_stack, 2, token.data); - default: - throw "WTF r u doing bro?"s; - } -} static std::vector parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { throw "no floats lol bozo"s; } @@ -211,8 +215,11 @@ token_t token_t::parse(messages::msg_stack_t &msg_stack, lex::token_t in) { return { parse_float(msg_stack, in), false, in.location }; case lex::token_t::STRING_LITERAL: return { parse_string(msg_stack, false, in), true, in.location }; - case lex::token_t::CHAR_LITERAL: - return { parse_string(msg_stack, true, in), false, in.location }; + case lex::token_t::CHAR_LITERAL: { + auto res = parse_string(msg_stack, true, in); + std::reverse(res.begin(), res.end()); + return { res, false, in.location }; + } default: throw message_t(message_t::ERROR, "Token type not recognised.", in.location); }