From a8cda64516631c7ee969fa82427e07741f1d7658 Mon Sep 17 00:00:00 2001 From: TopchetoEU <36534413+TopchetoEU@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:12:42 +0300 Subject: [PATCH] fix: float and int literals --- src/compiler/treeifier/lexer.cc | 31 +++++++++++++++-------------- src/compiler/treeifier/tokenizer.cc | 18 ++++++++--------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/compiler/treeifier/lexer.cc b/src/compiler/treeifier/lexer.cc index 1d72280..2f89eea 100644 --- a/src/compiler/treeifier/lexer.cc +++ b/src/compiler/treeifier/lexer.cc @@ -15,7 +15,7 @@ struct res_t { bool _repeat; bool _add; - res_t add(bool val = false) { + res_t add(bool val = true) { this->_add = val; return *this; } @@ -26,14 +26,14 @@ struct res_t { }; -static bool isoct(char c) { +static inline bool isoct(char c) { return c >= '0' && c <= '7'; } -static bool is_any(char c, std::string chars) { +static inline bool is_any(char c, std::string chars) { auto res = chars.find(c) != std::string::npos; return res; } -static bool is_operator(char c) { +static inline bool is_operator(char c) { return is_any(c, "=!<>+-*/%&|^?:,.(){}[];"); } @@ -85,9 +85,9 @@ static res_t lexlet_dec(char c, std::vector &tok) { }; static res_t lexlet_zero(char c, std::vector &tok) { - if (c == '.') return lexer_switch(lexlet_float); - else if (c == 'b') return lexer_switch(lexlet_bin); - else if (c == 'x') return lexer_switch(lexlet_hex); + if (c == '.') return lexer_switch(lexlet_float).add(); + else if (c == 'b') return lexer_switch(lexlet_bin).add(); + else if (c == 'x') return lexer_switch(lexlet_hex).add(); else if (isdigit(c)) return lexer_switch(lexlet_oct, true); else return lexer_end(token_t::DEC_LITERAL); }; @@ -107,11 +107,14 @@ static res_t lexlet_multicomment(char c, std::vector &tok) { static res_t lexlet_operator(char c, std::vector &tok) { bool failed = false; + if (tok.size() > 0) { failed = true; char first_op = tok[0]; size_t op_i = tok.size(); + if (first_op == '.' && isdigit(c)) return lexer_switch(lexlet_float).add(); + if (first_op == c && op_i == 1 && is_any(c, ":+-&|?<>")) failed = false; if (c == '=') { if (op_i == 1 && is_any(first_op, "<>=!+-/*%")) failed = false; @@ -138,18 +141,16 @@ static res_t lexlet_char(char c, std::vector &tok) { }; static res_t lexlet_default(char c, std::vector &tok) { - tok.push_back(c); - if (c == '"') return lexer_switch(lexlet_string); - if (c == '\'') return lexer_switch(lexlet_char); - if (c == '0') return lexer_switch(lexlet_zero); - if (c == '.') return lexer_switch(lexlet_float); - if (is_operator(c)) return lexer_switch(lexlet_operator); - if (isdigit(c)) return lexer_switch(lexlet_dec); + if (c == '"') return lexer_switch(lexlet_string).add(); + if (c == '\'') return lexer_switch(lexlet_char).add(); + if (c == '0') return lexer_switch(lexlet_zero).add(); + if (is_operator(c)) return lexer_switch(lexlet_operator).add(); + if (isdigit(c)) return lexer_switch(lexlet_dec).add(); if (isspace(c)) { tok.clear(); return lexer_none().add(false); } - return lexer_switch(lexlet_identifier); + return lexer_switch(lexlet_identifier).add(); }; std::vector token_t::parse_many(ppc::messages::msg_stack_t &msg_stack, const std::string &filename, const std::string &_src) { diff --git a/src/compiler/treeifier/tokenizer.cc b/src/compiler/treeifier/tokenizer.cc index 2063afe..763aab1 100644 --- a/src/compiler/treeifier/tokenizer.cc +++ b/src/compiler/treeifier/tokenizer.cc @@ -7,7 +7,7 @@ using namespace messages; using namespace comp::tree; using namespace std::string_literals; -static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex::token_t token) { +static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, const lex::token_t &token) { char literal_char = is_char ? '\'' : '"'; bool escaping = false; @@ -52,7 +52,7 @@ static std::vector parse_string(msg_stack_t &msg_stack, bool is_char, lex: if (is_char) throw message_t(message_t::ERROR, "Unterminated char literal.", token.location); else throw message_t(message_t::ERROR, "Unterminated string literal.", token.location); } -static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { +static token_t parse_int(msg_stack_t &msg_stack, const lex::token_t &token) { enum radix_t { BINARY, OCTAL, @@ -82,11 +82,9 @@ static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { throw "WTF r u doing bro?"s; } - std::size_t j = token.data.length() - 1; - uint64_t res = 0; - for (; i <= j; i++) { + for (; i <= token.data.length() - 1; i++) { char c = token.data[i]; int8_t digit; switch (radix) { @@ -109,8 +107,8 @@ static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { res += digit; break; case 3: - if (c >= 'a' && c <= 'f') digit = c - 'a' + 9; - else if (c >= 'A' && c <= 'F') digit = c - 'A' + 9; + if (c >= 'a' && c <= 'f') digit = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10; else if (c >= '0' && c <= '9') digit = c - '0'; else throw message_t(message_t::ERROR, "Invalid character '"s + c + "' in hex literal.", token.location); res <<= 4; @@ -121,13 +119,13 @@ static token_t parse_int(msg_stack_t &msg_stack, lex::token_t token) { return token_t(res, token.location); } -static token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { +static token_t parse_float(msg_stack_t &msg_stack, const lex::token_t &token) { double whole = 0, fract = 0; char c; std::size_t i; - for (i = 0; i < token.data.length() && ((c = token.data[i]) > '0' && c < '9'); i++) { + for (i = 0; i < token.data.length() && isdigit(c = token.data[i]); i++) { if (c == '.') break; int digit = c - '0'; whole *= 10; @@ -136,7 +134,7 @@ static token_t parse_float(msg_stack_t &msg_stack, lex::token_t token) { if (c == '.') { i++; - for (; i < token.data.length() && ((c = token.data[i]) > '0' && c < '9'); i++) { + for (; i < token.data.length() && isdigit(c = token.data[i]); i++) { int digit = c - '0'; fract += digit; fract /= 10;