ppc-lang/include/compiler/treeifier/tokenizer.hh
2022-09-19 10:34:19 +03:00

190 lines
5.7 KiB
C++

#pragma once
#include "utils/location.hh"
#include "utils/message.hh"
#include "compiler/treeifier/lexer.hh"
namespace ppc::comp::tree::tok {
enum operator_t {
LESS_THAN,
GREATER_THAN,
LESS_THAN_EQUALS,
GREATER_THAN_EQUALS,
EQUALS,
NOT_EQUALS,
DOUBLE_AND,
DOUBLE_OR,
SHIFT_LEFT,
SHIFT_RIGHT,
XOR,
AND,
OR,
NOT,
BITWISE_NEGATIVE,
INCREASE,
DECREASE,
ADD,
SUBTRACT,
DIVIDE,
MULTIPLY,
MODULO,
CONDITIONAL,
NULL_COALESCING,
ASSIGN,
ASSIGN_ADD,
ASSIGN_SUBTRACT,
ASSIGN_MULTIPLY,
ASSIGN_DIVIDE,
ASSIGN_MODULO,
ASSIGN_SHIFT_LEFT,
ASSIGN_SHIFT_RIGHT,
ASSIGN_XOR,
ASSIGN_AND,
ASSIGN_OR,
ASSIGN_DOUBLE_AND,
ASSIGN_DOUBLE_OR,
ASSIGN_NULL_COALESCING,
PTR_MEMBER,
DOT,
COMMA,
SEMICOLON,
COLON,
LAMBDA,
BRACKET_OPEN,
BRACKET_CLOSE,
BRACE_OPEN,
BRACE_CLOSE,
PAREN_OPEN,
PAREN_CLOSE,
VAL,
REF,
SIZEOF,
};
struct token_t {
private:
enum kind_t {
NONE,
IDENTIFIER,
OPERATOR,
INT,
FLOAT,
CHAR,
STRING,
} kind;
union data_t {
std::string *identifier;
operator_t _operator;
std::uint64_t int_literal;
double float_literal;
char char_literal;
std::vector<char> *string_literal;
} data;
public:
ppc::location_t location;
bool is_identifier() { return kind == IDENTIFIER; }
bool is_operator() { return kind == OPERATOR; }
bool is_int_lit() { return kind == INT; }
bool is_float_lit() { return kind == FLOAT; }
bool is_char_lit() { return kind == CHAR; }
bool is_string_lit() { return kind == STRING; }
auto const &identifier() {
if (!is_identifier()) throw std::string { "Token is not an identifier." };
else return *data.identifier;
}
auto _operator() {
if (!is_operator()) throw std::string { "Token is not an operator." };
else return data._operator;
}
auto int_lit() {
if (!is_int_lit()) throw std::string { "Token is not an int literal." };
else return data.int_literal;
}
auto float_lit() {
if (!is_float_lit()) throw std::string { "Token is not a float literal." };
else return data.float_literal;
}
auto char_lit() {
if (!is_char_lit()) throw std::string { "Token is not a char literal." };
else return data.char_literal;
}
auto const &string_lit() {
if (!is_string_lit()) throw std::string { "Token is not a string literal." };
else return *data.string_literal;
}
bool is_operator(operator_t op) { return is_operator() && _operator() == op; }
bool is_identifier(std::string &&val) { return is_identifier() && identifier() == val; }
token_t() { kind = NONE; }
token_t(std::string const &identifier, location_t loc = NO_LOCATION) {
kind = IDENTIFIER;
data.identifier = new std::string { identifier };
location = loc;
}
token_t(operator_t op, location_t loc = NO_LOCATION) {
kind = OPERATOR;
data._operator = op;
location = loc;
}
token_t(std::uint64_t val, location_t loc = NO_LOCATION) {
kind = INT;
data.int_literal = val;
location = loc;
}
token_t(double val, location_t loc = NO_LOCATION) {
kind = FLOAT;
data.float_literal = val;
location = loc;
}
token_t(char c, location_t loc = NO_LOCATION) {
kind = CHAR;
data.char_literal = c;
location = loc;
}
token_t(std::vector<char> const &val, location_t loc = NO_LOCATION) {
kind = STRING;
data.string_literal = new std::vector<char> { val };
location = loc;
}
token_t(token_t const &tok) {
kind = tok.kind;
switch (kind) {
case NONE: break;
case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break;
case OPERATOR: data._operator = tok.data._operator; break;
case INT: data.int_literal = tok.data.int_literal; break;
case FLOAT: data.float_literal = tok.data.float_literal; break;
case CHAR: data.char_literal = tok.data.char_literal; break;
case STRING: data.string_literal = new std::vector<char> { *tok.data.string_literal }; break;
}
location = tok.location;
}
~token_t() {
switch (kind) {
case IDENTIFIER: delete data.identifier; break;
case STRING: delete data.string_literal; break;
default: break;
}
}
static tok::token_t parse(messages::msg_stack_t &msg_stack, lex::token_t token);
static std::vector<token_t> parse_many(messages::msg_stack_t &msg_stack, std::vector<lex::token_t> tokens);
};
operator_t operator_find(std::string const &text);
std::string const &operator_stringify(operator_t kw);
}