// ppc-lang/include/compiler/treeifier/tokenizer.hh
#pragma once
#include "utils/location.hh"
#include "utils/message.hh"
#include "compiler/treeifier/lexer.hh"
namespace ppc::comp::tree {
/**
 * Every operator / punctuation kind a token_t can carry.
 *
 * This is an unscoped enum, so the enumerators are visible directly in the
 * enclosing namespace and take consecutive values starting at 0 (NONE).
 *
 * The group labels below are read off the enumerator names only; the actual
 * source spellings are decided by operator_find / operator_stringify, which
 * are declared but not defined in this header.
 */
enum operator_t {
    NONE,

    // Comparison
    LESS_THAN,
    GREATER_THAN,
    LESS_THAN_EQUALS,
    GREATER_THAN_EQUALS,
    EQUALS,
    NOT_EQUALS,

    // Logical, shift and bitwise
    DOUBLE_AND,
    DOUBLE_OR,
    SHIFT_LEFT,
    SHIFT_RIGHT,
    XOR,
    AND,
    OR,
    NOT,
    BITWISE_NEGATIVE,

    // Increment / decrement
    INCREASE,
    DECREASE,

    // Arithmetic
    ADD,
    SUBTRACT,
    DIVIDE,
    MULTIPLY,
    MODULO,

    // Conditional and null-coalescing
    CONDITIONAL,
    NULL_COALESCING,

    // Plain and compound assignment
    ASSIGN,
    ASSIGN_ADD,
    ASSIGN_SUBTRACT,
    ASSIGN_MULTIPLY,
    ASSIGN_DIVIDE,
    ASSIGN_MODULO,
    ASSIGN_SHIFT_LEFT,
    ASSIGN_SHIFT_RIGHT,
    ASSIGN_XOR,
    ASSIGN_AND,
    ASSIGN_OR,
    ASSIGN_DOUBLE_AND,
    ASSIGN_DOUBLE_OR,
    ASSIGN_NULL_COALESCING,

    // Member access and separators
    PTR_MEMBER,
    DOT,
    COMMA,
    SEMICOLON,
    COLON,
    DOUBLE_COLON,
    LAMBDA,

    // Paired delimiters
    BRACKET_OPEN,
    BRACKET_CLOSE,
    BRACE_OPEN,
    BRACE_CLOSE,
    PAREN_OPEN,
    PAREN_CLOSE,

    // NOTE(review): presumably word-like operators — confirm against operator_find.
    VAL,
    REF,
    SIZEOF,
};
struct token_t {
private:
enum kind_t {
NONE,
IDENTIFIER,
OPERATOR,
2022-10-28 07:08:30 +00:00
INT_LITERAL,
STR_LITERAL,
2022-09-19 07:34:19 +00:00
} kind;
union data_t {
std::string *identifier;
operator_t _operator;
std::vector<uint8_t> *literal;
2022-09-19 07:34:19 +00:00
} data;
public:
ppc::location_t location;
bool is_identifier() const { return kind == IDENTIFIER; }
bool is_operator() const { return kind == OPERATOR; }
2022-10-28 07:08:30 +00:00
bool is_int_literal() const { return kind == INT_LITERAL; }
bool is_str_literal() const { return kind == STR_LITERAL; }
bool is_literal() const { return is_int_literal() || is_str_literal(); }
2022-09-19 07:34:19 +00:00
const auto &identifier() const {
2022-09-19 07:34:19 +00:00
if (!is_identifier()) throw std::string { "Token is not an identifier." };
else return *data.identifier;
}
auto _operator() const {
2022-09-19 07:34:19 +00:00
if (!is_operator()) throw std::string { "Token is not an operator." };
else return data._operator;
}
const auto &literal() const {
if (!is_literal()) throw std::string { "Token is not a literal." };
else return *data.literal;
2022-09-19 07:34:19 +00:00
}
bool is_operator(operator_t op) const { return is_operator() && _operator() == op; }
bool is_identifier(const std::string &val) const { return is_identifier() && identifier() == val; }
2022-09-19 07:34:19 +00:00
token_t() { kind = NONE; }
2022-10-09 13:34:02 +00:00
token_t(const std::string &identifier, location_t loc = location_t::NONE): location(loc) {
2022-09-19 07:34:19 +00:00
kind = IDENTIFIER;
data.identifier = new std::string { identifier };
}
2022-10-09 13:34:02 +00:00
token_t(operator_t op, location_t loc = location_t::NONE): location(loc) {
2022-09-19 07:34:19 +00:00
kind = OPERATOR;
data._operator = op;
}
2022-10-28 07:08:30 +00:00
token_t(const std::vector<uint8_t> &val, bool is_str, location_t loc = location_t::NONE): location(loc) {
kind = is_str ? STR_LITERAL : INT_LITERAL;
data.literal = new std::vector<uint8_t> { val };
2022-09-19 07:34:19 +00:00
}
2022-10-09 13:34:02 +00:00
token_t(const token_t &tok): location(tok.location) {
2022-09-19 07:34:19 +00:00
kind = tok.kind;
switch (kind) {
case NONE: break;
case IDENTIFIER: data.identifier = new std::string { *tok.data.identifier }; break;
case OPERATOR: data._operator = tok.data._operator; break;
2022-10-28 07:08:30 +00:00
case STR_LITERAL:
case INT_LITERAL: data.literal = new std::vector<uint8_t> { *tok.data.literal }; break;
2022-09-19 07:34:19 +00:00
}
}
~token_t() {
switch (kind) {
case IDENTIFIER: delete data.identifier; break;
2022-10-28 07:08:30 +00:00
case STR_LITERAL:
case INT_LITERAL: delete data.literal; break;
2022-09-19 07:34:19 +00:00
default: break;
}
}
2022-10-04 16:58:17 +00:00
static token_t parse(messages::msg_stack_t &msg_stack, lex::token_t token);
2022-09-19 07:34:19 +00:00
static std::vector<token_t> parse_many(messages::msg_stack_t &msg_stack, std::vector<lex::token_t> tokens);
};
// Maps the text `text` to an operator_t value. Defined elsewhere.
// NOTE(review): the result for text that names no operator is not visible from this header — presumably NONE; confirm in the implementation.
operator_t operator_find(const std::string &text);
// Returns the string form of operator `kw`. Defined elsewhere.
// The result is a reference, so the string is owned by the implementation, not the caller.
// NOTE(review): presumably the inverse of operator_find — confirm in the implementation.
const std::string &operator_stringify(operator_t kw);
}