From f478e6c1223cc8370fa51d44b9244ec25be99788 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Sun, 13 Feb 2022 20:20:59 -0400 Subject: igen: Start work on writing an instruction handler generator. This will make it easier in the long run to modify instructions, add new instructions, and move the opcode tables around. --- igen/lexer.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 igen/lexer.c (limited to 'igen/lexer.c') diff --git a/igen/lexer.c b/igen/lexer.c new file mode 100644 index 0000000..275bcdd --- /dev/null +++ b/igen/lexer.c @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include "lexer.h" +#include "misc.h" + +atom get_token_id(const char c, const int dbg) { + switch (c) { + case '(': return ATOM_LBRACK; + case ')': return ATOM_RBRACK; + case '/': return ATOM_SLASH; + case '+': return ATOM_PLUS; + case '-': return ATOM_MINUS; + case '*': return ATOM_ASTR; + case '%': return ATOM_PRCNT; + case '&': return ATOM_AMPR; + case '|': return ATOM_PIPE; + case '^': return ATOM_CARROT; + case '#': return ATOM_HASH; + case ':': return ATOM_COL; + case ';': return ATOM_SCOL; + case ' ': return ATOM_SPACE; + case '_': return ATOM_USCORE; + case '=': return ATOM_EQUAL; + case '.': return ATOM_DOT; + case '?': return ATOM_QMARK; + case '!': return ATOM_BANG; + case '<': return ATOM_LT; + case '>': return ATOM_GT; + case '%': return ATOM_PERCENT; + case ',': return ATOM_COMMA; + case '\\': return ATOM_BSLASH; + case '\"': return ATOM_QUOTE; + case '\'': return ATOM_SQUOTE; + case '\t': return ATOM_TAB; + case '\n': return ATOM_NLINE; + default: + if (isalpha(c)) { + return ATOM_ALPHA; + } else if (isdigit(c)) { + return ATOM_NUM; + } + break; + } + return ATOM_NONE; +} + +int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) { + int i; + for (i = 0; str[i] != '\0'; ++i) { + const enum atom atom = get_atom_id(str[i], dbg); + for (int j = 0; atoms[j] != ATOM_NONE; ++j) { + const int is_done = (inv) ? (atom == atoms[j]) : (atom != atoms[j]); + if (is_done) { + return i; + } + } + } + return i; +} + +int lex(char *str, int dbg) { + int in_inst_stmt = 0; + lexeme *lex_start = NULL; + lexeme *lex_end = NULL; + + for (int i = 0; str[i] != '\0'; ++i) { + atom atom_id = get_atom_id(str[i], dbg); + switch (atom_id) { + case ATOM_PERCENT: + if (get_atom_id(str[++i]) == ATOM_PERCENT) { + in_inst_stmt = !in_inst_stmt; + } + break; + case ATOM_LBRACK: + break; + case ATOM_ALPHA: + do { + const int ident_len = get_atom_span(&str[i], (const atom []) { + ATOM_ALPHA, + ATOM_USCORE, + ATOM_NUM, + ATOM_NONE + }, 0, dbg); + char *ident = calloc(ident_len+1, sizeof(char)); + } while(0); + break; + } + } +} -- cgit v1.2.3-13-gbd6f