summaryrefslogtreecommitdiff
path: root/igen/lexer.c
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2022-02-13 20:20:59 -0400
committermrb0nk500 <b0nk@b0nk.xyz>2022-02-13 20:20:59 -0400
commitf478e6c1223cc8370fa51d44b9244ec25be99788 (patch)
tree4abe8889888c0b098ea99ee020c446254822a923 /igen/lexer.c
parent6833f6bc2a5730169084c74d8e8fc0b76666b2a0 (diff)
igen: Start work on writing an instruction handler generator.
This will make it easier in the long run to modify instructions, add new instructions, and move the opcode tables around.
Diffstat (limited to 'igen/lexer.c')
-rw-r--r--igen/lexer.c91
1 files changed, 91 insertions, 0 deletions
diff --git a/igen/lexer.c b/igen/lexer.c
new file mode 100644
index 0000000..275bcdd
--- /dev/null
+++ b/igen/lexer.c
@@ -0,0 +1,91 @@
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+#include "misc.h"
+
+/*
+ * Classify a single character as a lexer atom.
+ *
+ * c:   character to classify.
+ * dbg: debug flag; accepted for interface compatibility, not used here.
+ *
+ * Returns the ATOM_* constant for the punctuation/whitespace characters
+ * listed below, ATOM_ALPHA for alphabetic characters, ATOM_NUM for decimal
+ * digits, and ATOM_NONE for anything else (including '\0').
+ */
+atom get_token_id(const char c, const int dbg) {
+    (void)dbg;
+
+    switch (c) {
+    case '(': return ATOM_LBRACK;
+    case ')': return ATOM_RBRACK;
+    case '/': return ATOM_SLASH;
+    case '+': return ATOM_PLUS;
+    case '-': return ATOM_MINUS;
+    case '*': return ATOM_ASTR;
+    case '&': return ATOM_AMPR;
+    case '|': return ATOM_PIPE;
+    case '^': return ATOM_CARROT;
+    case '#': return ATOM_HASH;
+    case ':': return ATOM_COL;
+    case ';': return ATOM_SCOL;
+    case ' ': return ATOM_SPACE;
+    case '_': return ATOM_USCORE;
+    case '=': return ATOM_EQUAL;
+    case '.': return ATOM_DOT;
+    case '?': return ATOM_QMARK;
+    case '!': return ATOM_BANG;
+    case '<': return ATOM_LT;
+    case '>': return ATOM_GT;
+    /*
+     * '%' used to be listed twice (ATOM_PRCNT and ATOM_PERCENT); duplicate
+     * case labels do not compile. ATOM_PERCENT is kept because it is the
+     * constant lex() switches on.
+     */
+    case '%': return ATOM_PERCENT;
+    case ',': return ATOM_COMMA;
+    case '\\': return ATOM_BSLASH;
+    case '\"': return ATOM_QUOTE;
+    case '\'': return ATOM_SQUOTE;
+    case '\t': return ATOM_TAB;
+    case '\n': return ATOM_NLINE;
+    default: {
+        /* <ctype.h> functions require a value representable as unsigned char. */
+        const unsigned char uc = (unsigned char)c;
+        if (isalpha(uc)) {
+            return ATOM_ALPHA;
+        }
+        if (isdigit(uc)) {
+            return ATOM_NUM;
+        }
+        break;
+    }
+    }
+    return ATOM_NONE;
+}
+
+/*
+ * Measure the length of the leading run of `str` selected by an atom set.
+ *
+ * str:   NUL-terminated string to scan.
+ * atoms: set of atoms, terminated by ATOM_NONE.
+ * inv:   0     -> count leading characters whose atom IS in `atoms`
+ *                 (strspn-like);
+ *        non-0 -> count leading characters whose atom is NOT in `atoms`
+ *                 (strcspn-like).
+ * dbg:   debug flag, forwarded to get_token_id().
+ *
+ * Returns the number of characters in the run; this equals the length of
+ * `str` when the run reaches the terminating NUL. With an empty set the
+ * result is 0 for inv == 0 and the length of `str` otherwise.
+ */
+int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) {
+    int i;
+    for (i = 0; str[i] != '\0'; ++i) {
+        const atom id = get_token_id(str[i], dbg);
+
+        /*
+         * Decide membership against the whole set before testing the stop
+         * condition. Testing per element ended a non-inverted scan on the
+         * first character whenever the set held more than one atom.
+         */
+        int in_set = 0;
+        for (int j = 0; atoms[j] != ATOM_NONE; ++j) {
+            if (id == atoms[j]) {
+                in_set = 1;
+                break;
+            }
+        }
+
+        /* Non-inverted: stop at the first non-member. Inverted: first member. */
+        if (in_set == (inv != 0)) {
+            return i;
+        }
+    }
+    return i;
+}
+
+/*
+ * Scan `str` and recognise the constructs handled so far: "%%" markers,
+ * which toggle the instruction-statement state, and identifiers (a letter
+ * followed by letters, digits or underscores).
+ *
+ * This is still scaffolding: no lexemes are produced yet, so the lexeme
+ * list pointers and the instruction-statement flag are maintained but not
+ * consumed.
+ *
+ * str: NUL-terminated input; it is only read here.
+ * dbg: debug flag, forwarded to the helper functions.
+ *
+ * Returns 0 when the whole input was scanned, or -1 if memory for an
+ * identifier could not be allocated.
+ */
+int lex(char *str, int dbg) {
+    int in_inst_stmt = 0;
+    lexeme *lex_start = NULL;
+    lexeme *lex_end = NULL;
+
+    for (int i = 0; str[i] != '\0'; ++i) {
+        const atom atom_id = get_token_id(str[i], dbg);
+        switch (atom_id) {
+        case ATOM_PERCENT:
+            /*
+             * Peek at the next character and consume it only when it
+             * completes a "%%" pair. str[i+1] is at worst the terminating
+             * NUL, so the cursor can never run past the end, and a lone
+             * '%' no longer swallows the character after it.
+             */
+            if (get_token_id(str[i + 1], dbg) == ATOM_PERCENT) {
+                ++i;
+                in_inst_stmt = !in_inst_stmt;
+            }
+            break;
+        case ATOM_LBRACK:
+            break;
+        case ATOM_ALPHA: {
+            const int ident_len = get_atom_span(&str[i], (const atom []) {
+                ATOM_ALPHA,
+                ATOM_USCORE,
+                ATOM_NUM,
+                ATOM_NONE
+            }, 0, dbg);
+            char *ident = calloc((size_t)ident_len + 1, sizeof *ident);
+            if (ident == NULL) {
+                return -1;
+            }
+            /* calloc zero-filled the buffer, so the copy stays NUL-terminated. */
+            memcpy(ident, &str[i], (size_t)ident_len);
+
+            /*
+             * TODO: hand `ident` over to a lexeme once lexemes are built.
+             * Until something owns it, release it here so it does not leak.
+             */
+            free(ident);
+
+            /*
+             * Skip the rest of the identifier; the loop's ++i steps over
+             * its last character. The guard keeps the cursor moving
+             * forward even if the reported span is 0.
+             */
+            if (ident_len > 1) {
+                i += ident_len - 1;
+            }
+            break;
+        }
+        default:
+            break;
+        }
+    }
+
+    /* Not consumed yet; kept for the upcoming lexeme construction. */
+    (void)in_inst_stmt;
+    (void)lex_start;
+    (void)lex_end;
+    return 0;
+}