From 524cfc23b15e1067076e45b056cb1d84e87e66cb Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Thu, 17 Feb 2022 16:42:31 -0400 Subject: igen: Did some more work on it. --- igen/lexer.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 120 insertions(+), 22 deletions(-) (limited to 'igen/lexer.c') diff --git a/igen/lexer.c b/igen/lexer.c index 1d3268e..15226b8 100644 --- a/igen/lexer.c +++ b/igen/lexer.c @@ -6,35 +6,133 @@ #include "misc.h" #include "preprocessor.h" +char *skip_seperators(source *src, whitespace *wspace, int preserve_src, int dbg) { + char *text = NULL; + char *tmp = src->text; + + src->text = max(src->text, text); + + src->text = skip_whitespace(src, wspace, 1, 1, dbg); + src->text = skip_comment(src, wspace, dbg); + + text = src->text; + if (preserve_src) { + src->text = tmp; + } + return text; +} + cond_stmt *lex_cond_stmt(source *src, int dbg) { + const keyword *cond_keywords[] = { + {"do", COND_DO, NULL}, + {"for", COND_FOR, NULL}, + {"if", COND_IF, NULL}, + {"while", COND_WHILE, NULL}, + }; + const char *text = src->text; + char *key = NULL; + size_t key_len = 0; + cond_type type = COND_NONE; + whitespace wsp = {0}; + + text = skip_seperators(src, &wsp, 1, dbg); + key_len = strcspn(text, " \t\v\b"); + + key = calloc(key_len+1, sizeof(char)); + memcpy(key, text, key_len); + type = find_keyword(key, cond_keywords, NULL, NULL, dbg); + free(key); + + text += key_len+1; + + if (type >= COND_NONE) { + cond_stmt *cond = calloc(1, sizeof(cond_stmt)); + cond->type = type; + + src->text = text; + src->cur.x += key_len+1; + + if (type != COND_DO) { + cond->expr = lex_expr(src, dbg); + } + + cond->stmt = lex_stmt(src, dbg); + + if (type == COND_DO) { + text = skip_seperators(src, &wsp, 1, dbg); + key_len = strcspn(text, " \t\v\b"); + + key = calloc(key_len+1, sizeof(char)); + memcpy(key, text, key_len); + + if (!strcmp(key, "while")) { + text += key_len+1; + + src->text = text; + src->cur.x += key_len+1; + + cond->expr = lex_expr(src, dbg); + + text = skip_seperators(src, &wsp, 1, dbg); + + if (*text++ != ';') { + --text; + throw_error(src, 1, "Missing \';\' after do while statement."); + } else { + src->text = text; + ++src->cur.x; + } + } else { + throw_error(src, 1, "Missing \'while\' after do while statement."); + } + free(key); + } + return s; + } } stmt *lex_comp_stmt(source *src, int dbg) { - char *tmp = *str; - if (*tmp++ == '{') { - stmt *s = lex_stmt(&tmp, dbg); - if (*tmp++ == '}') { - *str = tmp; - return s; + char *text = NULL; + whitespace wsp = {0}; + + text = skip_seperators(src, &wsp, dbg); + + if (*text++ == '{') { + stmt *s; + + src->text = text; + ++src->cur.x; + + s = lex_stmt(src, dbg); + s->wsp = wsp; + + text = src->text; + + if (*text++ == '}') { + ++src->cur.x; } else { - throw_error(src, 1, "Missing \'}\' in compound statement."); + --text; + throw_error(src, 1, "Missing terminating \'}\' in compound statement."); } + src->text = text; + return s; } return NULL; } stmt *lex_stmt(source *src, int dbg) { + const char *text = src->text; const alt_stmt alts[] = { - {STMT_DIR, offsetof(stmt, dir), lex_dir}, {STMT_FUNC, offsetof(stmt, func), lex_func}, {STMT_EXPR, offsetof(stmt, expr), lex_exprs}, {STMT_COND, offsetof(stmt, cond_stmt), lex_cond_stmt}, {STMT_COMP, offsetof(stmt, down), lex_comp_stmt}, }; + for (int i = 0; i < NUM_STMTS; ++i) { - char *tmp = *str; - void *data = alts[i].lex(&tmp, dbg); + src->text = text; + void *data = alts[i].lex(src, dbg); if (data != NULL) { stmt *s = calloc(1, sizeof(stmt)); void **member = (char **)s+alts[i].offset; @@ -43,25 +141,25 @@ stmt *lex_stmt(source *src, int dbg) { } } + src->text = text; return NULL; } -stmt *lex_library(source *src, stmt **end, int dbg) { - stmt *start = lex_stmt(str, dbg); - end = (end != NULL) ? end : &start; - for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) { - (*end)->next = s; - *end = s; +void lex_library(source *src, int dbg) { + src->root = lex_stmt(src, dbg); + src->last = (src->last != NULL) ? src->last : src->root; + for (stmt *s = src->root; s != NULL; s = lex_stmt(str, dbg)) { + src->last->next = s; + src->last = s; } return start; }; -int lex(char *str, int dbg) { - stmt *start = NULL; - stmt *end = NULL; +int lex(source *src, int dbg) { + char *text = src->text; - source *src = preprocess(str, dbg); - start = library(&str, &end, dbg); + library(src, dbg); + src->text = text; - return (start != NULL && end != NULL); + return (src->root != NULL && src->last != NULL); } -- cgit v1.2.3-13-gbd6f