diff options
author | mrb0nk500 <b0nk@b0nk.xyz> | 2022-02-15 18:49:29 -0400 |
---|---|---|
committer | mrb0nk500 <b0nk@b0nk.xyz> | 2022-02-18 10:05:01 -0400 |
commit | 722c7f08e409d1f6f3a26bda666c15d7082f52e3 (patch) | |
tree | af0c77367a611398db7214b2bda90b499196b941 | |
parent | f478e6c1223cc8370fa51d44b9244ec25be99788 (diff) |
igen: Start work on a better lexer.
The other one I was about to write would've been harder
to read and understand.
So, I've decided to write a more readable version.
-rw-r--r-- | igen/lexer.c | 121 | ||||
-rw-r--r-- | igen/lexer.h | 82 |
2 files changed, 92 insertions, 111 deletions
diff --git a/igen/lexer.c b/igen/lexer.c index 275bcdd..03f7e87 100644 --- a/igen/lexer.c +++ b/igen/lexer.c @@ -5,87 +5,60 @@ #include "lexer.h" #include "misc.h" -atom get_token_id(const char c, const int dbg) { - switch (c) { - case '(': return ATOM_LBRACK; - case ')': return ATOM_RBRACK; - case '/': return ATOM_SLASH; - case '+': return ATOM_PLUS; - case '-': return ATOM_MINUS; - case '*': return ATOM_ASTR; - case '%': return ATOM_PRCNT; - case '&': return ATOM_AMPR; - case '|': return ATOM_PIPE; - case '^': return ATOM_CARROT; - case '#': return ATOM_HASH; - case ':': return ATOM_COL; - case ';': return ATOM_SCOL; - case ' ': return ATOM_SPACE; - case '_': return ATOM_USCORE; - case '=': return ATOM_EQUAL; - case '.': return ATOM_DOT; - case '?': return ATOM_QMARK; - case '!': return ATOM_BANG; - case '<': return ATOM_LT; - case '>': return ATOM_GT; - case '%': return ATOM_PERCENT; - case ',': return ATOM_COMMA; - case '\\': return ATOM_BSLASH; - case '\"': return ATOM_QUOTE; - case '\'': return ATOM_SQUOTE; - case '\t': return ATOM_TAB; - case '\n': return ATOM_NLINE; - default: - if (isalpha(c)) { - return ATOM_ALPHA; - } else if (isdigit(c)) { - return ATOM_NUM; - } - break; +cond_stmt *lex_cond_stmt(char **str, int dbg) { + +} + +stmt *lex_comp_stmt(char **str, int dbg) { + char *tmp = *str; + if (*tmp++ == '{') { + stmt *s = lex_stmt(&tmp, dbg); + if (*tmp++ == '}') { + *str = tmp; + return s; + } else { + throw_error("Missing \'}\' in stmt."); + } } - return ATOM_NONE; + return NULL; } -int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) { - int i; - for (i = 0; str[i] != '\0'; ++i) { - const enum atom atom = get_atom_id(str[i], dbg); - for (int j = 0; atoms[j] != ATOM_NONE; ++j) { - const int is_done = (inv) ? 
(atom == atoms[j]) : (atom != atoms[j]); - if (is_done) { - return i; - } +stmt *lex_stmt(char **str, int dbg) { + const alt_stmt alts[] = { + {STMT_DIR, offsetof(stmt, dir), lex_dir}, + {STMT_FUNC, offsetof(stmt, func), lex_func}, + {STMT_EXPR, offsetof(stmt, expr), lex_exprs}, + {STMT_COND, offsetof(stmt, cond_stmt), lex_cond_stmt}, + {STMT_COMP, offsetof(stmt, down), lex_comp_stmt}, + }; + for (int i = 0; i < NUM_STMTS; ++i) { + char *tmp = *str; + void *data = alts[i].lex(&tmp, dbg); + if (data != NULL) { + stmt *s = calloc(1, sizeof(stmt)); + void **member = (char **)s+alts[i].offset; + *member = data; + return s; } } - return i; + + return NULL; } +stmt *lex_library(char **str, stmt **end, int dbg) { + stmt *start = lex_stmt(str, dbg); + end = (end != NULL) ? end : &start; + for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) { + (*end)->next = s; + *end = s; + } + return start; +}; + int lex(char *str, int dbg) { - int in_inst_stmt = 0; - lexeme *lex_start = NULL; - lexeme *lex_end = NULL; + stmt *start = NULL; + stmt *end = NULL; - for (int i = 0; str[i] != '\0'; ++i) { - atom atom_id = get_atom_id(str[i], dbg); - switch (atom_id) { - case ATOM_PERCENT: - if (get_atom_id(str[++i]) == ATOM_PERCENT) { - in_inst_stmt = !in_inst_stmt; - } - break; - case ATOM_LBRACK: - break; - case ATOM_ALPHA: - do { - const int ident_len = get_atom_span(&str[i], (const atom []) { - ATOM_ALPHA, - ATOM_USCORE, - ATOM_NUM, - ATOM_NONE - }, 0, dbg); - char *ident = calloc(ident_len+1, sizeof(char)); - } while(0); - break; - } - } + start = library(&str, &end, dbg); + return (start != NULL && end != NULL); } diff --git a/igen/lexer.h b/igen/lexer.h index ba81cb1..454d2e1 100644 --- a/igen/lexer.h +++ b/igen/lexer.h @@ -1,45 +1,53 @@ #ifndef LEXER_H #define LEXER_H -typedef enum atom atom; -typedef struct lexeme lexeme; - -enum atom { - ATOM_LBRACK, - ATOM_RBRACK, - ATOM_SLASH, - ATOM_PLUS, - ATOM_MINUS, - ATOM_ASTR, - ATOM_PRCNT, - ATOM_AMPR, - ATOM_PIPE, - ATOM_CARROT, - 
ATOM_HASH, - ATOM_COL, - ATOM_SCOL, - ATOM_SPACE, - ATOM_USCORE, - ATOM_EQUAL, - ATOM_DOT, - ATOM_QMARK, - ATOM_BANG, - ATOM_LT, - ATOM_GT, - ATOM_PERCENT, - ATOM_COMMA, - ATOM_BSLASH, - ATOM_QUOTE, - ATOM_SQUOTE, - ATOM_TAB, - ATOM_NLINE, - ATOM_ALPHA, - ATOM_NUM, - ATOM_NONE, - NUM_ATOMS +#include <stdlib.h> + +typedef enum stmt_type stmt_type; +typedef enum cond_type cond_type; +typedef struct alt_stmt alt_stmt; +typedef struct cond_stmt cond_stmt; +typedef struct stmt stmt; + +enum stmt_type { + STMT_DIR, + STMT_FUNC, + STMT_EXPR, + STMT_COND, + STMT_COMP, + NUM_STMTS +}; + +enum cond_type { + COND_IF, + COND_FOR, + COND_WHILE, + COND_DO_WHILE, + NUM_CONDS +}; + +struct alt_stmt { + int type; + size_t offset; + void *(*lex)(char **str, int dbg); +}; + +struct cond_stmt { + cond_type type; + expr *expr; + stmt *stmt; }; -struct lexeme { +struct stmt { + stmt_type type; + union { + dir *dir; + func *func; + expr *expr; + cond_stmt *cond_stmt; + stmt *down; + }; + stmt *next; }; extern int lex(char *str, int dbg); |