summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2022-02-15 18:49:29 -0400
committermrb0nk500 <b0nk@b0nk.xyz>2022-02-18 10:05:01 -0400
commit722c7f08e409d1f6f3a26bda666c15d7082f52e3 (patch)
treeaf0c77367a611398db7214b2bda90b499196b941
parentf478e6c1223cc8370fa51d44b9244ec25be99788 (diff)
igen: Start work on a better lexer.
The other one I was about to do would've been harder to read and understand, so I've decided to write a more readable version.
-rw-r--r--igen/lexer.c121
-rw-r--r--igen/lexer.h82
2 files changed, 92 insertions, 111 deletions
diff --git a/igen/lexer.c b/igen/lexer.c
index 275bcdd..03f7e87 100644
--- a/igen/lexer.c
+++ b/igen/lexer.c
@@ -5,87 +5,60 @@
#include "lexer.h"
#include "misc.h"
-atom get_token_id(const char c, const int dbg) {
- switch (c) {
- case '(': return ATOM_LBRACK;
- case ')': return ATOM_RBRACK;
- case '/': return ATOM_SLASH;
- case '+': return ATOM_PLUS;
- case '-': return ATOM_MINUS;
- case '*': return ATOM_ASTR;
- case '%': return ATOM_PRCNT;
- case '&': return ATOM_AMPR;
- case '|': return ATOM_PIPE;
- case '^': return ATOM_CARROT;
- case '#': return ATOM_HASH;
- case ':': return ATOM_COL;
- case ';': return ATOM_SCOL;
- case ' ': return ATOM_SPACE;
- case '_': return ATOM_USCORE;
- case '=': return ATOM_EQUAL;
- case '.': return ATOM_DOT;
- case '?': return ATOM_QMARK;
- case '!': return ATOM_BANG;
- case '<': return ATOM_LT;
- case '>': return ATOM_GT;
- case '%': return ATOM_PERCENT;
- case ',': return ATOM_COMMA;
- case '\\': return ATOM_BSLASH;
- case '\"': return ATOM_QUOTE;
- case '\'': return ATOM_SQUOTE;
- case '\t': return ATOM_TAB;
- case '\n': return ATOM_NLINE;
- default:
- if (isalpha(c)) {
- return ATOM_ALPHA;
- } else if (isdigit(c)) {
- return ATOM_NUM;
- }
- break;
+/* Lex a conditional statement starting at *str.
+ * Not implemented yet: always returns NULL and leaves *str untouched, which
+ * lex_stmt treats as "this alternative did not match". The explicit return
+ * avoids flowing off the end of a non-void function whose result is used. */
+cond_stmt *lex_cond_stmt(char **str, int dbg) {
+    (void)str; /* unused until the conditional lexer is written */
+    (void)dbg;
+    return NULL;
+}
+
+stmt *lex_comp_stmt(char **str, int dbg) { /* Lex a brace-enclosed statement: '{' stmt '}'. Returns the inner stmt, or NULL. */
+ char *tmp = *str; /* work on a copy; *str is only updated once the closing '}' has been seen */
+ if (*tmp++ == '{') {
+ stmt *s = lex_stmt(&tmp, dbg); /* inner statement; may be NULL */
+ if (*tmp++ == '}') {
+ *str = tmp; /* advance the caller's cursor past '}' */
+ return s;
+ } else {
+ throw_error("Missing \'}\' in stmt."); /* NOTE(review): if throw_error returns, s is dropped and NULL is returned below - confirm */
+ }
}
- return ATOM_NONE;
+ return NULL; /* no '{' at *str, or the closing '}' was missing */
}
-int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) {
- int i;
- for (i = 0; str[i] != '\0'; ++i) {
- const enum atom atom = get_atom_id(str[i], dbg);
- for (int j = 0; atoms[j] != ATOM_NONE; ++j) {
- const int is_done = (inv) ? (atom == atoms[j]) : (atom != atoms[j]);
- if (is_done) {
- return i;
- }
+/* Lex one statement at *str by trying each statement kind in turn.
+ * On the first alternative whose lexer returns non-NULL, allocates a stmt,
+ * tags it with that alternative's type, stores the lexed data in the matching
+ * member, advances *str past the consumed text and returns the stmt.
+ * Returns NULL if no alternative matches or the allocation fails; *str is
+ * left unchanged in that case. */
+stmt *lex_stmt(char **str, int dbg) {
+    const alt_stmt alts[] = {
+        {STMT_DIR, offsetof(stmt, dir), lex_dir},
+        {STMT_FUNC, offsetof(stmt, func), lex_func},
+        {STMT_EXPR, offsetof(stmt, expr), lex_exprs},
+        {STMT_COND, offsetof(stmt, cond_stmt), lex_cond_stmt},
+        {STMT_COMP, offsetof(stmt, down), lex_comp_stmt},
+    };
+    for (int i = 0; i < NUM_STMTS; ++i) {
+        char *tmp = *str; /* every alternative starts from the same position */
+        void *data = alts[i].lex(&tmp, dbg);
+        if (data != NULL) {
+            stmt *s = calloc(1, sizeof *s);
+            if (s == NULL) {
+                /* NOTE(review): data is not released here; its owner/type is not known at this point */
+                return NULL;
+            }
+            s->type = alts[i].type;
+            /* offset is a byte count, so step through a char pointer before
+             * converting to the member's address */
+            void **member = (void **)((char *)s + alts[i].offset);
+            *member = data;
+            *str = tmp; /* consume the text this alternative lexed */
+            return s;
}
}
- return i;
+
+ return NULL;
}
+/* Lex statements from *str until lex_stmt returns NULL, chaining them through
+ * their next pointers.
+ * If end is non-NULL and *end is non-NULL on entry, the new statements are
+ * appended after *end. On return *end (when end is non-NULL) is the last
+ * statement of the chain; it is unchanged if nothing was lexed.
+ * Returns the first statement lexed by this call, or NULL if there was none. */
+stmt *lex_library(char **str, stmt **end, int dbg) {
+    stmt *head = NULL;
+    stmt *tail = (end != NULL) ? *end : NULL;
+    for (stmt *s = lex_stmt(str, dbg); s != NULL; s = lex_stmt(str, dbg)) {
+        if (tail != NULL) {
+            tail->next = s;
+        }
+        if (head == NULL) {
+            head = s;
+        }
+        tail = s;
+    }
+    if (end != NULL) {
+        *end = tail;
+    }
+    return head;
+}
+
+/* Lex the whole input string into a chain of statements.
+ * Returns 1 when lex_library produced both a first and a last statement,
+ * 0 otherwise. */
int lex(char *str, int dbg) {
- int in_inst_stmt = 0;
- lexeme *lex_start = NULL;
- lexeme *lex_end = NULL;
+ stmt *start = NULL;
+ stmt *end = NULL;
- for (int i = 0; str[i] != '\0'; ++i) {
- atom atom_id = get_atom_id(str[i], dbg);
- switch (atom_id) {
- case ATOM_PERCENT:
- if (get_atom_id(str[++i]) == ATOM_PERCENT) {
- in_inst_stmt = !in_inst_stmt;
- }
- break;
- case ATOM_LBRACK:
- break;
- case ATOM_ALPHA:
- do {
- const int ident_len = get_atom_span(&str[i], (const atom []) {
- ATOM_ALPHA,
- ATOM_USCORE,
- ATOM_NUM,
- ATOM_NONE
- }, 0, dbg);
- char *ident = calloc(ident_len+1, sizeof(char));
- } while(0);
- break;
- }
- }
+ /* the list lexer in this file is lex_library; no function named library is defined here */
+ start = lex_library(&str, &end, dbg);
+ return (start != NULL && end != NULL);
}
diff --git a/igen/lexer.h b/igen/lexer.h
index ba81cb1..454d2e1 100644
--- a/igen/lexer.h
+++ b/igen/lexer.h
@@ -1,45 +1,53 @@
#ifndef LEXER_H
#define LEXER_H
-typedef enum atom atom;
-typedef struct lexeme lexeme;
-
-enum atom {
- ATOM_LBRACK,
- ATOM_RBRACK,
- ATOM_SLASH,
- ATOM_PLUS,
- ATOM_MINUS,
- ATOM_ASTR,
- ATOM_PRCNT,
- ATOM_AMPR,
- ATOM_PIPE,
- ATOM_CARROT,
- ATOM_HASH,
- ATOM_COL,
- ATOM_SCOL,
- ATOM_SPACE,
- ATOM_USCORE,
- ATOM_EQUAL,
- ATOM_DOT,
- ATOM_QMARK,
- ATOM_BANG,
- ATOM_LT,
- ATOM_GT,
- ATOM_PERCENT,
- ATOM_COMMA,
- ATOM_BSLASH,
- ATOM_QUOTE,
- ATOM_SQUOTE,
- ATOM_TAB,
- ATOM_NLINE,
- ATOM_ALPHA,
- ATOM_NUM,
- ATOM_NONE,
- NUM_ATOMS
+#include <stdlib.h>
+
+/* Struct tags may be named before they are defined. ISO C does not allow
+ * that for enum tags, so the enum typedefs are attached to the definitions
+ * below instead of being forward-declared. */
+typedef struct alt_stmt alt_stmt;
+typedef struct cond_stmt cond_stmt;
+typedef struct stmt stmt;
+
+/* Kinds of statement the lexer can produce. */
+typedef enum stmt_type {
+    STMT_DIR,
+    STMT_FUNC,
+    STMT_EXPR,
+    STMT_COND,
+    STMT_COMP,
+    NUM_STMTS /* number of statement kinds; used as a loop bound, not a kind itself */
+} stmt_type;
+
+/* Kinds of conditional statement. */
+typedef enum cond_type {
+    COND_IF,
+    COND_FOR,
+    COND_WHILE,
+    COND_DO_WHILE,
+    NUM_CONDS /* number of conditional kinds, not a kind itself */
+} cond_type;
+
+struct alt_stmt { /* one entry of the statement-alternative table built in lex_stmt */
+ int type; /* stmt_type constant for this alternative */
+ size_t offset; /* offsetof() the struct stmt member that receives the lexer's result */
+ void *(*lex)(char **str, int dbg); /* lexer to try; lex_stmt treats a NULL result as "no match" */
+};
+
+struct cond_stmt { /* a conditional statement node */
+ cond_type type; /* which conditional form (see enum cond_type) */
+ expr *expr; /* NOTE(review): expr is not declared in the visible part of this header - confirm where it comes from */
+ stmt *stmt; /* presumably the statement controlled by the condition - TODO confirm */
};
-struct lexeme {
+struct stmt { /* one lexed statement, linked into a list through next */
+ stmt_type type; /* statement kind */
+ union { /* anonymous union: one pointer member per statement kind */
+ dir *dir; /* NOTE(review): dir, func and expr are not declared in the visible part of this header - confirm */
+ func *func;
+ expr *expr;
+ cond_stmt *cond_stmt;
+ stmt *down; /* member paired with STMT_COMP / lex_comp_stmt in lex_stmt's table */
+ };
+ stmt *next; /* following statement; set by lex_library */
};
extern int lex(char *str, int dbg);