From 524cfc23b15e1067076e45b056cb1d84e87e66cb Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Thu, 17 Feb 2022 16:42:31 -0400 Subject: igen: Did some more work on it. --- igen/igen.c | 13 +-- igen/lexer.c | 142 ++++++++++++++++++++---- igen/lexer.h | 12 +- igen/misc.h | 3 + igen/preprocessor.c | 314 ++++++++++++++++++++++++++++++++++++++++++++++++---- igen/preprocessor.h | 34 +++++- 6 files changed, 464 insertions(+), 54 deletions(-) diff --git a/igen/igen.c b/igen/igen.c index fa9f10f..921ca79 100644 --- a/igen/igen.c +++ b/igen/igen.c @@ -4,27 +4,26 @@ #include #include "lexer.h" #include "misc.h" +#include "preprocessor.h" void usage(const char *name) { printf("Usage: %s \n", name); } int main(int argc, char **argv) { - char *buf; - long file_size = 0; int dbg = 0; int ret = 0; + source *root_src; if (argc < 2) { usage(argv[0]); ret = 1; } - buf = read_file(argc[1], &file_size); - if (buf == NULL) { - printf("Error: read_file() returned NULL.\n"); + root_src = preprocess(NULL, argv[1], dbg); + if (root_src == NULL) { usage(argv[0]); ret = 1; + } else { + ret = lex(root_src, dbg); } - - ret = lex(buf, dbg); return ret; } diff --git a/igen/lexer.c b/igen/lexer.c index 1d3268e..15226b8 100644 --- a/igen/lexer.c +++ b/igen/lexer.c @@ -6,35 +6,133 @@ #include "misc.h" #include "preprocessor.h" +char *skip_seperators(source *src, whitespace *wspace, int preserve_src, int dbg) { + char *text = NULL; + char *tmp = src->text; + + src->text = max(src->text, text); + + src->text = skip_whitespace(src, wspace, 1, 1, dbg); + src->text = skip_comment(src, wspace, dbg); + + text = src->text; + if (preserve_src) { + src->text = tmp; + } + return text; +} + cond_stmt *lex_cond_stmt(source *src, int dbg) { + const keyword *cond_keywords[] = { + {"do", COND_DO, NULL}, + {"for", COND_FOR, NULL}, + {"if", COND_IF, NULL}, + {"while", COND_WHILE, NULL}, + }; + const char *text = src->text; + char *key = NULL; + size_t key_len = 0; + cond_type type = COND_NONE; + whitespace wsp = {0}; + + text = skip_seperators(src, &wsp, 1, dbg); + key_len = strcspn(text, " \t\v\b"); + + key = calloc(key_len+1, sizeof(char)); + memcpy(key, text, key_len); + type = find_keyword(key, cond_keywords, NULL, NULL, dbg); + free(key); + + text += key_len+1; + + if (type >= COND_NONE) { + cond_stmt *cond = calloc(1, sizeof(cond_stmt)); + cond->type = type; + + src->text = text; + src->cur.x += key_len+1; + + if (type != COND_DO) { + cond->expr = lex_expr(src, dbg); + } + + cond->stmt = lex_stmt(src, dbg); + + if (type == COND_DO) { + text = skip_seperators(src, &wsp, 1, dbg); + key_len = strcspn(text, " \t\v\b"); + + key = calloc(key_len+1, sizeof(char)); + memcpy(key, text, key_len); + + if (!strcmp(key, "while")) { + text += key_len+1; + + src->text = text; + src->cur.x += key_len+1; + + cond->expr = lex_expr(src, dbg); + + text = skip_seperators(src, &wsp, 1, dbg); + + if (*text++ != ';') { + --text; + throw_error(src, 1, "Missing \';\' after do while statement."); + } else { + src->text = text; + ++src->cur.x; + } + } else { + throw_error(src, 1, "Missing \'while\' after do while statement."); + } + free(key); + } + return s; + } } stmt *lex_comp_stmt(source *src, int dbg) { - char *tmp = *str; - if (*tmp++ == '{') { - stmt *s = lex_stmt(&tmp, dbg); - if (*tmp++ == '}') { - *str = tmp; - return s; + char *text = NULL; + whitespace wsp = {0}; + + text = skip_seperators(src, &wsp, dbg); + + if (*text++ == '{') { + stmt *s; + + src->text = text; + ++src->cur.x; + + s = lex_stmt(src, dbg); + s->wsp = wsp; + + text = src->text; + + if (*text++ == '}') { + ++src->cur.x; } else { - throw_error(src, 1, "Missing \'}\' in compound statement."); + --text; + throw_error(src, 1, "Missing terminating \'}\' in compound statement."); } + src->text = text; + return s; } return NULL; } stmt *lex_stmt(source *src, int dbg) { + const char *text = src->text; const alt_stmt alts[] = { - {STMT_DIR, offsetof(stmt, dir), lex_dir}, {STMT_FUNC, offsetof(stmt, func), lex_func}, {STMT_EXPR, offsetof(stmt, expr), lex_exprs}, {STMT_COND, offsetof(stmt, cond_stmt), lex_cond_stmt}, {STMT_COMP, offsetof(stmt, down), lex_comp_stmt}, }; + for (int i = 0; i < NUM_STMTS; ++i) { - char *tmp = *str; - void *data = alts[i].lex(&tmp, dbg); + src->text = text; + void *data = alts[i].lex(src, dbg); if (data != NULL) { stmt *s = calloc(1, sizeof(stmt)); void **member = (char **)s+alts[i].offset; @@ -43,25 +141,25 @@ stmt *lex_stmt(source *src, int dbg) { } } + src->text = text; return NULL; } -stmt *lex_library(source *src, stmt **end, int dbg) { - stmt *start = lex_stmt(str, dbg); - end = (end != NULL) ? end : &start; - for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) { - (*end)->next = s; - *end = s; +void lex_library(source *src, int dbg) { + src->root = lex_stmt(src, dbg); + src->last = (src->last != NULL) ? src->last : src->root; + for (stmt *s = src->root; s != NULL; s = lex_stmt(str, dbg)) { + src->last->next = s; + src->last = s; } return start; }; -int lex(char *str, int dbg) { - stmt *start = NULL; - stmt *end = NULL; +int lex(source *src, int dbg) { + char *text = src->text; - source *src = preprocess(str, dbg); - start = library(&str, &end, dbg); + library(src, dbg); + src->text = text; - return (start != NULL && end != NULL); + return (src->root != NULL && src->last != NULL); } diff --git a/igen/lexer.h b/igen/lexer.h index 454d2e1..0c5fea4 100644 --- a/igen/lexer.h +++ b/igen/lexer.h @@ -2,15 +2,17 @@ #define LEXER_H #include +#include "preprocessor.h" typedef enum stmt_type stmt_type; typedef enum cond_type cond_type; +typedef struct source source; typedef struct alt_stmt alt_stmt; typedef struct cond_stmt cond_stmt; typedef struct stmt stmt; enum stmt_type { - STMT_DIR, + STMT_NONE, STMT_FUNC, STMT_EXPR, STMT_COND, @@ -19,17 +21,18 @@ enum stmt_type { }; enum cond_type { + COND_NONE, COND_IF, COND_FOR, COND_WHILE, - COND_DO_WHILE, + COND_DO, NUM_CONDS }; struct alt_stmt { int type; size_t offset; - void *(*lex)(char **str, int dbg); + void *(*lex)(source *src, int dbg); }; struct cond_stmt { @@ -41,7 +44,6 @@ struct cond_stmt { struct stmt { stmt_type type; union { - dir *dir; func *func; expr *expr; cond_stmt *cond_stmt; @@ -50,5 +52,5 @@ struct stmt { stmt *next; }; -extern int lex(char *str, int dbg); +extern int lex(source *src, int dbg); #endif diff --git a/igen/misc.h b/igen/misc.h index 9216250..dc92cd8 100644 --- a/igen/misc.h +++ b/igen/misc.h @@ -1,6 +1,9 @@ #ifndef MISC_H #define MISC_H +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + extern char *read_file(const char *filename, long *size); extern char *get_line(char **str); extern char *make_str(const char *str); diff --git a/igen/preprocessor.c b/igen/preprocessor.c index e83bd20..e6a895c 100644 --- a/igen/preprocessor.c +++ b/igen/preprocessor.c @@ -2,6 +2,7 @@ #include #include #include +#include "misc.h" #include "preprocessor.h" static const keyword *preproc_keywords[] = { @@ -89,11 +90,11 @@ char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int coun return &text[span]; } -char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int dbg) { +char *skip_comment(source *src, whitespace *wspace, comment_type *type, int dbg) { /*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/ char *text = src->text; size_t span = get_comment_span(src, text++, dbg); - enum comment_type dummy = COMM_NONE; + comment_type dummy = COMM_NONE; whitespace wsp = {0}; type = (type != NULL) ? type : &dummy; @@ -129,8 +130,59 @@ char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int return &text[span]; } -comment *get_comment(source *src, int dbg) { +char *find_comment(source *src, whitespace *wspace, comment_type *type, int dbg) { + /*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/ char *text = src->text; + char *multi_comment = strstr(text, "/*"); + char *single_comment = strstr(text, "//"); + char *next_comment = NULL; + comment_type dummy = COMM_NONE; + whitespace wsp = {0}; + + type = (type != NULL) ? type : &dummy; + + if (multi_comment != NULL && single_comment != NULL) { + char *first_comment = min(multi_comment, single_comment); + *type = (first_comment == multi_comment) ? COMM_MULTI : COMM_SINGLE; + } else if (multi_comment == NULL && single_comment == NULL) { + *type = COMM_NONE; + } else if (multi_comment != NULL) { + *type = COMM_MULTI; + } else { + *type = COMM_SINGLE; + } + + switch (*type) { + case COMM_MULTI: next_comment = multi_comment; break; + case COMM_SINGLE: next_comment = single_comment; break; + } + + if (next_comment != NULL) { + size_t span = next_comment-text; + count_whitespace(&wsp, text, span, 1, 0, dbg); + count_whitespace(&wsp, &text[span], strspn(&text[span], "\r\n\f"), 0, 1, dbg); + + if (wspace != NULL) { + *wspace = wsp; + } + + if (wsp.tabs) { + const int tab_stop = src->tab_width; + const int extra_tabs = wsp.spaces/tab_stop; + src->cur.x += ((wsp.tabs+extra_tabs)*tab_stop); + } else { + src->cur.x += wsp.spaces; + } + src->cur.y += wsp.lines; + + return &text[span]; + } else { + return text; + } + +} + +comment *get_comment(source *src, char *text, int dbg) { char *after_comment; size_t comment_len = 0; comment *com = calloc(1, sizeof(comment)); @@ -151,7 +203,102 @@ comment *get_comment(source *src, int dbg) { return com; } -source *pp_include(source *src, int dbg) { +source *make_source(const char *filename, int tab_stop, int dbg) { + long file_size = 0; + size_t filename_len = strlen(filename); + source *src = calloc(1, sizeof(source)); + + src->text = read_file(filename, &file_size); + if (src->text == NULL) { + printf("Error: read_file() returned NULL.\n"); + free(src); + return NULL; + } + src->tab_width = tab_stop; + src->filename = calloc(filename_len+1, sizeof(char)); + memcpy(src->filename, filename, filename_len); + return src; +} + +int cmp_key(const void *key, const void *elem) { + const char *k = key; + const keyword *e = *(const keyword **)elem; + return strcmp(k, e->name); +} + +int cmp_keyword(const void *key1, const void *key2) { + const keyword *k1 = *(const keyword **)key1; + const keyword *k2 = *(const keyword **)key2; + return strcmp(k1->name, k2->name); +} + +keyword **copy_keyword_table(keyword **keywords, size_t *key_count, int dbg) { + size_t dummy = 0; + + key_count = (key_count != NULL) ? key_count : &dummy; + if (keywords == NULL) { + *key_count = 0; + return NULL; + } else { + for (*key_count = 0; keywords[count] != NULL; ++(*key_count)); + if (!*key_count) { + return NULL; + } else { + keyword **table = calloc(*key_count+1, sizeof(keyword *)); + #if 0 + for (size_t i = 0; i < *key_count; ++i) { + table[i] = calloc(1, sizeof(keyword)); + memcpy(table[i], keywords[i], sizeof(keyword)); + } + #else + memcpy(table, keywords, *key_count); + #endif + return table; + } + } +} + +int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg) { + int ret = -1; + size_t key_count = 0; + keyword *k; + keyword **sorted_keywords = copy_keyword_table(keywords, &key_count, dbg); + void *dummy = NULL; + callback_ret = (callback_ret != NULL) ? callback_ret : &dummy; + + if (sorted_keywords == NULL) { + *callback_ret = NULL; + return ret; + } + + if (key_count > 1) { + qsort(sorted_keywords, key_count, sizeof(keyword *), cmp_keyword); + } + + k = *(keyword **)bsearch(key, sorted_keywords, key_count, sizeof(keyword *), cmp_key); + + if (k != NULL) { + if (k->found_keyword) { + *callback_ret = k->found_keyword(ctx, dbg); + } else { + *callback_ret = NULL; + } + ret = k->id; + } else { + *callback_ret = NULL; + } + + #if 0 + for (size_t i = 0; i < *key_count; ++i) { + free(sorted_keywords[i]); + sorted_keywords[i] = NULL; + } + #endif + free(sorted_keywords); + return ret; +} + +char *pp_include(source *src, int dbg) { char *text = skip_whitespace(src, NULL, 0, 1, dbg); if (*text == '\r' || *text == '\n' || *text == '\f') { throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text)); @@ -166,27 +313,156 @@ source *pp_include(source *src, int dbg) { return NULL; } else { long dummy = 0; - source *inc_src = calloc(1, sizeof(source)); - inc_src->tab_width = src->tab_width; - inc_src->filename = calloc((tmp--)-text, sizeof(char)); - inc_src->text = read_file(inc_src->filename, &dummy); - if (inc_src->text == NULL) { - throw_error(src, 1, "File \"%s\" couldn't be read.", inc_src->filename); - free(inc_src->filename); - free(inc_src); - src->text = skip_line(text, dbg); - return NULL; - } + char *filename = calloc((tmp--)-text, sizeof(char)); + memcpy(filename, text, tmp-text); + return filename; } } else { throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive."); src->text = text; return NULL; } - if - size_t span = strspn(text, " \t\v\r\n"); - if (text[span]+1 == '/' && text[]) } -source *preprocess(const char *str, int dbg) { +struct str_list { + char *str; + struct str_list *next; +}; + +struct comment_list { + comment *com; + struct comment_list *next; +}; + +int is_included(source *parent, const char *filename, int dbg) { + if (parent->include_list != NULL) { + source **includes = parent->include_list; + for (int i = 0; includes[i] != NULL; ++i) { + if (is_included(includes[i], filename, dbg)) { + return 1; + } + } + return 0; + } + return !strcmp(parent->filename, filename); +} + +void free_str_list(struct str_list *list) { + for (struct str_list *l = list, l2 = l; l != NULL; l = l->next, l2 = l) { + free(l2->str); + l2->str = NULL; + free(l2); + } +} + +void free_comment_list(struct comment_list *list) { + for (struct comment_list *l = list, l2 = l; l != NULL; l = l->next, l2 = l) { + free(l2->com); + l2->com = NULL; + free(l2); + } +} + +source **make_include_list(source *src, struct str_list *list, int dbg) { + int inc_count = 0; + source **include_list = NULL; + + for (struct str_list *inc = list; inc != NULL; inc = inc->next, ++inc_count); + + include_list = calloc(inc_count+1, sizeof(source *)); + + do { + struct str_list *inc = list; + for (int i = 0; inc != NULL && i < inc_count; ++i, inc = inc->next) { + src->include_list[i] = preprocess(src, inc->str, dbg); + } + } while(0); +} + +comment **make_comment_list(source *src, int dbg) { + int comment_count = 0; + struct comment_list *list = calloc(1, sizeof(struct comment_list)); + struct comment_list *list_end = list; + comment **comments = NULL; + whitespace wsp = {0}; + + for (char *text = src->text; text != NULL && *text != '\0'; text = find_comment(src, &wsp, NULL, dbg), list_end = list_end->next, ++comment_count) { + list_end->com = get_comment(src, text, dbg); + list_end->com->wsp = wsp; + list_end->next = calloc(1, sizeof(struct comment_list)); + } + + comments = calloc(comment_count+1, sizeof(comment *)); + + do { + struct comment_list *l = list; + for (int i = 0; l != NULL && i < comment_count; ++i, l = l->next) { + comments[i] = calloc(1, sizeof(comment)); + memcpy(comments[i], l->com, sizeof(comment)); + } + } while(0); + free_comment_list(list); + return comments; +} + +source *preprocess(source *parent, const char *filename, int dbg) { + source *src = make_source(filename, 8, dbg); + src->parent = parent; + if (src == NULL) { + if (parent == NULL) { + printf("Error: Source struct for file \"%s\" couldn't be created.\n", filename); + } else { + throw_error(parent, 1, "Source struct for file \"%s\" couldn't be created.", filename); + } + return NULL; + } else { + int is_directive = 0; + struct str_list *include_list = calloc(1, sizeof(struct str_list)); + struct str_list *inc_end = include_list; + + src->included = (parent != NULL); + + for (char *text = src->text; *text != '\0'; ++text) { + text = skip_whitespace(src, NULL, 1, 1, dbg) + text = skip_comment(src, NULL, NULL, dbg); + + if (is_directive) { + char *dir_key = NULL; + void *key_data = NULL; + size_t key_len = strcspn(text, " \t\v\b"); + directive_type dir_type = DIR_NONE; + + dir_key = calloc(key_len+1, sizeof(char)); + memcpy(dir_key, text, key_len); + dir_type = find_keyword(dir_key, preproc_keywords, src, &key_data, dbg); + free(dir_key); + + if (key_data != NULL) { + switch (dir_type) { + case DIR_INCLUDE: + if (parent != NULL) { + if (is_included(parent, (char *)key_data, dbg)) { + break; + } + } + inc_end->str = (char *)key_data; + inc_end->next = calloc(1, sizeof(struct str_list)); + inc_end = include_list->next; + break; + } + } + + is_directive = 0; + } + + if (*text == '#') { + is_directive = 1; + } + } + src->include_list = make_include_list(src, include_list, dbg); + src->comments = make_comment_list(src, dbg); + free_str_list(include_list); + include_list = NULL; + } + return src; } diff --git a/igen/preprocessor.h b/igen/preprocessor.h index 7a85e45..200f5b2 100644 --- a/igen/preprocessor.h +++ b/igen/preprocessor.h @@ -1,11 +1,25 @@ #ifndef PREPROCESSOR_H #define PREPROCESSOR_H +#include "lexer.h" + + typedef enum comment_type comment_type; +typedef enum directive_type directive_type; +typedef struct stmt stmt; +typedef struct linked_list linked_list; +typedef struct keyword keyword; typedef struct cursor cursor; typedef struct whitespace whitespace; typedef struct comment comment; typedef struct source source; +typedef void *(keyword_cb)(void *ctx, int dbg); + +enum directive_type { + DIR_NONE, + DIR_INCLUDE, + NUM_DIRS +}; enum comment_type { COMM_NONE, @@ -14,6 +28,17 @@ enum comment_type { NUM_COMMS }; +struct linked_list { + void *data; + linked_list *next; +}; + +struct keyword { + char *name; + int id; + keyword_cb *found_keyword; +}; + struct cursor { int line; int column; @@ -36,12 +61,19 @@ struct comment { }; struct source { + source *parent; source **include_list; + comment **comments; char *filename; char *text; + int included : 1; int tab_width; cursor cur; + stmt *root; + stmt *last; }; -extern source *preprocess(const char *str, int dbg); +/*extern keyword *find_keyword(const char *key, keyword **keywords, int dbg);*/ +extern int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg); +extern source *preprocess(source *parent, const char *filename, int dbg); #endif -- cgit v1.2.3-13-gbd6f