summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2022-02-17 16:42:31 -0400
committermrb0nk500 <b0nk@b0nk.xyz>2022-02-18 10:05:01 -0400
commit524cfc23b15e1067076e45b056cb1d84e87e66cb (patch)
tree03a1b91d23da653ec3e5b9703b0b1544238400d9
parent5f753ddee7d935e0ba4750a6a8c26fe056c77612 (diff)
igen: Did some more work on it.
-rw-r--r--igen/igen.c13
-rw-r--r--igen/lexer.c142
-rw-r--r--igen/lexer.h12
-rw-r--r--igen/misc.h3
-rw-r--r--igen/preprocessor.c314
-rw-r--r--igen/preprocessor.h34
6 files changed, 464 insertions, 54 deletions
diff --git a/igen/igen.c b/igen/igen.c
index fa9f10f..921ca79 100644
--- a/igen/igen.c
+++ b/igen/igen.c
@@ -4,27 +4,26 @@
#include <string.h>
#include "lexer.h"
#include "misc.h"
+#include "preprocessor.h"
+/* Print a one-line usage message; name is the program name to show in it. */
void usage(const char *name) {
printf("Usage: %s <file>\n", name);
}
+/*
+ * Program entry point: preprocesses the file named by argv[1] and lexes it.
+ * Returns 0 on success and 1 on a usage, preprocessing or lexing failure.
+ */
int main(int argc, char **argv) {
- char *buf;
- long file_size = 0;
int dbg = 0;
int ret = 0;
+ source *root_src;
if (argc < 2) {
usage(argv[0]);
ret = 1;
+    /* No file operand: stop here so argv[1] (NULL) never reaches preprocess(). */
+    return ret;
}
- buf = read_file(argc[1], &file_size);
- if (buf == NULL) {
- printf("Error: read_file() returned NULL.\n");
+ root_src = preprocess(NULL, argv[1], dbg);
+ if (root_src == NULL) {
usage(argv[0]);
ret = 1;
+ } else {
+    /* lex() returns non-zero on success; map that to a zero exit status. */
+    ret = lex(root_src, dbg) ? 0 : 1;
}
-
- ret = lex(buf, dbg);
return ret;
}
diff --git a/igen/lexer.c b/igen/lexer.c
index 1d3268e..15226b8 100644
--- a/igen/lexer.c
+++ b/igen/lexer.c
@@ -6,35 +6,133 @@
#include "misc.h"
#include "preprocessor.h"
+/*
+ * Skip whitespace and then a comment, starting at src->text.
+ *
+ * src          - source being scanned; src->text is advanced while scanning.
+ * wspace       - passed through to skip_whitespace() and skip_comment().
+ * preserve_src - when non-zero, src->text is put back to its value on entry
+ *                before returning.
+ * dbg          - debug flag forwarded to the helpers.
+ *
+ * Returns the position reached after skipping.
+ */
+char *skip_seperators(source *src, whitespace *wspace, int preserve_src, int dbg) {
+    char *saved = src->text;
+    char *text;
+
+    src->text = skip_whitespace(src, wspace, 1, 1, dbg);
+    /* skip_comment() takes (src, wspace, type, dbg); the comment type is not needed here. */
+    src->text = skip_comment(src, wspace, NULL, dbg);
+
+    text = src->text;
+    if (preserve_src) {
+        src->text = saved;
+    }
+    return text;
+}
+
+
+/*
+ * Try to lex a conditional statement (do / for / if / while) at src->text.
+ *
+ * Returns a newly allocated cond_stmt when the next word is one of those
+ * keywords, or NULL when it is not (src->text is then left untouched) or
+ * when an allocation fails.  For "do", the trailing "while <expr> ;" is
+ * lexed as well; a missing "while" or ';' is reported through throw_error()
+ * and the partially filled statement is still returned.
+ */
cond_stmt *lex_cond_stmt(source *src, int dbg) {
+    /* find_keyword() walks a NULL-terminated array of keyword pointers. */
+    keyword *cond_keywords[] = {
+        &(keyword){"do", COND_DO, NULL},
+        &(keyword){"for", COND_FOR, NULL},
+        &(keyword){"if", COND_IF, NULL},
+        &(keyword){"while", COND_WHILE, NULL},
+        NULL
+    };
+    char *text = NULL;
+    char *key = NULL;
+    size_t key_len = 0;
+    size_t advance = 0;
+    int type = COND_NONE;
+    cond_stmt *cond = NULL;
+    whitespace wsp = {0};
+
+    text = skip_seperators(src, &wsp, 1, dbg);
+    key_len = strcspn(text, " \t\v\b");
+
+    key = calloc(key_len+1, sizeof(char));
+    if (key == NULL) {
+        return NULL;
+    }
+    memcpy(key, text, key_len);
+    type = find_keyword(key, cond_keywords, NULL, NULL, dbg);
+    free(key);
+
+    /* find_keyword() returns -1 on a miss; COND_NONE is not a keyword either. */
+    if (type <= COND_NONE) {
+        return NULL;
+    }
+
+    cond = calloc(1, sizeof(cond_stmt));
+    if (cond == NULL) {
+        return NULL;
+    }
+    cond->type = type;
+
+    /* Step over the keyword and its delimiter, but never past the NUL. */
+    advance = key_len + (text[key_len] != '\0');
+    src->text = text + advance;
+    src->cur.x += advance;
+
+    if (type != COND_DO) {
+        cond->expr = lex_expr(src, dbg);
+    }
+
+    cond->stmt = lex_stmt(src, dbg);
+
+    if (type == COND_DO) {
+        text = skip_seperators(src, &wsp, 1, dbg);
+        key_len = strcspn(text, " \t\v\b");
+
+        if (key_len == strlen("while") && !strncmp(text, "while", key_len)) {
+            advance = key_len + (text[key_len] != '\0');
+            src->text = text + advance;
+            src->cur.x += advance;
+
+            cond->expr = lex_expr(src, dbg);
+
+            text = skip_seperators(src, &wsp, 1, dbg);
+
+            if (*text == ';') {
+                src->text = text + 1;
+                ++src->cur.x;
+            } else {
+                throw_error(src, 1, "Missing \';\' after do while statement.");
+            }
+        } else {
+            throw_error(src, 1, "Missing \'while\' after do while statement.");
+        }
+    }
+    return cond;
}
+/*
+ * Try to lex a compound statement "{ ... }" at src->text.
+ *
+ * Returns the statement lexed between the braces (which may be NULL), or
+ * NULL when the next character after any separators is not '{'.
+ * A missing '}' is reported through throw_error().
+ */
stmt *lex_comp_stmt(source *src, int dbg) {
- char *tmp = *str;
- if (*tmp++ == '{') {
- stmt *s = lex_stmt(&tmp, dbg);
- if (*tmp++ == '}') {
- *str = tmp;
- return s;
+    char *text = NULL;
+    whitespace wsp = {0};
+
+    /* Peek only: src->text is committed below once '{' has been seen. */
+    text = skip_seperators(src, &wsp, 1, dbg);
+
+    if (*text++ == '{') {
+        stmt *s;
+
+        src->text = text;
+        ++src->cur.x;
+
+        s = lex_stmt(src, dbg);
+        /* lex_stmt() returns NULL when nothing matched. */
+        if (s != NULL) {
+            s->wsp = wsp;
+        }
+
+        text = src->text;
+
+        if (*text++ == '}') {
+            ++src->cur.x;
} else {
- throw_error(src, 1, "Missing \'}\' in compound statement.");
+            --text;
+            throw_error(src, 1, "Missing terminating \'}\' in compound statement.");
}
+        src->text = text;
+        return s;
}
return NULL;
}
+/*
+ * Try each statement alternative in turn at src->text.
+ *
+ * Before every attempt src->text is rewound to where it was on entry.  The
+ * first alternative returning non-NULL wins and its result is stored in the
+ * matching member of a newly allocated stmt.  When nothing matches,
+ * src->text is restored and NULL is returned.
+ */
stmt *lex_stmt(source *src, int dbg) {
+    char *text = src->text;
const alt_stmt alts[] = {
- {STMT_DIR, offsetof(stmt, dir), lex_dir},
{STMT_FUNC, offsetof(stmt, func), lex_func},
{STMT_EXPR, offsetof(stmt, expr), lex_exprs},
{STMT_COND, offsetof(stmt, cond_stmt), lex_cond_stmt},
{STMT_COMP, offsetof(stmt, down), lex_comp_stmt},
};
+    /* Bound the loop by the table itself so it cannot read past the last entry. */
+    const size_t alt_count = sizeof(alts)/sizeof(alts[0]);
+
- for (int i = 0; i < NUM_STMTS; ++i) {
- char *tmp = *str;
- void *data = alts[i].lex(&tmp, dbg);
+    for (size_t i = 0; i < alt_count; ++i) {
+        src->text = text;
+        void *data = alts[i].lex(src, dbg);
if (data != NULL) {
stmt *s = calloc(1, sizeof(stmt));
- void **member = (char **)s+alts[i].offset;
+            /* offsetof() yields a byte offset, so step in bytes before casting. */
+            void **member = (void **)((char *)s+alts[i].offset);
@@ -43,25 +141,25 @@ stmt *lex_stmt(source *src, int dbg) {
}
}
+    src->text = text;
return NULL;
}
-stmt *lex_library(source *src, stmt **end, int dbg) {
- stmt *start = lex_stmt(str, dbg);
- end = (end != NULL) ? end : &start;
- for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) {
- (*end)->next = s;
- *end = s;
+/*
+ * Lex statements from src until lex_stmt() returns NULL.
+ *
+ * src->root is set to the first statement lexed.  Each statement is linked
+ * after src->last (when there is one) and then becomes the new src->last.
+ */
+void lex_library(source *src, int dbg) {
+    src->root = lex_stmt(src, dbg);
+    for (stmt *s = src->root; s != NULL; s = lex_stmt(src, dbg)) {
+        /* Only link behind an existing tail; never link a node to itself. */
+        if (src->last != NULL && src->last != s) {
+            src->last->next = s;
+        }
+        src->last = s;
}
- return start;
};
-int lex(char *str, int dbg) {
- stmt *start = NULL;
- stmt *end = NULL;
+/*
+ * Lex a preprocessed source into src->root / src->last.
+ *
+ * src->text is restored to its value on entry afterwards.
+ * Returns non-zero when at least one statement was lexed, 0 otherwise
+ * (including when src is NULL).
+ */
+int lex(source *src, int dbg) {
+    char *text;
+
+    if (src == NULL) {
+        return 0;
+    }
+    text = src->text;
- source *src = preprocess(str, dbg);
- start = library(&str, &end, dbg);
+    /* The statement-list lexer defined in this file is lex_library(). */
+    lex_library(src, dbg);
+    src->text = text;
- return (start != NULL && end != NULL);
+    return (src->root != NULL && src->last != NULL);
}
diff --git a/igen/lexer.h b/igen/lexer.h
index 454d2e1..0c5fea4 100644
--- a/igen/lexer.h
+++ b/igen/lexer.h
@@ -2,15 +2,17 @@
#define LEXER_H
#include <stdlib.h>
+#include "preprocessor.h"
typedef enum stmt_type stmt_type;
typedef enum cond_type cond_type;
+typedef struct source source;
typedef struct alt_stmt alt_stmt;
typedef struct cond_stmt cond_stmt;
typedef struct stmt stmt;
enum stmt_type {
- STMT_DIR,
+ STMT_NONE,
STMT_FUNC,
STMT_EXPR,
STMT_COND,
@@ -19,17 +21,18 @@ enum stmt_type {
};
+/*
+ * Kinds of conditional statement.  COND_NONE is the first enumerator (0);
+ * NUM_CONDS is the last one and so equals the number of entries before it.
+ */
enum cond_type {
+ COND_NONE,
COND_IF,
COND_FOR,
COND_WHILE,
- COND_DO_WHILE,
+ COND_DO,
NUM_CONDS
};
+/*
+ * One entry of the table of statement alternatives tried by lex_stmt():
+ *   type   - the STMT_* value for this alternative.
+ *   offset - offsetof() of the stmt member that receives the result.
+ *   lex    - lexer to call; a non-NULL result means the alternative matched.
+ */
struct alt_stmt {
int type;
size_t offset;
- void *(*lex)(char **str, int dbg);
+ void *(*lex)(source *src, int dbg);
};
struct cond_stmt {
@@ -41,7 +44,6 @@ struct cond_stmt {
struct stmt {
stmt_type type;
union {
- dir *dir;
func *func;
expr *expr;
cond_stmt *cond_stmt;
@@ -50,5 +52,5 @@ struct stmt {
stmt *next;
};
-extern int lex(char *str, int dbg);
+extern int lex(source *src, int dbg);
#endif
diff --git a/igen/misc.h b/igen/misc.h
index 9216250..dc92cd8 100644
--- a/igen/misc.h
+++ b/igen/misc.h
@@ -1,6 +1,9 @@
#ifndef MISC_H
#define MISC_H
+/*
+ * Smaller / larger of two values.  Each argument may be evaluated twice, so
+ * do not pass expressions with side effects (e.g. i++).
+ */
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
extern char *read_file(const char *filename, long *size);
extern char *get_line(char **str);
extern char *make_str(const char *str);
diff --git a/igen/preprocessor.c b/igen/preprocessor.c
index e83bd20..e6a895c 100644
--- a/igen/preprocessor.c
+++ b/igen/preprocessor.c
@@ -2,6 +2,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "misc.h"
#include "preprocessor.h"
static const keyword *preproc_keywords[] = {
@@ -89,11 +90,11 @@ char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int coun
return &text[span];
}
-char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int dbg) {
+char *skip_comment(source *src, whitespace *wspace, comment_type *type, int dbg) {
/*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/
char *text = src->text;
size_t span = get_comment_span(src, text++, dbg);
- enum comment_type dummy = COMM_NONE;
+ comment_type dummy = COMM_NONE;
whitespace wsp = {0};
type = (type != NULL) ? type : &dummy;
@@ -129,8 +130,59 @@ char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int
return &text[span];
}
-comment *get_comment(source *src, int dbg) {
+/*
+ * Locate the first comment opener ("/" "*" or "//") at or after src->text.
+ *
+ * type   - optional; receives COMM_MULTI, COMM_SINGLE or COMM_NONE.
+ * wspace - optional; receives the whitespace counted up to the comment.
+ *
+ * When a comment is found, src->cur is advanced by the counted whitespace
+ * and a pointer to the opener is returned.  Otherwise src->text is returned
+ * unchanged.  src->text itself is never modified here.
+ */
+char *find_comment(source *src, whitespace *wspace, comment_type *type, int dbg) {
char *text = src->text;
+    char *block_start = strstr(text, "/*");
+    char *line_start = strstr(text, "//");
+    char *hit = NULL;
+    comment_type unused_type = COMM_NONE;
+    whitespace counted = {0};
+
+    if (type == NULL) {
+        type = &unused_type;
+    }
+
+    /* Whichever opener appears first in the text wins. */
+    if (block_start != NULL && (line_start == NULL || block_start < line_start)) {
+        *type = COMM_MULTI;
+        hit = block_start;
+    } else if (line_start != NULL) {
+        *type = COMM_SINGLE;
+        hit = line_start;
+    } else {
+        *type = COMM_NONE;
+    }
+
+    if (hit == NULL) {
+        return text;
+    }
+
+    count_whitespace(&counted, text, (size_t)(hit-text), 1, 0, dbg);
+    count_whitespace(&counted, hit, strspn(hit, "\r\n\f"), 0, 1, dbg);
+
+    if (wspace != NULL) {
+        *wspace = counted;
+    }
+
+    if (counted.tabs) {
+        const int tab_stop = src->tab_width;
+        const int extra_tabs = counted.spaces/tab_stop;
+        src->cur.x += ((counted.tabs+extra_tabs)*tab_stop);
+    } else {
+        src->cur.x += counted.spaces;
+    }
+    src->cur.y += counted.lines;
+
+    return hit;
+}
+
+
+comment *get_comment(source *src, char *text, int dbg) {
char *after_comment;
size_t comment_len = 0;
comment *com = calloc(1, sizeof(comment));
@@ -151,7 +203,102 @@ comment *get_comment(source *src, int dbg) {
return com;
}
-source *pp_include(source *src, int dbg) {
+/*
+ * Allocate a source for filename and load the file's text with read_file().
+ *
+ * filename - path of the file to read; a private copy is stored.
+ * tab_stop - stored as the source's tab width.
+ * dbg      - unused here.
+ *
+ * Returns the new source, or NULL when filename is NULL, an allocation
+ * fails, or read_file() returns NULL (an error is printed in that case).
+ */
+source *make_source(const char *filename, int tab_stop, int dbg) {
+    long file_size = 0;
+    size_t filename_len;
+    source *src;
+
+    (void)dbg;
+
+    if (filename == NULL) {
+        printf("Error: No filename given.\n");
+        return NULL;
+    }
+
+    src = calloc(1, sizeof(source));
+    if (src == NULL) {
+        return NULL;
+    }
+
+    /* Copy the name first so a failed read only has our own memory to release. */
+    filename_len = strlen(filename);
+    src->filename = calloc(filename_len+1, sizeof(char));
+    if (src->filename == NULL) {
+        free(src);
+        return NULL;
+    }
+    memcpy(src->filename, filename, filename_len);
+
+    src->text = read_file(filename, &file_size);
+    if (src->text == NULL) {
+        printf("Error: read_file() returned NULL.\n");
+        free(src->filename);
+        free(src);
+        return NULL;
+    }
+    src->tab_width = tab_stop;
+    return src;
+}
+
+
+/*
+ * bsearch() comparator: key is a C string, elem points at a keyword pointer.
+ * Orders by strcmp() of the key against the keyword's name.
+ */
+int cmp_key(const void *key, const void *elem) {
+    const keyword *const *slot = elem;
+    return strcmp((const char *)key, (*slot)->name);
+}
+
+
+/*
+ * qsort() comparator for an array of keyword pointers: orders the two
+ * entries by strcmp() of their names.
+ */
+int cmp_keyword(const void *key1, const void *key2) {
+    const keyword *const *lhs = key1;
+    const keyword *const *rhs = key2;
+    return strcmp((*lhs)->name, (*rhs)->name);
+}
+
+
+/*
+ * Make a shallow, NULL-terminated copy of a NULL-terminated keyword table.
+ *
+ * keywords  - table to copy; may be NULL.
+ * key_count - optional; receives the number of entries copied.
+ * dbg       - unused here.
+ *
+ * Returns the copy (the caller frees the array, not the keywords it points
+ * at), or NULL when the table is NULL or empty or the allocation fails; the
+ * count is 0 in all of those cases.
+ */
+keyword **copy_keyword_table(keyword **keywords, size_t *key_count, int dbg) {
+    size_t count = 0;
+    keyword **table = NULL;
+
+    (void)dbg;
+
+    if (keywords != NULL) {
+        while (keywords[count] != NULL) {
+            ++count;
+        }
+    }
+
+    if (count > 0) {
+        table = calloc(count+1, sizeof(keyword *));
+        if (table != NULL) {
+            /* memcpy() counts bytes, so scale by the element size. */
+            memcpy(table, keywords, count*sizeof(keyword *));
+        } else {
+            count = 0;
+        }
+    }
+
+    if (key_count != NULL) {
+        *key_count = count;
+    }
+    return table;
+}
+
+
+/*
+ * Look key up by name in a NULL-terminated keyword table.
+ *
+ * key          - name to search for.
+ * keywords     - NULL-terminated table; it is copied and the copy sorted, so
+ *                the caller's table is left in its original order.
+ * ctx          - passed to the matching keyword's callback.
+ * callback_ret - optional; receives the callback's result, or NULL when
+ *                there was no match or the keyword has no callback.
+ * dbg          - debug flag forwarded to the callback.
+ *
+ * Returns the matching keyword's id, or -1 when nothing matches.
+ */
+int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg) {
+    int ret = -1;
+    size_t key_count = 0;
+    keyword **sorted_keywords = copy_keyword_table(keywords, &key_count, dbg);
+    void *result = NULL;
+
+    if (sorted_keywords != NULL && key != NULL) {
+        keyword **match;
+
+        if (key_count > 1) {
+            qsort(sorted_keywords, key_count, sizeof(keyword *), cmp_keyword);
+        }
+
+        /* bsearch() returns NULL on a miss; check before dereferencing. */
+        match = bsearch(key, sorted_keywords, key_count, sizeof(keyword *), cmp_key);
+
+        if (match != NULL && *match != NULL) {
+            keyword *k = *match;
+            if (k->found_keyword) {
+                result = k->found_keyword(ctx, dbg);
+            }
+            ret = k->id;
+        }
+    }
+
+    /* Only the pointer array was copied, so only the array is freed. */
+    free(sorted_keywords);
+
+    if (callback_ret != NULL) {
+        *callback_ret = result;
+    }
+    return ret;
+}
+
+
+char *pp_include(source *src, int dbg) {
char *text = skip_whitespace(src, NULL, 0, 1, dbg);
if (*text == '\r' || *text == '\n' || *text == '\f') {
throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text));
@@ -166,27 +313,156 @@ source *pp_include(source *src, int dbg) {
return NULL;
} else {
long dummy = 0;
- source *inc_src = calloc(1, sizeof(source));
- inc_src->tab_width = src->tab_width;
- inc_src->filename = calloc((tmp--)-text, sizeof(char));
- inc_src->text = read_file(inc_src->filename, &dummy);
- if (inc_src->text == NULL) {
- throw_error(src, 1, "File \"%s\" couldn't be read.", inc_src->filename);
- free(inc_src->filename);
- free(inc_src);
- src->text = skip_line(text, dbg);
- return NULL;
- }
+ char *filename = calloc((tmp--)-text, sizeof(char));
+ memcpy(filename, text, tmp-text);
+ return filename;
}
} else {
throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive.");
src->text = text;
return NULL;
}
- if
- size_t span = strspn(text, " \t\v\r\n");
- if (text[span]+1 == '/' && text[])
}
-source *preprocess(const char *str, int dbg) {
+/* Singly linked list node holding one string; free_str_list() frees str. */
+struct str_list {
+ char *str;
+ struct str_list *next;
+};
+
+/* Singly linked list node holding one comment; free_comment_list() frees com. */
+struct comment_list {
+ comment *com;
+ struct comment_list *next;
+};
+
+/*
+ * Report whether filename names parent itself or any source reachable
+ * through parent's (NULL-terminated) include_list, searched recursively.
+ *
+ * Returns 1 on a match, 0 otherwise (including for NULL arguments).
+ */
+int is_included(source *parent, const char *filename, int dbg) {
+    if (parent == NULL || filename == NULL) {
+        return 0;
+    }
+
+    /* Check this source's own name even when it has includes of its own. */
+    if (parent->filename != NULL && !strcmp(parent->filename, filename)) {
+        return 1;
+    }
+
+    if (parent->include_list != NULL) {
+        source **includes = parent->include_list;
+        for (size_t i = 0; includes[i] != NULL; ++i) {
+            if (is_included(includes[i], filename, dbg)) {
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
+
+/* Free every node of list together with the string each node holds. */
+void free_str_list(struct str_list *list) {
+    while (list != NULL) {
+        /* Read the link before the node is released. */
+        struct str_list *next = list->next;
+        free(list->str);
+        free(list);
+        list = next;
+    }
+}
+
+
+/*
+ * Free every node of list together with the comment struct each node
+ * points at (the struct only, nothing it may point to in turn).
+ */
+void free_comment_list(struct comment_list *list) {
+    while (list != NULL) {
+        /* Read the link before the node is released. */
+        struct comment_list *next = list->next;
+        free(list->com);
+        free(list);
+        list = next;
+    }
+}
+
+
+/*
+ * Preprocess every file named in list, with src as the parent.
+ *
+ * Nodes whose str is NULL are skipped, and files that fail to preprocess
+ * are left out, so the result stays densely packed.
+ *
+ * Returns a newly allocated, NULL-terminated array of sources, or NULL when
+ * the array cannot be allocated.
+ */
+source **make_include_list(source *src, struct str_list *list, int dbg) {
+    size_t inc_count = 0;
+    size_t stored = 0;
+    source **include_list = NULL;
+
+    for (struct str_list *inc = list; inc != NULL; inc = inc->next) {
+        if (inc->str != NULL) {
+            ++inc_count;
+        }
+    }
+
+    include_list = calloc(inc_count+1, sizeof(source *));
+    if (include_list == NULL) {
+        return NULL;
+    }
+
+    for (struct str_list *inc = list; inc != NULL && stored < inc_count; inc = inc->next) {
+        source *child;
+
+        if (inc->str == NULL) {
+            continue;
+        }
+        /* Fill the local array; src->include_list is only set by the caller. */
+        child = preprocess(src, inc->str, dbg);
+        if (child != NULL) {
+            include_list[stored++] = child;
+        }
+    }
+    return include_list;
+}
+
+
+/*
+ * Build an array of the comments found in src.
+ *
+ * Comments are first gathered into a temporary linked list, then each one is
+ * copied (struct copy only) into a newly allocated array with one spare
+ * zeroed slot at the end; the temporary list is freed before returning.
+ *
+ * NOTE(review): find_comment() searches from src->text and does not change
+ * it, and nothing in this loop assigns src->text either, so unless
+ * get_comment() advances it the loop looks like it can never terminate.
+ * Confirm against get_comment().
+ * NOTE(review): the first iteration calls get_comment() on the start of the
+ * text, before any comment has been searched for.
+ * NOTE(review): no calloc() or get_comment() result is checked before use.
+ */
+comment **make_comment_list(source *src, int dbg) {
+ int comment_count = 0;
+ struct comment_list *list = calloc(1, sizeof(struct comment_list));
+ struct comment_list *list_end = list;
+ comment **comments = NULL;
+ whitespace wsp = {0};
+
+ /* Each pass stores one comment and appends an empty node for the next. */
+ for (char *text = src->text; text != NULL && *text != '\0'; text = find_comment(src, &wsp, NULL, dbg), list_end = list_end->next, ++comment_count) {
+ list_end->com = get_comment(src, text, dbg);
+ list_end->com->wsp = wsp;
+ list_end->next = calloc(1, sizeof(struct comment_list));
+ }
+
+ comments = calloc(comment_count+1, sizeof(comment *));
+
+ /* Copy each gathered comment so the temporary list can be freed. */
+ do {
+ struct comment_list *l = list;
+ for (int i = 0; l != NULL && i < comment_count; ++i, l = l->next) {
+ comments[i] = calloc(1, sizeof(comment));
+ memcpy(comments[i], l->com, sizeof(comment));
+ }
+ } while(0);
+ free_comment_list(list);
+ return comments;
+}
+
+/*
+ * Load filename as a source, collect its include directives and comments.
+ *
+ * parent   - the including source, or NULL for the root file.
+ * filename - file to load.
+ * dbg      - debug flag forwarded to the helpers.
+ *
+ * The text is scanned for '#' directives; the word after a '#' is looked up
+ * in preproc_keywords.  Include operands not already reachable from parent
+ * are queued, then preprocessed by make_include_list().  src->text points at
+ * the start of the text again when this returns.
+ *
+ * Returns the new source, or NULL when it could not be created (the error
+ * is printed, or reported against parent when there is one).
+ */
+source *preprocess(source *parent, const char *filename, int dbg) {
+    source *src = make_source(filename, 8, dbg);
+    struct str_list *include_list = NULL;
+    struct str_list *inc_end = NULL;
+    char *start = NULL;
+    char *text = NULL;
+    int is_directive = 0;
+
+    if (src == NULL) {
+        const char *shown = (filename != NULL) ? filename : "(null)";
+        if (parent == NULL) {
+            printf("Error: Source struct for file \"%s\" couldn't be created.\n", shown);
+        } else {
+            throw_error(parent, 1, "Source struct for file \"%s\" couldn't be created.", shown);
+        }
+        return NULL;
+    }
+
+    /* Only touch src once it is known to be valid. */
+    src->parent = parent;
+    src->included = (parent != NULL);
+
+    include_list = calloc(1, sizeof(struct str_list));
+    if (include_list == NULL) {
+        printf("Error: Out of memory while preprocessing \"%s\".\n", filename);
+        return src;
+    }
+    inc_end = include_list;
+
+    start = src->text;
+    text = start;
+
+    while (*text != '\0') {
+        /* The skip helpers read from src->text, so keep it in step with the scan. */
+        src->text = text;
+        src->text = skip_whitespace(src, NULL, 1, 1, dbg);
+        src->text = skip_comment(src, NULL, NULL, dbg);
+        text = src->text;
+
+        if (is_directive) {
+            void *key_data = NULL;
+            size_t key_len = strcspn(text, " \t\v\b");
+            int dir_type = DIR_NONE;
+            char *dir_key = calloc(key_len+1, sizeof(char));
+
+            if (dir_key != NULL) {
+                memcpy(dir_key, text, key_len);
+                /* Keyword callbacks parse their operand from src->text. */
+                src->text = text+key_len;
+                dir_type = find_keyword(dir_key, preproc_keywords, src, &key_data, dbg);
+                free(dir_key);
+                text = src->text;
+            }
+
+            if (key_data != NULL) {
+                switch (dir_type) {
+                    case DIR_INCLUDE:
+                        if (parent != NULL && is_included(parent, (char *)key_data, dbg)) {
+                            /* Already included: drop the duplicate name. */
+                            free(key_data);
+                            break;
+                        }
+                        if (inc_end->str != NULL) {
+                            /* Current node is taken; append a fresh one. */
+                            inc_end->next = calloc(1, sizeof(struct str_list));
+                            if (inc_end->next == NULL) {
+                                free(key_data);
+                                break;
+                            }
+                            inc_end = inc_end->next;
+                        }
+                        inc_end->str = (char *)key_data;
+                        break;
+                    default:
+                        break;
+                }
+            }
+
+            is_directive = 0;
+        }
+
+        /* Never step past the terminating NUL. */
+        if (*text == '\0') {
+            break;
+        }
+
+        if (*text == '#') {
+            is_directive = 1;
+        }
+        ++text;
+    }
+
+    /* Later passes start from the beginning of the text again. */
+    src->text = start;
+
+    src->include_list = make_include_list(src, include_list, dbg);
+    src->comments = make_comment_list(src, dbg);
+    free_str_list(include_list);
+    include_list = NULL;
+
+    return src;
}
diff --git a/igen/preprocessor.h b/igen/preprocessor.h
index 7a85e45..200f5b2 100644
--- a/igen/preprocessor.h
+++ b/igen/preprocessor.h
@@ -1,11 +1,25 @@
#ifndef PREPROCESSOR_H
#define PREPROCESSOR_H
+#include "lexer.h"
+
+
typedef enum comment_type comment_type;
+typedef enum directive_type directive_type;
+typedef struct stmt stmt;
+typedef struct linked_list linked_list;
+typedef struct keyword keyword;
typedef struct cursor cursor;
typedef struct whitespace whitespace;
typedef struct comment comment;
typedef struct source source;
+typedef void *(keyword_cb)(void *ctx, int dbg);
+
+/*
+ * Preprocessor directive ids.  DIR_NONE is the first enumerator (0);
+ * NUM_DIRS is the last one and so equals the number of entries before it.
+ */
+enum directive_type {
+ DIR_NONE,
+ DIR_INCLUDE,
+ NUM_DIRS
+};
enum comment_type {
COMM_NONE,
@@ -14,6 +28,17 @@ enum comment_type {
NUM_COMMS
};
+/* Singly linked list node carrying an untyped data pointer. */
+struct linked_list {
+ void *data;
+ linked_list *next;
+};
+
+/*
+ * One entry of a keyword table searched by find_keyword():
+ *   name          - the keyword's spelling, compared with strcmp().
+ *   id            - value find_keyword() returns when this entry matches.
+ *   found_keyword - optional callback run on a match; may be NULL.
+ */
+struct keyword {
+ char *name;
+ int id;
+ keyword_cb *found_keyword;
+};
+
struct cursor {
int line;
int column;
@@ -36,12 +61,19 @@ struct comment {
};
+/*
+ * One source file being processed.
+ *   parent       - the including source, or NULL for the root file.
+ *   include_list - NULL-terminated array of the sources this one includes.
+ *   comments     - array of comments built by make_comment_list().
+ *   filename     - private copy of the file's name.
+ *   text         - the file's text as returned by read_file().
+ *   included     - non-zero when this source has a parent.
+ *   tab_width    - tab stop used when advancing the cursor.
+ *   cur          - current cursor position.
+ *   root, last   - first and last statement lexed from this source.
+ */
struct source {
+ source *parent;
source **include_list;
+ comment **comments;
char *filename;
char *text;
+ /* unsigned: the signedness of a plain int bit-field is implementation-defined, so one bit may only hold 0 and -1. */
+ unsigned int included : 1;
int tab_width;
cursor cur;
+ stmt *root;
+ stmt *last;
};
-extern source *preprocess(const char *str, int dbg);
+/*extern keyword *find_keyword(const char *key, keyword **keywords, int dbg);*/
+extern int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg);
+extern source *preprocess(source *parent, const char *filename, int dbg);
#endif