From 5f753ddee7d935e0ba4750a6a8c26fe056c77612 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Wed, 16 Feb 2022 17:05:00 -0400 Subject: igen: Start work on writing a preprocessor. --- igen/lexer.c | 13 ++-- igen/preprocessor.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++ igen/preprocessor.h | 47 +++++++++++++ 3 files changed, 247 insertions(+), 5 deletions(-) create mode 100644 igen/preprocessor.c create mode 100644 igen/preprocessor.h diff --git a/igen/lexer.c b/igen/lexer.c index 03f7e87..1d3268e 100644 --- a/igen/lexer.c +++ b/igen/lexer.c @@ -4,12 +4,13 @@ #include #include "lexer.h" #include "misc.h" +#include "preprocessor.h" -cond_stmt *lex_cond_stmt(char **str, int dbg) { +cond_stmt *lex_cond_stmt(source *src, int dbg) { /* NOTE(review): no statements are visible between the braces in this hunk, so nothing is returned yet. */ } -stmt *lex_comp_stmt(char **str, int dbg) { +stmt *lex_comp_stmt(source *src, int dbg) { /* NOTE(review): the parameter is now src, yet the unchanged lines below still dereference and assign str. */ char *tmp = *str; if (*tmp++ == '{') { stmt *s = lex_stmt(&tmp, dbg); @@ -17,13 +18,13 @@ stmt *lex_comp_stmt(char **str, int dbg) { *str = tmp; return s; } else { - throw_error("Missing \'}\' in stmt."); + throw_error(src, 1, "Missing \'}\' in compound statement."); } } return NULL; } -stmt *lex_stmt(char **str, int dbg) { +stmt *lex_stmt(source *src, int dbg) { const alt_stmt alts[] = { {STMT_DIR, offsetof(stmt, dir), lex_dir}, {STMT_FUNC, offsetof(stmt, func), lex_func}, @@ -45,7 +46,7 @@ stmt *lex_stmt(char **str, int dbg) { return NULL; } -stmt *lex_library(char **str, stmt **end, int dbg) { +stmt *lex_library(source *src, stmt **end, int dbg) { /* NOTE(review): the unchanged body still passes str to lex_stmt(); str is no longer a parameter. */ stmt *start = lex_stmt(str, dbg); end = (end != NULL) ? 
end : &start; for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) { @@ -59,6 +60,8 @@ int lex(char *str, int dbg) { stmt *start = NULL; stmt *end = NULL; + source *src = preprocess(str, dbg); /* NOTE(review): src is not passed on; the next unchanged line still calls library(&str, &end, dbg). */ start = library(&str, &end, dbg); + return (start != NULL && end != NULL); } diff --git a/igen/preprocessor.c b/igen/preprocessor.c new file mode 100644 index 0000000..e83bd20 --- /dev/null +++ b/igen/preprocessor.c @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include "preprocessor.h" + +static const keyword *preproc_keywords[] = { /* NULL-terminated table of directive keywords; only include is registered. NOTE(review): keyword, DIR_INCLUDE and a prototype for pp_include are not declared in the preprocessor.h shown in this patch, and the compound literal is typed as a pointer (const keyword *) while being given three initialisers - confirm the intended type is const keyword. */ + &(const keyword *) {"include", DIR_INCLUDE, pp_include}, + NULL +}; + +char *skip_line(const char *str, int dbg) { /* Returns a pointer just past the current line: skips up to the first CR, LF or FF and then over the whole run of those separators that follows (so directly following blank lines are skipped too). dbg is unused. NOTE(review): the result drops the const qualifier of str. */ + size_t span = strcspn(str, "\r\n\f"); + /*span += strspn(&str[span], "\r\n\f");*/ + return &str[span+strspn(&str[span], "\r\n\f")]; +} + +size_t line_span(const char *str, int dbg) { /* Number of characters from str to the position skip_line() returns, i.e. the line plus its trailing separator run. */ + return skip_line(str, dbg)-str; +} + +size_t get_comment_span(source *src, const char *str, int dbg) { /* Length of the comment that starts exactly at str, or 0 when str does not start with a comment opener. A block comment is measured through its closing delimiter; a line comment is measured with line_span(), so its trailing line separators are included. NOTE(review): the length of the closing delimiter is added to the strstr() result before the NULL test, so the test can never be true and an unterminated comment yields arithmetic on a null pointer. The search also starts at str itself, so the star of the opener can pair with a following slash (slash, star, slash is accepted as a complete comment). */ + if (str[0] == '/') { + if (str[1] == '*') { + char *tmp = strstr(str, "*/")+strlen("*/"); + if (tmp == NULL) { + throw_error(src, 0, "Unterminated comment (missing \"*/\")."); + } + return tmp-str; + } else if (str[1] == '/') { + return line_span(str, dbg); + } + } + return 0; +} + +size_t get_whitespace_span(const char *str, int count_lines, int count_columns, int dbg) { /* Length of the leading whitespace of str: a run of space, tab, vertical tab and backspace when count_columns is set, followed by a run of CR, LF and FF when count_lines is set. Horizontal whitespace after the line separators is not included. dbg is unused. */ + size_t span = 0; + if (count_columns) { + span = strspn(str, " \t\v\b"); + } + if (count_lines) { + span += strspn(&str[span], "\r\n\f"); + } + return span; +} + +void count_whitespace(whitespace *wsp, const char *str, size_t span, int count_lines, int count_columns, int dbg) { /* Adds to *wsp the whitespace found in the first span characters of str (stopping early at the terminator). Columns (spaces, tabs, vertical tabs) are tallied only when count_columns is set, line separators only when count_lines is set; a CR LF pair counts as one line. A character that is immediately followed by a backspace is not tallied; the pair is skipped and bspaces is incremented instead. NOTE(review): str[i+1] and str[i+2] are read without checking them against span. */ + for (size_t i = 0; i < span && str[i] != '\0'; ++i) { + char c = str[i]; + if (str[i+1] != '\b') { + if (count_columns) { + wsp->spaces += (c == ' '); + wsp->vtabs += (c == '\v'); + wsp->tabs += (c == '\t'); + } + if (count_lines) { + i += (c == '\r' && str[i+1] == '\n'); + wsp->lines += (c == '\r' || c == '\n' || c == '\f'); + } + } else { + c = str[++i+1]; + if (c != '\0') { + i += 
(count_lines && (c == '\r' && str[i+2] == '\n')); + } + ++wsp->bspaces; + } + } +} + +char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int count_columns, int dbg) { /* Measures the whitespace at src->text, advances src->cur by it and returns a pointer to the first character after it; src->text itself is not modified. When any tab was seen the column advance is (tabs + spaces / tab_width) * tab_width, otherwise it is the number of spaces. The tallies are copied to *wspace when wspace is not NULL. NOTE(review): count_whitespace() is passed str, which is not declared in this function (text looks intended), and the cursor is updated through cur.x and cur.y while struct cursor in preprocessor.h declares line and column. */ + char *text = src->text; + size_t span = get_whitespace_span(text, count_lines, count_columns, dbg); + whitespace wsp = {0}; + + count_whitespace(&wsp, str, span, count_lines, count_columns, dbg); + + if (wsp.tabs) { + const int tab_stop = src->tab_width; + const int extra_tabs = wsp.spaces/tab_stop; + src->cur.x += ((wsp.tabs+extra_tabs)*tab_stop); + } else { + src->cur.x += wsp.spaces; + } + src->cur.y += wsp.lines; + if (wspace != NULL) { + *wspace = wsp; + } + return &text[span]; +} + +char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int dbg) { /* Skips one comment located exactly at src->text. *type (when type is not NULL) receives COMM_MULTI, COMM_SINGLE or COMM_NONE; when a comment is found, *wspace (when not NULL) receives the tallied whitespace and src->cur is advanced with the same tab rule as skip_whitespace(). NOTE(review): the statement ++wsp.lines in the COMM_SINGLE branch has no terminating semicolon. In the COMM_MULTI branch text has already been advanced by two when count_whitespace() is called with the full span and when &text[span] is returned, so both reach two characters beyond the measured comment - confirm that is intended. */ + /*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/ + char *text = src->text; + size_t span = get_comment_span(src, text++, dbg); + enum comment_type dummy = COMM_NONE; + whitespace wsp = {0}; + + type = (type != NULL) ? type : &dummy; + + if (span) { + if (*text++ == '*') { + *type = COMM_MULTI; + /*for (size_t i = 0; text[i] != '\0' && i < span; i = line_span(&text[i], dbg), ++wsp.lines);*/ + count_whitespace(&wsp, text, span, 1, 0, dbg); + count_whitespace(&wsp, &text[span], strspn(&text[span], "\r\n\f"), 0, 1, dbg); + } else { + *type = COMM_SINGLE; + ++wsp.lines + text -= 2; + } + + if (wspace != NULL) { + *wspace = wsp; + } + + if (wsp.tabs) { + const int tab_stop = src->tab_width; + const int extra_tabs = wsp.spaces/tab_stop; + src->cur.x += ((wsp.tabs+extra_tabs)*tab_stop); + } else { + src->cur.x += wsp.spaces; + } + src->cur.y += wsp.lines; + } else { + --text; + *type = COMM_NONE; + } + return &text[span]; +} + +comment *get_comment(source *src, int dbg) { /* Allocates a comment record for the comment that follows the leading whitespace at src->text: records the whitespace tallies, the cursor before and after the comment, the comment type, and a heap copy of the comment text. The record and its text come from calloc() and are not freed here. NOTE(review): neither calloc() result is checked; skip_whitespace() does not move src->text, so skip_comment() still examines the position before the whitespace; after_comment is never used; comment_len is measured from the opener but the copy starts two characters later (also for line comments), so it runs two characters beyond the measured end. */ + char *text = src->text; + char *after_comment; + size_t comment_len = 0; + comment *com = calloc(1, sizeof(comment)); + + text = skip_whitespace(src, &com->wsp, 1, 1, dbg); + com->start_pos = src->cur; + + after_comment = 
skip_comment(src, NULL, &com->type, dbg); + com->end_pos = src->cur; + + switch (com->type) { + case COMM_MULTI : comment_len = strstr(text, "*/")-text; break; + case COMM_SINGLE: comment_len = strcspn(text, "\r\n\f"); break; + } + + com->text = calloc(comment_len+1, sizeof(char)); + memcpy(com->text, &text[strlen("/*")], comment_len); + return com; +} + +source *pp_include(source *src, int dbg) { /* Handles the operand of an include directive at src->text. After optional horizontal whitespace the operand must start with a double or single quote and be closed by the same character; every failure path reports through throw_error(), stores a resume position in src->text and returns NULL. NOTE(review): unfinished - the filename buffer is allocated with the operand length (no room for a terminator) but the operand is never copied into it, so read_file() is given an empty string; on success inc_src is neither stored nor returned; the statements after the if-else chain are incomplete and will not compile. */ + char *text = skip_whitespace(src, NULL, 0, 1, dbg); + if (*text == '\r' || *text == '\n' || *text == '\f') { + throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text)); + src->text = text; + return NULL; + } else if (*text == '\"' || *text == '\'') { + char c = *text++; + char *tmp = strchr(text, c); + if (tmp == NULL) { + throw_error(src, 1, "Missing terminating %c character.", c); + src->text = text; + return NULL; + } else { + long dummy = 0; + source *inc_src = calloc(1, sizeof(source)); + inc_src->tab_width = src->tab_width; + inc_src->filename = calloc((tmp--)-text, sizeof(char)); + inc_src->text = read_file(inc_src->filename, &dummy); + if (inc_src->text == NULL) { + throw_error(src, 1, "File \"%s\" couldn't be read.", inc_src->filename); + free(inc_src->filename); + free(inc_src); + src->text = skip_line(text, dbg); + return NULL; + } + } + } else { + throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive."); + src->text = text; + return NULL; + } + if + size_t span = strspn(text, " \t\v\r\n"); + if (text[span]+1 == '/' && text[]) +} + +source *preprocess(const char *str, int dbg) { /* NOTE(review): the body is still empty, so no source is returned although lex() in this patch calls it. */ +} diff --git a/igen/preprocessor.h b/igen/preprocessor.h new file mode 100644 index 0000000..7a85e45 --- /dev/null +++ b/igen/preprocessor.h @@ -0,0 +1,47 @@ +#ifndef PREPROCESSOR_H +#define PREPROCESSOR_H + +typedef enum comment_type comment_type; +typedef struct cursor cursor; +typedef struct whitespace whitespace; +typedef struct comment comment; +typedef struct source source; + 
+enum comment_type { /* Result kinds reported by skip_comment() in preprocessor.c. */ + COMM_NONE, /* no comment at the examined position */ + COMM_MULTI, /* block comment */ + COMM_SINGLE, /* line comment */ + NUM_COMMS /* number of kinds listed above */ +}; + +struct cursor { /* A position in the source text. NOTE(review): preprocessor.c in this patch accesses cur.x and cur.y, which are not members here. */ + int line; + int column; +}; + +struct whitespace { /* Tallies filled in by count_whitespace(). */ + int spaces; + int bspaces; /* backspace characters */ + int tabs; + int vtabs; /* vertical tabs */ + int lines; /* line separators (CR, LF, FF); a CR LF pair counts once */ +}; + +struct comment { /* One comment as collected by get_comment(). */ + enum comment_type type; + char *text; /* heap copy of the comment text */ + cursor start_pos; /* cursor before the comment */ + cursor end_pos; /* cursor after the comment */ + whitespace wsp; /* whitespace tallied in front of the comment */ +}; + +struct source { /* State of one input handled by the preprocessor. */ + source **include_list; /* not referenced by the code in this patch */ + char *filename; + char *text; /* position the skip_ and pp_ functions read from */ + int tab_width; /* columns per tab stop, used when advancing cur */ + cursor cur; /* current position */ +}; + +extern source *preprocess(const char *str, int dbg); /* entry point called by lex(); its definition in this patch is still empty */ +#endif -- cgit v1.2.3-13-gbd6f