summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author mrb0nk500 <b0nk@b0nk.xyz> 2022-02-16 17:05:00 -0400
committer mrb0nk500 <b0nk@b0nk.xyz> 2022-02-18 10:05:01 -0400
commit 5f753ddee7d935e0ba4750a6a8c26fe056c77612 (patch)
tree 07b152f726bff71f871f3a5a736098a1e0b0bd04
parent 722c7f08e409d1f6f3a26bda666c15d7082f52e3 (diff)
igen: Start work on writing a preprocessor.
-rw-r--r-- igen/lexer.c 13
-rw-r--r-- igen/preprocessor.c 192
-rw-r--r-- igen/preprocessor.h 47
3 files changed, 247 insertions, 5 deletions
diff --git a/igen/lexer.c b/igen/lexer.c
index 03f7e87..1d3268e 100644
--- a/igen/lexer.c
+++ b/igen/lexer.c
@@ -4,12 +4,13 @@
#include <string.h>
#include "lexer.h"
#include "misc.h"
+#include "preprocessor.h"
-cond_stmt *lex_cond_stmt(char **str, int dbg) {
+cond_stmt *lex_cond_stmt(source *src, int dbg) {
}
-stmt *lex_comp_stmt(char **str, int dbg) {
+stmt *lex_comp_stmt(source *src, int dbg) {
char *tmp = *str;
if (*tmp++ == '{') {
stmt *s = lex_stmt(&tmp, dbg);
@@ -17,13 +18,13 @@ stmt *lex_comp_stmt(char **str, int dbg) {
*str = tmp;
return s;
} else {
- throw_error("Missing \'}\' in stmt.");
+ throw_error(src, 1, "Missing \'}\' in compound statement.");
}
}
return NULL;
}
-stmt *lex_stmt(char **str, int dbg) {
+stmt *lex_stmt(source *src, int dbg) {
const alt_stmt alts[] = {
{STMT_DIR, offsetof(stmt, dir), lex_dir},
{STMT_FUNC, offsetof(stmt, func), lex_func},
@@ -45,7 +46,7 @@ stmt *lex_stmt(char **str, int dbg) {
return NULL;
}
-stmt *lex_library(char **str, stmt **end, int dbg) {
+stmt *lex_library(source *src, stmt **end, int dbg) {
stmt *start = lex_stmt(str, dbg);
end = (end != NULL) ? end : &start;
for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) {
@@ -59,6 +60,8 @@ int lex(char *str, int dbg) {
stmt *start = NULL;
stmt *end = NULL;
+ source *src = preprocess(str, dbg);
start = library(&str, &end, dbg);
+
return (start != NULL && end != NULL);
}
diff --git a/igen/preprocessor.c b/igen/preprocessor.c
new file mode 100644
index 0000000..e83bd20
--- /dev/null
+++ b/igen/preprocessor.c
@@ -0,0 +1,192 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "preprocessor.h"
+
+/* Defined further down in this file; declared here so the table below can name it. */
+source *pp_include(source *src, int dbg);
+
+/*
+ * NULL-terminated table of pointers to the directive keywords handled by the
+ * preprocessor.  Each entry is a file-scope compound literal, which has
+ * static storage duration, so taking its address here is valid.
+ *
+ * The literal's type is `const keyword` (the object itself); writing it as a
+ * pointer type would make the table hold pointers-to-pointers.
+ */
+static const keyword *preproc_keywords[] = {
+	&(const keyword) {"include", DIR_INCLUDE, pp_include},
+	NULL
+};
+
+/*
+ * Return a pointer to the start of the line after the one str points into.
+ *
+ * Scans up to the first '\r', '\n' or '\f' (or the terminating NUL) and then
+ * past the whole run of those line-separator characters that follows, so
+ * directly adjacent empty lines are skipped too.  If there is no separator
+ * the result points at the terminating NUL.  dbg is not used.
+ *
+ * The result points into str.  The const qualifier is dropped explicitly on
+ * the way out, the same way strchr() does, instead of through an implicit
+ * qualifier-discarding conversion.
+ */
+char *skip_line(const char *str, int dbg) {
+	const size_t span = strcspn(str, "\r\n\f");
+	(void)dbg;
+	return (char *)&str[span+strspn(&str[span], "\r\n\f")];
+}
+
+/*
+ * Number of bytes from str up to the start of the next line, as located by
+ * skip_line(); the trailing line-separator characters are included.
+ */
+size_t line_span(const char *str, int dbg) {
+	const char *next_line = skip_line(str, dbg);
+	return next_line-str;
+}
+
+/*
+ * Length in bytes of the comment that starts exactly at str, or 0 if str
+ * does not start with a comment opener.
+ *
+ * - "/" followed by "*": the span runs up to and including the closing
+ *   delimiter.  The search for the closer starts after the opener, so the
+ *   three characters '/', '*', '/' are not mistaken for a complete comment.
+ *   If no closer exists an error is reported through throw_error() and the
+ *   rest of the string is treated as the comment.
+ * - "//": the span is the rest of the line as measured by line_span(),
+ *   trailing line separators included.
+ *
+ * src is only forwarded to throw_error().
+ */
+size_t get_comment_span(source *src, const char *str, int dbg) {
+	if (str[0] != '/') {
+		return 0;
+	}
+
+	if (str[1] == '*') {
+		/* str[1] is '*', so str[2] is at worst the terminating NUL. */
+		const char *body = &str[strlen("/*")];
+		const char *end = strstr(body, "*/");
+		if (end == NULL) {
+			/* Check before doing any arithmetic on the result of strstr(). */
+			throw_error(src, 0, "Unterminated comment (missing \"*/\").");
+			return strlen(str);
+		}
+		return (end+strlen("*/"))-str;
+	}
+
+	if (str[1] == '/') {
+		return line_span(str, dbg);
+	}
+
+	return 0;
+}
+
+/*
+ * Length of the leading whitespace of str.
+ *
+ * When count_columns is non-zero the leading run of ' ', '\t', '\v' and '\b'
+ * is measured first; when count_lines is non-zero the run of '\r', '\n' and
+ * '\f' that follows it is added.  Only one run of each kind is measured, in
+ * that order.  dbg is not used.
+ */
+size_t get_whitespace_span(const char *str, int count_lines, int count_columns, int dbg) {
+	const size_t columns = count_columns ? strspn(str, " \t\v\b") : 0;
+	const size_t lines = count_lines ? strspn(&str[columns], "\r\n\f") : 0;
+	return columns+lines;
+}
+
+/*
+ * Tally the whitespace characters in the first span bytes of str into *wsp.
+ * The counters are added to, not reset, and scanning also stops at a NUL.
+ *
+ * count_columns enables counting of ' ', '\v' and '\t'.
+ * count_lines enables counting of '\r', '\n' and '\f' as lines, with a
+ * "\r\n" pair counted as a single line.
+ * A character that is immediately followed by '\b' is not counted at all;
+ * wsp->bspaces is incremented instead.
+ * dbg is not used.
+ */
+void count_whitespace(whitespace *wsp, const char *str, size_t span, int count_lines, int count_columns, int dbg) {
+ for (size_t i = 0; i < span && str[i] != '\0'; ++i) {
+ char c = str[i];
+ /* str[i] is not NUL here, so str[i+1] is at worst the terminator. */
+ if (str[i+1] != '\b') {
+ if (count_columns) {
+ wsp->spaces += (c == ' ');
+ wsp->vtabs += (c == '\v');
+ wsp->tabs += (c == '\t');
+ }
+ if (count_lines) {
+ /* Step over the '\n' of a "\r\n" pair so the pair counts once. */
+ i += (c == '\r' && str[i+1] == '\n');
+ wsp->lines += (c == '\r' || c == '\n' || c == '\f');
+ }
+ } else {
+ /* Move i onto the '\b' and read the character that follows it. */
+ c = str[++i+1];
+ if (c != '\0') {
+ /*
+  * i indexes the '\b' here, so c is str[i+1] and str[i+2] is the
+  * character after c.  When c is '\r' followed by '\n' the '\r' is
+  * stepped over and the next iteration starts on the '\n'.
+  * NOTE(review): confirm that skipping the '\r' this way is intended.
+  */
+ i += (count_lines && (c == '\r' && str[i+2] == '\n'));
+ }
+ ++wsp->bspaces;
+ }
+ }
+}
+
+/*
+ * Measure the whitespace at src->text, advance src->cur over it and return a
+ * pointer to the first character after it.  src->text itself is not changed.
+ *
+ * count_lines / count_columns are forwarded to get_whitespace_span() and
+ * count_whitespace() and select which kinds of whitespace are considered.
+ * If wspace is not NULL it receives the tallies for the skipped run.
+ *
+ * Column accounting: when the run contains tabs, the column advances by
+ * (tabs + spaces / tab_width) * tab_width; otherwise by the number of spaces.
+ * A tab width that is not positive is treated like "no tabs" so the division
+ * cannot fault.
+ */
+char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int count_columns, int dbg) {
+	char *text = src->text;
+	const size_t span = get_whitespace_span(text, count_lines, count_columns, dbg);
+	const int tab_stop = src->tab_width;
+	whitespace wsp = {0};
+
+	/* Count over the same buffer the span was measured on. */
+	count_whitespace(&wsp, text, span, count_lines, count_columns, dbg);
+
+	if (wsp.tabs && tab_stop > 0) {
+		const int extra_tabs = wsp.spaces/tab_stop;
+		src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop);
+	} else {
+		src->cur.column += wsp.spaces;
+	}
+	src->cur.line += wsp.lines;
+
+	if (wspace != NULL) {
+		*wspace = wsp;
+	}
+	return &text[span];
+}
+
+/*
+ * Skip the comment, if any, that starts exactly at src->text.
+ *
+ * Returns a pointer to the first character after the comment, or src->text
+ * unchanged when no comment starts there.  src->text itself is not modified;
+ * src->cur is advanced by the lines found inside the comment.
+ *
+ * type, when not NULL, receives COMM_MULTI, COMM_SINGLE or COMM_NONE.
+ * wspace, when not NULL, receives the tallies gathered for the comment; it is
+ * left untouched when there is no comment.
+ */
+char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int dbg) {
+	char *text = src->text;
+	const size_t span = get_comment_span(src, text, dbg);
+	enum comment_type dummy = COMM_NONE;
+	whitespace wsp = {0};
+
+	type = (type != NULL) ? type : &dummy;
+
+	if (span == 0) {
+		*type = COMM_NONE;
+		return text;
+	}
+
+	/* A non-zero span means text[0] is '/', so text[1] is readable. */
+	if (text[1] == '*') {
+		*type = COMM_MULTI;
+		/* Count the line breaks inside the comment itself. */
+		count_whitespace(&wsp, text, span, 1, 0, dbg);
+		/*
+		 * NOTE(review): this looks at the line separators right after the
+		 * comment with line counting off, so it adds nothing for a run made
+		 * only of separators -- confirm what was meant to be counted here.
+		 */
+		count_whitespace(&wsp, &text[span], strspn(&text[span], "\r\n\f"), 0, 1, dbg);
+	} else {
+		*type = COMM_SINGLE;
+		/* A single-line comment's span ends with its line terminator. */
+		++wsp.lines;
+	}
+
+	if (wspace != NULL) {
+		*wspace = wsp;
+	}
+
+	if (wsp.tabs) {
+		const int tab_stop = src->tab_width;
+		const int extra_tabs = wsp.spaces/tab_stop;
+		src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop);
+	} else {
+		src->cur.column += wsp.spaces;
+	}
+	src->cur.line += wsp.lines;
+
+	/* text still points at the opener, so this is exactly one past the comment. */
+	return &text[span];
+}
+
+/*
+ * Read the comment that follows any leading whitespace at src->text.
+ *
+ * Returns a newly allocated comment, or NULL if allocation fails.  The
+ * caller owns both the comment and its text.
+ *   - wsp       : tallies of the whitespace skipped before the comment
+ *   - start_pos : src->cur after that whitespace
+ *   - end_pos   : src->cur after the comment
+ *   - type      : as reported by skip_comment()
+ *   - text      : the comment body without its delimiters; an empty string
+ *                 when no comment was found (type is COMM_NONE)
+ *
+ * src->text is advanced past the whitespace and the comment, so that
+ * skip_comment() looks at the position where the comment actually starts.
+ */
+comment *get_comment(source *src, int dbg) {
+	/* "/" "*" and "//" are both two characters long. */
+	const size_t opener_len = strlen("/*");
+	comment *com = calloc(1, sizeof *com);
+	const char *body;
+	char *text;
+	char *after_comment;
+	size_t comment_len = 0;
+
+	if (com == NULL) {
+		return NULL;
+	}
+
+	text = skip_whitespace(src, &com->wsp, 1, 1, dbg);
+	src->text = text;
+	com->start_pos = src->cur;
+
+	after_comment = skip_comment(src, NULL, &com->type, dbg);
+	com->end_pos = src->cur;
+
+	/* Measure from after the opener so the delimiters stay out of the copy. */
+	body = text;
+	switch (com->type) {
+		case COMM_MULTI: {
+			const char *end;
+			body = &text[opener_len];
+			end = strstr(body, "*/");
+			comment_len = (end != NULL) ? (size_t)(end-body) : strlen(body);
+			break;
+		}
+		case COMM_SINGLE:
+			body = &text[opener_len];
+			comment_len = strcspn(body, "\r\n\f");
+			break;
+		default:
+			break;
+	}
+
+	com->text = calloc(comment_len+1, sizeof(char));
+	if (com->text == NULL) {
+		free(com);
+		return NULL;
+	}
+	if (comment_len != 0) {
+		memcpy(com->text, body, comment_len);
+	}
+
+	src->text = after_comment;
+	return com;
+}
+
+/*
+ * Handle the operand of an include directive found at src->text.
+ *
+ * The operand must be a file name enclosed in a matching pair of '"' or '\''
+ * characters, optionally preceded by column whitespace on the same line.
+ *
+ * On success returns a newly allocated source for the included file, with
+ * filename and text filled in and tab_width copied from src; src->text is
+ * left just past the closing quote.  The caller owns the returned source.
+ *
+ * On failure an error is reported through throw_error(), src->text is moved
+ * to where scanning stopped (or to the next line when the file could not be
+ * read) and NULL is returned.
+ *
+ * TODO: whatever follows the closing quote on the line (whitespace,
+ * comments, stray tokens) is not examined yet.
+ */
+source *pp_include(source *src, int dbg) {
+	char *text = skip_whitespace(src, NULL, 0, 1, dbg);
+	char *closing;
+	char quote;
+	size_t name_len;
+	long dummy = 0;
+	source *inc_src;
+
+	if (*text == '\r' || *text == '\n' || *text == '\f') {
+		throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text));
+		src->text = text;
+		return NULL;
+	}
+	if (*text != '\"' && *text != '\'') {
+		throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive.");
+		src->text = text;
+		return NULL;
+	}
+
+	quote = *text++;
+	closing = strchr(text, quote);
+	if (closing == NULL) {
+		throw_error(src, 1, "Missing terminating %c character.", quote);
+		src->text = text;
+		return NULL;
+	}
+	name_len = (size_t)(closing-text);
+
+	inc_src = calloc(1, sizeof *inc_src);
+	if (inc_src != NULL) {
+		/* One extra byte for the terminator; calloc() already zeroes it. */
+		inc_src->filename = calloc(name_len+1, sizeof(char));
+	}
+	if (inc_src == NULL || inc_src->filename == NULL) {
+		throw_error(src, 1, "Out of memory while processing an include directive.");
+		free(inc_src);
+		src->text = skip_line(text, dbg);
+		return NULL;
+	}
+
+	/* Copy the name between the quotes before trying to open the file. */
+	memcpy(inc_src->filename, text, name_len);
+	inc_src->tab_width = src->tab_width;
+	inc_src->text = read_file(inc_src->filename, &dummy);
+	if (inc_src->text == NULL) {
+		throw_error(src, 1, "File \"%s\" couldn't be read.", inc_src->filename);
+		free(inc_src->filename);
+		free(inc_src);
+		src->text = skip_line(text, dbg);
+		return NULL;
+	}
+
+	src->text = &closing[1];
+	return inc_src;
+}
+
+/*
+ * Entry point of the preprocessor.
+ *
+ * Not implemented yet.  Until it is, NULL is returned explicitly so that
+ * callers which store the result get a defined value instead of whatever a
+ * non-void function leaves behind when control runs off its end.
+ */
+source *preprocess(const char *str, int dbg) {
+	/* TODO: presumably builds a source from str and processes its directives. */
+	(void)str;
+	(void)dbg;
+	return NULL;
+}
diff --git a/igen/preprocessor.h b/igen/preprocessor.h
new file mode 100644
index 0000000..7a85e45
--- /dev/null
+++ b/igen/preprocessor.h
@@ -0,0 +1,47 @@
+#ifndef PREPROCESSOR_H
+#define PREPROCESSOR_H
+
+/*
+ * Kind of comment found at a position in the text.
+ * Defined before the typedef below: ISO C only allows `enum tag` without an
+ * enumerator list once the enumeration is complete.
+ */
+enum comment_type {
+	COMM_NONE,	/* No comment at that position. */
+	COMM_MULTI,	/* Comment opened by '/' followed by '*'. */
+	COMM_SINGLE,	/* Comment opened by "//". */
+	NUM_COMMS	/* One past the last kind; equals the number of kinds above. */
+};
+
+typedef enum comment_type comment_type;
+typedef struct cursor cursor;
+typedef struct whitespace whitespace;
+typedef struct comment comment;
+typedef struct source source;
+
+/*
+ * A line/column position.  Used for the current position of a source and
+ * for the start and end positions recorded in a comment.
+ * NOTE(review): whether counting starts at 0 or 1 is not visible here.
+ */
+struct cursor {
+ int line;
+ int column;
+};
+
+/* Tallies of whitespace characters, as accumulated by count_whitespace(). */
+struct whitespace {
+ /* Number of ' ' characters. */
+ int spaces;
+ /* Number of characters that were followed by a '\b'. */
+ int bspaces;
+ /* Number of '\t' characters. */
+ int tabs;
+ /* Number of '\v' characters. */
+ int vtabs;
+ /* Number of line breaks ('\r', '\n', '\f'; "\r\n" counts once). */
+ int lines;
+};
+
+/* A comment extracted from a source, as filled in by get_comment(). */
+struct comment {
+ /* Kind of comment that was found. */
+ enum comment_type type;
+ /* Heap-allocated, NUL-terminated copy of the comment's text. */
+ char *text;
+ /* Position of the source once the leading whitespace was skipped. */
+ cursor start_pos;
+ /* Position of the source once the comment was skipped. */
+ cursor end_pos;
+ /* Tallies of the whitespace skipped before the comment. */
+ whitespace wsp;
+};
+
+/* State of one piece of text being preprocessed. */
+struct source {
+ /* NOTE(review): presumably the sources pulled in by include directives; not used by the code visible here -- confirm. */
+ source **include_list;
+ /* Name of the file the text came from; set by pp_include() for included files. */
+ char *filename;
+ /* The text being scanned; the skip_* helpers start reading here. */
+ char *text;
+ /* Number of columns per tab stop, used when advancing the column over tabs. */
+ int tab_width;
+ /* Current line/column position within the text. */
+ cursor cur;
+};
+
+/*
+ * Preprocessor entry point: takes the raw text and a debug flag and yields a
+ * source.
+ * NOTE(review): ownership of the returned source and the meaning of a NULL
+ * result are not established by the visible code -- document once settled.
+ */
+extern source *preprocess(const char *str, int dbg);
+#endif