From 5f753ddee7d935e0ba4750a6a8c26fe056c77612 Mon Sep 17 00:00:00 2001
From: mrb0nk500 <b0nk@b0nk.xyz>
Date: Wed, 16 Feb 2022 17:05:00 -0400
Subject: igen: Start work on writing a preprocessor.

---
 igen/lexer.c        |  13 ++--
 igen/preprocessor.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 igen/preprocessor.h |  47 +++++++++++++
 3 files changed, 247 insertions(+), 5 deletions(-)
 create mode 100644 igen/preprocessor.c
 create mode 100644 igen/preprocessor.h

diff --git a/igen/lexer.c b/igen/lexer.c
index 03f7e87..1d3268e 100644
--- a/igen/lexer.c
+++ b/igen/lexer.c
@@ -4,12 +4,13 @@
 #include <string.h>
 #include "lexer.h"
 #include "misc.h"
+#include "preprocessor.h"
 
-cond_stmt *lex_cond_stmt(char **str, int dbg) {
+cond_stmt *lex_cond_stmt(source *src, int dbg) {
 
 }
 
-stmt *lex_comp_stmt(char **str, int dbg) {
+stmt *lex_comp_stmt(source *src, int dbg) {
 	char *tmp = *str;
 	if (*tmp++ == '{') {
 		stmt *s = lex_stmt(&tmp, dbg);
@@ -17,13 +18,13 @@ stmt *lex_comp_stmt(char **str, int dbg) {
 			*str = tmp;
 			return s;
 		} else {
-			throw_error("Missing \'}\' in stmt.");
+			throw_error(src, 1, "Missing \'}\' in compound statement.");
 		}
 	}
 	return NULL;
 }
 
-stmt *lex_stmt(char **str, int dbg) {
+stmt *lex_stmt(source *src, int dbg) {
 	const alt_stmt alts[] = {
 		{STMT_DIR, offsetof(stmt, dir), lex_dir},
 		{STMT_FUNC, offsetof(stmt, func), lex_func},
@@ -45,7 +46,7 @@ stmt *lex_stmt(char **str, int dbg) {
 	return NULL;
 }
 
-stmt *lex_library(char **str, stmt **end, int dbg) {
+stmt *lex_library(source *src, stmt **end, int dbg) {
 	stmt *start = lex_stmt(str, dbg);
 	end = (end != NULL) ? end : &start;
 	for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) {
@@ -59,6 +60,8 @@ int lex(char *str, int dbg) {
 	stmt *start = NULL;
 	stmt *end = NULL;
 
+	source *src = preprocess(str, dbg);
 	start = library(&str, &end, dbg);
+
 	return (start != NULL && end != NULL);
 }
diff --git a/igen/preprocessor.c b/igen/preprocessor.c
new file mode 100644
index 0000000..e83bd20
--- /dev/null
+++ b/igen/preprocessor.c
@@ -0,0 +1,192 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "preprocessor.h"
+
+static const keyword *preproc_keywords[] = { /* NULL-terminated keyword table; assumes keyword is a three-member struct declared elsewhere (TODO confirm) */
+	&(const keyword) {"include", DIR_INCLUDE, pp_include}, /* the literal must be a keyword object, not a pointer to one */
+	NULL
+};
+
+char *skip_line(const char *str, int dbg) {
+	/* Advance to the first line separator, then past the whole run of separators. */
+	size_t span = strcspn(str, "\r\n\f");
+	return (char *)&str[span+strspn(&str[span], "\r\n\f")]; /* the const is dropped on purpose, so say so */
+}
+
+size_t line_span(const char *str, int dbg) { /* number of bytes between str and the place skip_line() stops at */
+	return (size_t)(skip_line(str, dbg) - str);
+}
+
+size_t get_comment_span(source *src, const char *str, int dbg) {
+	/* Byte length of the comment that starts at str, or 0 if no comment starts there. */
+	if (str[0] != '/') {
+		return 0;
+	} else if (str[1] == '*') {
+		const char *end = strstr(&str[2], "*/"); /* search past the opener so its '*' cannot close it */
+		if (end == NULL) { /* test before adding the terminator length: NULL+2 is never NULL */
+			throw_error(src, 0, "Unterminated comment (missing \"*/\").");
+			return strlen(str); /* if throw_error() returns, treat the rest of the input as comment */
+		}
+		return (size_t)(end-str)+strlen("*/");
+	}
+	return (str[1] == '/') ? line_span(str, dbg) : 0;
+}
+
+size_t get_whitespace_span(const char *str, int count_lines, int count_columns, int dbg) {
+	/* Length of the leading run of column whitespace (when count_columns is set)
+	 * plus the run of line separators directly after it (when count_lines is set).
+	 * NOTE(review): the two runs are not interleaved, so "\n " stops after "\n". */
+	const size_t columns = count_columns ? strspn(str, " \t\v\b") : 0;
+	if (!count_lines) {
+		return columns;
+	}
+	return columns + strspn(&str[columns], "\r\n\f");
+}
+
+void count_whitespace(whitespace *wsp, const char *str, size_t span, int count_lines, int count_columns, int dbg) { /* adds the whitespace found in the first span bytes of str to the counters in *wsp */
+	for (size_t i = 0; i < span && str[i] != '\0'; ++i) { /* also stops early at the string terminator */
+		char c = str[i];
+		if (str[i+1] != '\b') { /* c is not followed by a backspace, so c itself is counted */
+			if (count_columns) {
+				wsp->spaces += (c == ' ');
+				wsp->vtabs += (c == '\v');
+				wsp->tabs += (c == '\t');
+			}
+			if (count_lines) {
+				i += (c == '\r' && str[i+1] == '\n'); /* step over the '\n' so that "\r\n" counts as one line */
+				wsp->lines += (c == '\r' || c == '\n' || c == '\f');
+			}
+		} else { /* c is followed by a backspace: c is not counted, only the backspace is */
+			c = str[++i+1]; /* i now indexes the backspace; c is the character after it */
+			if (c != '\0') {
+				i += (count_lines && (c == '\r' && str[i+2] == '\n')); /* NOTE(review): skips one more byte when "\r\n" follows the backspace; intent unclear */
+			}
+			++wsp->bspaces;
+		}
+	}
+}
+
+char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int count_columns, int dbg) {
+	/* Measures the whitespace at src->text, advances src->cur by it, copies the
+	 * tallies to *wspace when wspace is not NULL, and returns the address just past it. */
+	char *text = src->text;
+	size_t span = get_whitespace_span(text, count_lines, count_columns, dbg);
+	whitespace wsp = {0};
+	count_whitespace(&wsp, text, span, count_lines, count_columns, dbg); /* text is the only string in scope here */
+	if (wsp.tabs) {
+		const int tab_stop = src->tab_width;
+		const int extra_tabs = wsp.spaces/tab_stop;
+		src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop); /* struct cursor has line/column members */
+	} else {
+		src->cur.column += wsp.spaces;
+	}
+	src->cur.line += wsp.lines;
+	if (wspace != NULL) {
+		*wspace = wsp;
+	}
+	return &text[span];
+}
+
+char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int dbg) {
+	/* Skips the comment at src->text (if any), advances src->cur, and returns the address just past it. */
+	char *start = src->text;
+	size_t span = get_comment_span(src, start, dbg);
+	enum comment_type dummy = COMM_NONE;
+	whitespace wsp = {0};
+
+	type = (type != NULL) ? type : &dummy;
+
+	if (span) {
+		if (start[1] == '*') {
+			*type = COMM_MULTI;
+			/* Count lines over the comment itself, from its first byte, so the count cannot run past its end. */
+			count_whitespace(&wsp, start, span, 1, 0, dbg);
+			count_whitespace(&wsp, &start[span], strspn(&start[span], "\r\n\f"), 0, 1, dbg); /* NOTE(review): with count_lines off this counts none of the separators it spans; confirm intent */
+		} else {
+			*type = COMM_SINGLE;
+			++wsp.lines;
+			/* NOTE(review): always counts exactly one line, whatever ends the comment. */
+		}
+
+		if (wspace != NULL) {
+			*wspace = wsp;
+		}
+
+		if (wsp.tabs) {
+			const int tab_stop = src->tab_width;
+			const int extra_tabs = wsp.spaces/tab_stop;
+			src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop);
+		} else {
+			src->cur.column += wsp.spaces;
+		}
+		src->cur.line += wsp.lines;
+	} else {
+		/* No comment here: span is 0, so the value returned below is src->text itself. */
+		*type = COMM_NONE;
+	}
+	return &start[span];
+}
+
+comment *get_comment(source *src, int dbg) {
+	/* Builds a heap-allocated comment from the (optionally whitespace-led) comment at src->text; NULL if allocation fails. */
+	const char *body = "";
+	size_t comment_len = 0;
+	comment *com = calloc(1, sizeof(comment));
+	if (com == NULL) { return NULL; }
+	src->text = skip_whitespace(src, &com->wsp, 1, 1, dbg); /* skip_comment() looks at src->text, so commit the skip */
+	com->start_pos = src->cur;
+	skip_comment(src, NULL, &com->type, dbg); /* NOTE(review): src->text is left at the start of the comment */
+	com->end_pos = src->cur;
+	if (com->type != COMM_NONE) {
+		const char *end;
+		body = &src->text[strlen("/*")]; /* both kinds of comment opener are two characters long */
+		end = (com->type == COMM_MULTI) ? strstr(body, "*/") : &body[strcspn(body, "\r\n\f")];
+		comment_len = (end != NULL) ? (size_t)(end-body) : strlen(body); /* measured from the body, which is also where the copy starts */
+	}
+	com->text = calloc(comment_len+1, sizeof(char));
+	if (com->text == NULL) { free(com); return NULL; }
+	memcpy(com->text, body, comment_len);
+	return com;
+}
+
+source *pp_include(source *src, int dbg) { /* loads the file named by the quoted operand of an include directive; NULL on failure */
+	char *text = skip_whitespace(src, NULL, 0, 1, dbg);
+	const char c = *text;
+	char *tmp = NULL;
+	long dummy = 0;
+	if (c == '\r' || c == '\n' || c == '\f') {
+		throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text));
+		src->text = text;
+		return NULL;
+	} else if (c != '\"' && c != '\'') {
+		throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive.");
+		src->text = text;
+		return NULL;
+	} else if ((tmp = strchr(++text, c)) == NULL) { /* text now points just past the opening quote */
+		throw_error(src, 1, "Missing terminating %c character.", c);
+		src->text = text;
+		return NULL;
+	}
+	source *inc_src = calloc(1, sizeof(source));
+	if (inc_src == NULL || (inc_src->filename = calloc((size_t)(tmp-text)+1, sizeof(char))) == NULL) { /* +1 leaves room for the terminator */
+		free(inc_src);
+		return NULL;
+	}
+	inc_src->tab_width = src->tab_width;
+	memcpy(inc_src->filename, text, (size_t)(tmp-text)); /* the name has to be copied before read_file() can use it */
+	inc_src->text = read_file(inc_src->filename, &dummy);
+	if (inc_src->text == NULL) {
+		throw_error(src, 1, "File \"%s\" couldn't be read.", inc_src->filename);
+		free(inc_src->filename);
+		free(inc_src);
+		src->text = skip_line(text, dbg);
+		return NULL;
+	}
+	src->text = &tmp[1]; /* continue after the closing quote; TODO: check what follows the operand on this line */
+	return inc_src;
+}
+source *preprocess(const char *str, int dbg) {
+	return NULL; /* stub: nothing is preprocessed yet, but callers must still receive a defined value */
+}
diff --git a/igen/preprocessor.h b/igen/preprocessor.h
new file mode 100644
index 0000000..7a85e45
--- /dev/null
+++ b/igen/preprocessor.h
@@ -0,0 +1,47 @@
+#ifndef PREPROCESSOR_H
+#define PREPROCESSOR_H
+
+typedef struct cursor cursor;
+typedef struct whitespace whitespace;
+typedef struct comment comment;
+typedef struct source source;
+
+enum comment_type { /* kind of comment reported by skip_comment() */
+	COMM_NONE,
+	COMM_MULTI,
+	COMM_SINGLE,
+	NUM_COMMS
+};
+typedef enum comment_type comment_type; /* placed after the definition: ISO C has no forward declaration of enums */
+
+struct cursor { /* a position in source text; code that updates a cursor must use the member names below (there is no x or y) */
+	int line;
+	int column;
+};
+
+struct whitespace { /* tallies accumulated by count_whitespace() */
+	int spaces;	/* ' ' characters */
+	int bspaces;	/* '\b' characters that follow another character */
+	int tabs;	/* '\t' characters */
+	int vtabs;	/* '\v' characters */
+	int lines;	/* line breaks: '\r', '\n' or '\f', with "\r\n" counted once */
+};
+
+struct comment { /* one comment as collected by get_comment() */
+	enum comment_type type;
+	char *text;	/* heap buffer allocated by get_comment() */
+	cursor start_pos;	/* src->cur sampled before skip_comment() runs */
+	cursor end_pos;	/* src->cur sampled after skip_comment() runs */
+	whitespace wsp;	/* whitespace tallies from the skip_whitespace() call ahead of the comment */
+};
+
+struct source { /* one input text plus the scanning state kept for it */
+	source **include_list;	/* NOTE(review): presumably the sources pulled in by include directives; nothing in preprocessor.c fills it yet */
+	char *filename;	/* name handed to read_file() by pp_include() */
+	char *text;	/* text the skip_*() helpers read from */
+	int tab_width;	/* tab stop used when tabs advance the cursor column */
+	cursor cur;	/* position advanced by skip_whitespace() and skip_comment() */
+};
+
+extern source *preprocess(const char *str, int dbg); /* called by lex() with its input string */
+#endif
-- 
cgit v1.2.3-70-g09d2