diff options
| -rw-r--r-- | igen/igen.c | 13 | ||||
| -rw-r--r-- | igen/lexer.c | 142 | ||||
| -rw-r--r-- | igen/lexer.h | 12 | ||||
| -rw-r--r-- | igen/misc.h | 3 | ||||
| -rw-r--r-- | igen/preprocessor.c | 314 | ||||
| -rw-r--r-- | igen/preprocessor.h | 34 | 
6 files changed, 464 insertions, 54 deletions
diff --git a/igen/igen.c b/igen/igen.c index fa9f10f..921ca79 100644 --- a/igen/igen.c +++ b/igen/igen.c @@ -4,27 +4,26 @@  #include <string.h>  #include "lexer.h"  #include "misc.h" +#include "preprocessor.h"  void usage(const char *name) {  	printf("Usage: %s <file>\n", name);  }  int main(int argc, char **argv) { -	char *buf; -	long file_size = 0;  	int dbg = 0;  	int ret = 0; +	source *root_src;  	if (argc < 2) {  		usage(argv[0]);  		ret = 1;  	} -	buf = read_file(argc[1], &file_size); -	if (buf == NULL) { -		printf("Error: read_file() returned NULL.\n"); +	root_src = preprocess(NULL, argv[1], dbg); +	if (root_src == NULL) {  		usage(argv[0]);  		ret = 1; +	} else { +		ret = lex(root_src, dbg);  	} - -	ret = lex(buf, dbg);  	return ret;  } diff --git a/igen/lexer.c b/igen/lexer.c index 1d3268e..15226b8 100644 --- a/igen/lexer.c +++ b/igen/lexer.c @@ -6,35 +6,133 @@  #include "misc.h"  #include "preprocessor.h" +char *skip_seperators(source *src, whitespace *wspace, int preserve_src, int dbg) { +	char *text = NULL; +	char *tmp = src->text; + +	src->text = max(src->text, text); + +	src->text = skip_whitespace(src, wspace, 1, 1, dbg); +	src->text = skip_comment(src, wspace, dbg); + +	text = src->text; +	if (preserve_src) { +		src->text = tmp; +	} +	return text; +} +  cond_stmt *lex_cond_stmt(source *src, int dbg) { +	const keyword *cond_keywords[] = { +		{"do", COND_DO, NULL}, +		{"for", COND_FOR, NULL}, +		{"if", COND_IF, NULL}, +		{"while", COND_WHILE, NULL}, +	}; +	const char *text = src->text; +	char *key = NULL; +	size_t key_len = 0; +	cond_type type = COND_NONE; +	whitespace wsp = {0}; + +	text = skip_seperators(src, &wsp, 1, dbg); +	key_len = strcspn(text, " \t\v\b"); + +	key = calloc(key_len+1, sizeof(char)); +	memcpy(key, text, key_len); +	type = find_keyword(key, cond_keywords, NULL, NULL, dbg); +	free(key); + +	text += key_len+1; + +	if (type >= COND_NONE) { +		cond_stmt *cond = calloc(1, sizeof(cond_stmt)); +		cond->type = type; + +		src->text = text; +		src->cur.x += key_len+1; + +		if (type != COND_DO) { +			cond->expr = lex_expr(src, dbg); +		} + +		cond->stmt = lex_stmt(src, dbg); + +		if (type == COND_DO) { +			text = skip_seperators(src, &wsp, 1, dbg); +			key_len = strcspn(text, " \t\v\b"); + +			key = calloc(key_len+1, sizeof(char)); +			memcpy(key, text, key_len); + +			if (!strcmp(key, "while")) { +				text += key_len+1; + +				src->text = text; +				src->cur.x += key_len+1; + +				cond->expr = lex_expr(src, dbg); + +				text = skip_seperators(src, &wsp, 1, dbg); + +				if (*text++ != ';') { +					--text; +					throw_error(src, 1, "Missing \';\' after do while statement."); +				} else { +					src->text = text; +					++src->cur.x; +				} +			} else { +				throw_error(src, 1, "Missing \'while\' after do while statement."); +			} +			free(key); +		} +		return s; +	}  }  stmt *lex_comp_stmt(source *src, int dbg) { -	char *tmp = *str; -	if (*tmp++ == '{') { -		stmt *s = lex_stmt(&tmp, dbg); -		if (*tmp++ == '}') { -			*str = tmp; -			return s; +	char *text = NULL; +	whitespace wsp = {0}; + +	text = skip_seperators(src, &wsp, dbg); + +	if (*text++ == '{') { +		stmt *s; + +		src->text = text; +		++src->cur.x; + +		s = lex_stmt(src, dbg); +		s->wsp = wsp; + +		text = src->text; + +		if (*text++ == '}') { +			++src->cur.x;  		} else { -			throw_error(src, 1, "Missing \'}\' in compound statement."); +			--text; +			throw_error(src, 1, "Missing terminating \'}\' in compound statement.");  		} +		src->text = text; +		return s;  	}  	return NULL;  }  stmt *lex_stmt(source *src, int dbg) { +	const char *text = src->text;  	const alt_stmt alts[] = { -		{STMT_DIR, offsetof(stmt, dir), lex_dir},  		{STMT_FUNC, offsetof(stmt, func), lex_func},  		{STMT_EXPR, offsetof(stmt, expr), lex_exprs},  		{STMT_COND, offsetof(stmt, cond_stmt), lex_cond_stmt},  		{STMT_COMP, offsetof(stmt, down), lex_comp_stmt},  	}; +  	for (int i = 0; i < NUM_STMTS; ++i) { -		char *tmp = *str; -		void *data = alts[i].lex(&tmp, dbg); +		src->text = text; +		void *data = alts[i].lex(src, dbg);  		if (data != NULL) {  			stmt *s = calloc(1, sizeof(stmt));  			void **member = (char **)s+alts[i].offset; @@ -43,25 +141,25 @@ stmt *lex_stmt(source *src, int dbg) {  		}  	} +	src->text = text;  	return NULL;  } -stmt *lex_library(source *src, stmt **end, int dbg) { -	stmt *start = lex_stmt(str, dbg); -	end = (end != NULL) ? end : &start; -	for (stmt *s = start; s != NULL; s = lex_stmt(str, dbg)) { -		(*end)->next = s; -		*end = s; +void lex_library(source *src, int dbg) { +	src->root = lex_stmt(src, dbg); +	src->last = (src->last != NULL) ? src->last : src->root; +	for (stmt *s = src->root; s != NULL; s = lex_stmt(str, dbg)) { +		src->last->next = s; +		src->last = s;  	}  	return start;  }; -int lex(char *str, int dbg) { -	stmt *start = NULL; -	stmt *end = NULL; +int lex(source *src, int dbg) { +	char *text = src->text; -	source *src = preprocess(str, dbg); -	start = library(&str, &end, dbg); +	library(src, dbg); +	src->text = text; -	return (start != NULL && end != NULL); +	return (src->root != NULL && src->last != NULL);  } diff --git a/igen/lexer.h b/igen/lexer.h index 454d2e1..0c5fea4 100644 --- a/igen/lexer.h +++ b/igen/lexer.h @@ -2,15 +2,17 @@  #define LEXER_H  #include <stdlib.h> +#include "preprocessor.h"  typedef enum stmt_type stmt_type;  typedef enum cond_type cond_type; +typedef struct source source;  typedef struct alt_stmt alt_stmt;  typedef struct cond_stmt cond_stmt;  typedef struct stmt stmt;  enum stmt_type { -	STMT_DIR, +	STMT_NONE,  	STMT_FUNC,  	STMT_EXPR,  	STMT_COND, @@ -19,17 +21,18 @@ enum stmt_type {  };  enum cond_type { +	COND_NONE,  	COND_IF,  	COND_FOR,  	COND_WHILE, -	COND_DO_WHILE, +	COND_DO,  	NUM_CONDS  };  struct alt_stmt {  	int type;  	size_t offset; -	void *(*lex)(char **str, int dbg); +	void *(*lex)(source *src, int dbg);  };  struct cond_stmt { @@ -41,7 +44,6 @@ struct cond_stmt {  struct stmt {  	stmt_type type;  	union { -		dir *dir;  		func *func;  		expr *expr;  		cond_stmt *cond_stmt; @@ -50,5 +52,5 @@ struct stmt {  	stmt *next;  }; -extern int lex(char *str, int dbg); +extern int lex(source *src, int dbg);  #endif diff --git a/igen/misc.h b/igen/misc.h index 9216250..dc92cd8 100644 --- a/igen/misc.h +++ b/igen/misc.h @@ -1,6 +1,9 @@  #ifndef MISC_H  #define MISC_H +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) +  extern char *read_file(const char *filename, long *size);  extern char *get_line(char **str);  extern char *make_str(const char *str); diff --git a/igen/preprocessor.c b/igen/preprocessor.c index e83bd20..e6a895c 100644 --- a/igen/preprocessor.c +++ b/igen/preprocessor.c @@ -2,6 +2,7 @@  #include <stdio.h>  #include <stdlib.h>  #include <string.h> +#include "misc.h"  #include "preprocessor.h"  static const keyword *preproc_keywords[] = { @@ -89,11 +90,11 @@ char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int coun  	return &text[span];  } -char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int dbg) { +char *skip_comment(source *src, whitespace *wspace, comment_type *type, int dbg) {  	/*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/  	char *text = src->text;  	size_t span = get_comment_span(src, text++, dbg); -	enum comment_type dummy = COMM_NONE; +	comment_type dummy = COMM_NONE;  	whitespace wsp = {0};  	type = (type != NULL) ? type : &dummy; @@ -129,8 +130,59 @@ char *skip_comment(source *src, whitespace *wspace, enum comment_type *type, int  	return &text[span];  } -comment *get_comment(source *src, int dbg) { +char *find_comment(source *src, whitespace *wspace, comment_type *type, int dbg) { +	/*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/  	char *text = src->text; +	char *multi_comment = strstr(text, "/*"); +	char *single_comment = strstr(text, "//"); +	char *next_comment = NULL; +	comment_type dummy = COMM_NONE; +	whitespace wsp = {0}; + +	type = (type != NULL) ? type : &dummy; + +	if (multi_comment != NULL && single_comment != NULL) { +		char *first_comment = min(multi_comment, single_comment); +		*type = (first_comment == multi_comment) ? COMM_MULTI : COMM_SINGLE; +	} else if (multi_comment == NULL && single_comment == NULL) { +		*type = COMM_NONE; +	} else if (multi_comment != NULL) { +		*type = COMM_MULTI; +	} else { +		*type = COMM_SINGLE; +	} + +	switch (*type) { +		case COMM_MULTI: next_comment = multi_comment; break; +		case COMM_SINGLE: next_comment = single_comment; break; +	} + +	if (next_comment != NULL) { +		size_t span = next_comment-text; +		count_whitespace(&wsp, text, span, 1, 0, dbg); +		count_whitespace(&wsp, &text[span], strspn(&text[span], "\r\n\f"), 0, 1, dbg); + +		if (wspace != NULL) { +			*wspace = wsp; +		} + +		if (wsp.tabs) { +			const int tab_stop = src->tab_width; +			const int extra_tabs = wsp.spaces/tab_stop; +			src->cur.x += ((wsp.tabs+extra_tabs)*tab_stop); +		} else { +			src->cur.x += wsp.spaces; +		} +		src->cur.y += wsp.lines; + +		return &text[span]; +	} else { +		return text; +	} + +} + +comment *get_comment(source *src, char *text, int dbg) {  	char *after_comment;  	size_t comment_len = 0;  	comment *com = calloc(1, sizeof(comment)); @@ -151,7 +203,102 @@ comment *get_comment(source *src, int dbg) {  	return com;  } -source *pp_include(source *src, int dbg) { +source *make_source(const char *filename, int tab_stop, int dbg) { +	long file_size = 0; +	size_t filename_len = strlen(filename); +	source *src = calloc(1, sizeof(source)); + +	src->text = read_file(filename, &file_size); +	if (src->text == NULL) { +		printf("Error: read_file() returned NULL.\n"); +		free(src); +		return NULL; +	} +	src->tab_width = tab_stop; +	src->filename = calloc(filename_len+1, sizeof(char)); +	memcpy(src->filename, filename, filename_len); +	return src; +} + +int cmp_key(const void *key, const void *elem) { +	const char *k = key; +	const keyword *e = *(const keyword **)elem; +	return strcmp(k, e->name); +} + +int cmp_keyword(const void *key1, const void *key2) { +	const keyword *k1 = *(const keyword **)key1; +	const keyword *k2 = *(const keyword **)key2; +	return strcmp(k1->name, k2->name); +} + +keyword **copy_keyword_table(keyword **keywords, size_t *key_count, int dbg) { +	size_t dummy = 0; + +	key_count = (key_count != NULL) ? key_count : &dummy; +	if (keywords == NULL) { +		*key_count = 0; +		return NULL; +	} else { +		for (*key_count = 0; keywords[count] != NULL; ++(*key_count)); +		if (!*key_count) { +			return NULL; +		} else { +			keyword **table = calloc(*key_count+1, sizeof(keyword *)); +			#if 0 +			for (size_t i = 0; i < *key_count; ++i) { +				table[i] = calloc(1, sizeof(keyword)); +				memcpy(table[i], keywords[i], sizeof(keyword)); +			} +			#else +			memcpy(table, keywords, *key_count); +			#endif +			return table; +		} +	} +} + +int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg) { +	int ret = -1; +	size_t key_count = 0; +	keyword *k; +	keyword **sorted_keywords = copy_keyword_table(keywords, &key_count, dbg); +	void *dummy = NULL; +	callback_ret = (callback_ret != NULL) ? callback_ret : &dummy; + +	if (sorted_keywords == NULL) { +		*callback_ret = NULL; +		return ret; +	} + +	if (key_count > 1) { +		qsort(sorted_keywords, key_count, sizeof(keyword *), cmp_keyword); +	} + +	k = *(keyword **)bsearch(key, sorted_keywords, key_count, sizeof(keyword *), cmp_key); + +	if (k != NULL) { +		if (k->found_keyword) { +			*callback_ret = k->found_keyword(ctx, dbg); +		} else { +			*callback_ret = NULL; +		} +		ret = k->id; +	} else { +		*callback_ret = NULL; +	} + +	#if 0 +	for (size_t i = 0; i < *key_count; ++i) { +		free(sorted_keywords[i]); +		sorted_keywords[i] = NULL; +	} +	#endif +	free(sorted_keywords); +	return ret; +} + +char *pp_include(source *src, int dbg) {  	char *text = skip_whitespace(src, NULL, 0, 1, dbg);  	if (*text == '\r' || *text == '\n' || *text == '\f') {  		throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text)); @@ -166,27 +313,156 @@ source *pp_include(source *src, int dbg) {  			return NULL;  		} else {  			long dummy = 0; -			source *inc_src = calloc(1, sizeof(source)); -			inc_src->tab_width = src->tab_width; -			inc_src->filename = calloc((tmp--)-text, sizeof(char)); -			inc_src->text = read_file(inc_src->filename, &dummy); -			if (inc_src->text == NULL) { -				throw_error(src, 1, "File \"%s\" couldn't be read.", inc_src->filename); -				free(inc_src->filename); -				free(inc_src); -				src->text = skip_line(text, dbg); -				return NULL; -			} +			char *filename = calloc((tmp--)-text, sizeof(char)); +			memcpy(filename, text, tmp-text); +			return filename;  		}  	} else {  		throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive.");  		src->text = text;  		return NULL;  	} -	if -	size_t span = strspn(text, " \t\v\r\n"); -	if (text[span]+1 == '/' && text[])  } -source *preprocess(const char *str, int dbg) { +struct str_list { +	char *str; +	struct str_list *next; +}; + +struct comment_list { +	comment *com; +	struct comment_list *next; +}; + +int is_included(source *parent, const char *filename, int dbg) { +	if (parent->include_list != NULL) { +		source **includes = parent->include_list; +		for (int i = 0; includes[i] != NULL; ++i) { +			if (is_included(includes[i], filename, dbg)) { +				return 1; +			} +		} +		return 0; +	} +	return !strcmp(parent->filename, filename); +} + +void free_str_list(struct str_list *list) { +	for (struct str_list *l = list, l2 = l; l != NULL; l = l->next, l2 = l) { +		free(l2->str); +		l2->str = NULL; +		free(l2); +	} +} + +void free_comment_list(struct comment_list *list) { +	for (struct comment_list *l = list, l2 = l; l != NULL; l = l->next, l2 = l) { +		free(l2->com); +		l2->com = NULL; +		free(l2); +	} +} + +source **make_include_list(source *src, struct str_list *list, int dbg) { +	int inc_count = 0; +	source **include_list = NULL; + +	for (struct str_list *inc = list; inc != NULL; inc = inc->next, ++inc_count); + +	include_list = calloc(inc_count+1, sizeof(source *)); + +	do { +		struct str_list *inc = list; +		for (int i = 0; inc != NULL && i < inc_count; ++i, inc = inc->next) { +			src->include_list[i] = preprocess(src, inc->str, dbg); +		} +	} while(0); +} + +comment **make_comment_list(source *src, int dbg) { +	int comment_count = 0; +	struct comment_list *list = calloc(1, sizeof(struct comment_list)); +	struct comment_list *list_end = list; +	comment **comments = NULL; +	whitespace wsp = {0}; + +	for (char *text = src->text; text != NULL && *text != '\0'; text = find_comment(src, &wsp, NULL, dbg), list_end = list_end->next, ++comment_count) { +		list_end->com = get_comment(src, text, dbg); +		list_end->com->wsp = wsp; +		list_end->next = calloc(1, sizeof(struct comment_list)); +	} + +	comments = calloc(comment_count+1, sizeof(comment *)); + +	do { +		struct comment_list *l = list; +		for (int i = 0; l != NULL && i < comment_count; ++i, l = l->next) { +			comments[i] = calloc(1, sizeof(comment)); +			memcpy(comments[i], l->com, sizeof(comment)); +		} +	} while(0); +	free_comment_list(list); +	return comments; +} + +source *preprocess(source *parent, const char *filename, int dbg) { +	source *src = make_source(filename, 8, dbg); +	src->parent = parent; +	if (src == NULL) { +		if (parent == NULL) { +			printf("Error: Source struct for file \"%s\" couldn't be created.\n", filename); +		} else { +			throw_error(parent, 1, "Source struct for file \"%s\" couldn't be created.", filename); +		} +		return NULL; +	} else { +		int is_directive = 0; +		struct str_list *include_list = calloc(1, sizeof(struct str_list)); +		struct str_list *inc_end = include_list; + +		src->included = (parent != NULL); + +		for (char *text = src->text; *text != '\0'; ++text) { +			text = skip_whitespace(src, NULL, 1, 1, dbg) +			text = skip_comment(src, NULL, NULL, dbg); + +			if (is_directive) { +				char *dir_key = NULL; +				void *key_data = NULL; +				size_t key_len = strcspn(text, " \t\v\b"); +				directive_type dir_type = DIR_NONE; + +				dir_key = calloc(key_len+1, sizeof(char)); +				memcpy(dir_key, text, key_len); +				dir_type = find_keyword(dir_key, preproc_keywords, src, &key_data, dbg); +				free(dir_key); + +				if (key_data != NULL) { +					switch (dir_type) { +						case DIR_INCLUDE: +							if (parent != NULL) { +								if (is_included(parent, (char *)key_data, dbg)) { +									break; +								} +							} +							inc_end->str = (char *)key_data; +							inc_end->next = calloc(1, sizeof(struct str_list)); +							inc_end = include_list->next; +							break; +					} +				} + +				is_directive = 0; +			} + +			if (*text == '#') { +				is_directive = 1; +			} +		} +		src->include_list = make_include_list(src, include_list, dbg); +		src->comments = make_comment_list(src, dbg); +		free_str_list(include_list); +		include_list = NULL; +	} +	return src;  } diff --git a/igen/preprocessor.h b/igen/preprocessor.h index 7a85e45..200f5b2 100644 --- a/igen/preprocessor.h +++ b/igen/preprocessor.h @@ -1,11 +1,25 @@  #ifndef PREPROCESSOR_H  #define PREPROCESSOR_H +#include "lexer.h" + +  typedef enum comment_type comment_type; +typedef enum directive_type directive_type; +typedef struct stmt stmt; +typedef struct linked_list linked_list; +typedef struct keyword keyword;  typedef struct cursor cursor;  typedef struct whitespace whitespace;  typedef struct comment comment;  typedef struct source source; +typedef void *(keyword_cb)(void *ctx, int dbg); + +enum directive_type { +	DIR_NONE, +	DIR_INCLUDE, +	NUM_DIRS +};  enum comment_type {  	COMM_NONE, @@ -14,6 +28,17 @@ enum comment_type {  	NUM_COMMS  }; +struct linked_list { +	void *data; +	linked_list *next; +}; + +struct keyword { +	char *name; +	int id; +	keyword_cb *found_keyword; +}; +  struct cursor {  	int line;  	int column; @@ -36,12 +61,19 @@ struct comment {  };  struct source { +	source *parent;  	source **include_list; +	comment **comments;  	char *filename;  	char *text; +	int included : 1;  	int tab_width;  	cursor cur; +	stmt *root; +	stmt *last;  }; -extern source *preprocess(const char *str, int dbg); +/*extern keyword *find_keyword(const char *key, keyword **keywords, int dbg);*/ +extern int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg); +extern source *preprocess(source *parent, const char *filename, int dbg);  #endif  | 
