#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "misc.h"
#include "preprocessor.h"
/* Handler for the "include" directive; it is defined further down in this file. */
char *pp_include(source *src, int dbg);
/*
 * NULL-terminated table of the preprocessor directives handled by this file.
 * Each entry is initialised with the directive name, its DIR_* identifier and
 * the callback to run when the name is matched (see find_keyword()).
 */
static const keyword *preproc_keywords[] = {
/* pp_include is cast to keyword_cb * so it fits the table's callback slot. */
&(const keyword) {"include", DIR_INCLUDE, (keyword_cb *)pp_include},
NULL
};
/*
 * Advance past the remainder of the current line.
 *
 * Skips every character up to the first line separator ('\r', '\n' or '\f'),
 * then skips the whole run of consecutive line separators that follows.
 *
 * str: NUL-terminated text to scan.
 * dbg: debug flag; not used by this function.
 *
 * Returns a pointer into `str` at the first character after that run, or at
 * the terminating NUL if the text ends first.
 */
char *skip_line(const char *str, int dbg) {
    const char *cursor = str;

    (void)dbg;
    cursor += strcspn(cursor, "\r\n\f");  /* up to the first separator */
    cursor += strspn(cursor, "\r\n\f");   /* over the separator run    */
    return (char *)cursor;
}
/*
 * Number of characters from `str` to the position skip_line() returns for it,
 * i.e. the rest of the current line plus the line separators that follow it.
 *
 * str: NUL-terminated text to measure.
 * dbg: debug flag, forwarded to skip_line().
 */
size_t line_span(const char *str, int dbg) {
    const char *next_line = skip_line(str, dbg);

    return (size_t)(next_line - str);
}
/*
 * Measure the comment that starts at `str`.
 *
 * src: source used only for error reporting through throw_error().
 * str: NUL-terminated text; a comment is recognised only at str[0].
 * dbg: debug flag, forwarded to line_span().
 *
 * Returns:
 *   - for a block comment, the number of characters from the opening "/ *"
 *     through the closing "* /" inclusive;
 *   - for a "//" comment, line_span(str), which includes the line separators
 *     that end the line;
 *   - 0 when `str` does not start a comment, or when a block comment is never
 *     closed (an error is reported through throw_error() in that case).
 */
size_t get_comment_span(source *src, const char *str, int dbg) {
    const size_t opener_len = strlen("/*");

    if (str[0] != '/') {
        return 0;
    }
    if (str[1] == '*') {
        /*
         * Search after the opener so that the '*' of "/ *" cannot be taken as
         * the start of the terminator (as in the text "/ * /").
         */
        const char *closer = strstr(str + opener_len, "*/");

        /* Test the strstr() result itself: NULL plus an offset is not NULL. */
        if (closer == NULL) {
            throw_error(src, 0, "Unterminated comment (missing \"*/\").");
            return 0;
        }
        return (size_t)((closer + strlen("*/")) - str);
    }
    if (str[1] == '/') {
        return line_span(str, dbg);
    }
    return 0;
}
/*
 * Measure the leading whitespace of `str`.
 *
 * str:           NUL-terminated text to scan.
 * count_lines:   when non-zero, include a run of line separators
 *                ('\r', '\n', '\f').
 * count_columns: when non-zero, include a leading run of horizontal
 *                characters (' ', '\t', '\v', '\b').
 * dbg:           debug flag; not used by this function.
 *
 * The horizontal run is measured first and the line-separator run is measured
 * from where it ended; the scan does not alternate between the two classes.
 *
 * Returns the number of characters covered.
 */
size_t get_whitespace_span(const char *str, int count_lines, int count_columns, int dbg) {
    const char *cursor = str;

    (void)dbg;
    if (count_columns) {
        cursor += strspn(cursor, " \t\v\b");
    }
    if (count_lines) {
        cursor += strspn(cursor, "\r\n\f");
    }
    return (size_t)(cursor - str);
}
/*
 * Tally the whitespace characters found in the first `span` characters of
 * `str` (stopping early at a NUL) into the counters of `wsp`.
 *
 * wsp:           counters to update; values are added to what is already there.
 * str:           NUL-terminated text to examine.
 * span:          maximum number of characters to look at.
 * count_lines:   when non-zero, count line separators; a "\r\n" pair counts
 *                as a single line.
 * count_columns: when non-zero, count spaces, tabs and vertical tabs.
 * dbg:           debug flag; not used by this function.
 *
 * A character that is immediately followed by a backspace ('\b') is not
 * counted; the backspace is counted in wsp->bspaces instead and both
 * characters are consumed.
 */
void count_whitespace(whitespace *wsp, const char *str, size_t span, int count_lines, int count_columns, int dbg) {
    for (size_t pos = 0; pos < span && str[pos] != '\0'; ++pos) {
        const char current = str[pos];

        if (str[pos + 1] == '\b') {
            char after_bs;

            /* Step onto the backspace, then peek at what follows it. */
            ++pos;
            after_bs = str[pos + 1];
            if (after_bs != '\0' && count_lines && after_bs == '\r' && str[pos + 2] == '\n') {
                ++pos;
            }
            ++wsp->bspaces;
            continue;
        }

        if (count_columns) {
            switch (current) {
            case ' ':  ++wsp->spaces; break;
            case '\v': ++wsp->vtabs;  break;
            case '\t': ++wsp->tabs;   break;
            default:   break;
            }
        }
        if (count_lines) {
            /* Consume the '\n' of a "\r\n" pair so the pair counts once. */
            if (current == '\r' && str[pos + 1] == '\n') {
                ++pos;
            }
            if (current == '\r' || current == '\n' || current == '\f') {
                ++wsp->lines;
            }
        }
    }
}
/*
 * Measure the whitespace at src->text, add it to the position tracked in
 * src->cur and return a pointer to the first character after it.
 *
 * src:           source to read from; src->cur is updated, src->text is not.
 * wspace:        if not NULL, receives the counters gathered for this run.
 * count_lines:   forwarded to get_whitespace_span() / count_whitespace().
 * count_columns: forwarded to get_whitespace_span() / count_whitespace().
 * dbg:           debug flag, forwarded to the helpers.
 *
 * Column accounting: when the run contains at least one tab, the column
 * advances by (tabs + spaces / tab_width) * tab_width; otherwise it advances
 * by the number of spaces.
 */
char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int count_columns, int dbg) {
    char *start = src->text;
    size_t skipped = get_whitespace_span(start, count_lines, count_columns, dbg);
    whitespace counts = {0};

    count_whitespace(&counts, start, skipped, count_lines, count_columns, dbg);

    if (!counts.tabs) {
        src->cur.column += counts.spaces;
    } else {
        const int tab_stop = src->tab_width;
        const int extra_tabs = counts.spaces/tab_stop;

        src->cur.column += ((counts.tabs+extra_tabs)*tab_stop);
    }
    src->cur.line += counts.lines;

    if (wspace != NULL) {
        *wspace = counts;
    }
    return start + skipped;
}
/*
 * Skip the comment that starts at src->text, if there is one.
 *
 * src:    source to read from; src->cur is advanced by the whitespace counted
 *         for the comment, src->text itself is not modified.
 * wspace: if not NULL and a comment was found, receives the counters gathered
 *         for it.
 * type:   if not NULL, receives COMM_MULTI, COMM_SINGLE or COMM_NONE.
 * dbg:    debug flag, forwarded to the helpers.
 *
 * Returns a pointer to the first character after the comment, or src->text
 * unchanged when no comment starts there.
 *
 * The returned pointer and the line count are both based on the start of the
 * comment, so a block comment ends exactly at the character after its
 * terminator and the result never points beyond the terminating NUL.
 */
char *skip_comment(source *src, whitespace *wspace, comment_type *type, int dbg) {
    char *start = src->text;
    const size_t span = get_comment_span(src, start, dbg);
    comment_type dummy = COMM_NONE;
    whitespace wsp = {0};

    type = (type != NULL) ? type : &dummy;

    if (span == 0) {
        *type = COMM_NONE;
        return start;
    }

    /* span != 0 implies start[0] == '/', so start[1] is safe to read. */
    if (start[1] == '*') {
        *type = COMM_MULTI;
        /* Line separators inside the comment itself. */
        count_whitespace(&wsp, start, span, 1, 0, dbg);
        /* Column-mode pass over the line separators that follow the comment. */
        count_whitespace(&wsp, &start[span], strspn(&start[span], "\r\n\f"), 0, 1, dbg);
    } else {
        *type = COMM_SINGLE;
        ++wsp.lines;
    }

    if (wspace != NULL) {
        *wspace = wsp;
    }
    if (wsp.tabs) {
        const int tab_stop = src->tab_width;
        const int extra_tabs = wsp.spaces/tab_stop;

        src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop);
    } else {
        src->cur.column += wsp.spaces;
    }
    src->cur.line += wsp.lines;

    return &start[span];
}
/*
 * Locate the first comment opener at or after src->text.
 *
 * src:    source to search; src->cur is advanced by the whitespace counted
 *         between src->text and the opener, src->text is not modified.
 * wspace: if not NULL and a comment was found, receives those counters.
 * type:   if not NULL, receives COMM_MULTI, COMM_SINGLE or COMM_NONE.
 * dbg:    debug flag, forwarded to count_whitespace().
 *
 * Returns a pointer to the opener, or src->text when there is no comment.
 */
char *find_comment(source *src, whitespace *wspace, comment_type *type, int dbg) {
    char *start = src->text;
    char *block_open = strstr(start, "/*");
    char *line_open = strstr(start, "//");
    char *found = NULL;
    comment_type unused_type = COMM_NONE;
    whitespace counts = {0};
    size_t offset;

    if (type == NULL) {
        type = &unused_type;
    }

    /* Classify by whichever opener comes first in the text. */
    if (block_open == NULL && line_open == NULL) {
        *type = COMM_NONE;
    } else if (block_open == NULL) {
        *type = COMM_SINGLE;
    } else if (line_open == NULL) {
        *type = COMM_MULTI;
    } else {
        char *earliest = min(block_open, line_open);

        *type = (earliest == block_open) ? COMM_MULTI : COMM_SINGLE;
    }

    if (*type == COMM_MULTI) {
        found = block_open;
    } else if (*type == COMM_SINGLE) {
        found = line_open;
    }

    if (found == NULL) {
        return start;
    }

    offset = found-start;
    count_whitespace(&counts, start, offset, 1, 0, dbg);
    count_whitespace(&counts, &start[offset], strspn(&start[offset], "\r\n\f"), 0, 1, dbg);
    if (wspace != NULL) {
        *wspace = counts;
    }

    if (!counts.tabs) {
        src->cur.column += counts.spaces;
    } else {
        const int tab_stop = src->tab_width;
        const int extra_tabs = counts.spaces/tab_stop;

        src->cur.column += ((counts.tabs+extra_tabs)*tab_stop);
    }
    src->cur.line += counts.lines;

    return &start[offset];
}
/*
 * Build a comment record for the comment at the current position of `src`.
 *
 * src:  source to read from; src->cur is advanced by skip_whitespace() and
 *       skip_comment().
 * text: ignored on entry; it is immediately replaced by the result of
 *       skip_whitespace(). Kept for interface compatibility.
 * dbg:  debug flag, forwarded to the helpers.
 *
 * Returns a newly allocated comment whose `text` member is a NUL-terminated
 * copy of the comment body without its delimiters ("/ *" and "* /", or "//"),
 * or NULL if memory could not be allocated. When skip_comment() reports
 * COMM_NONE the body is the empty string.
 */
comment *get_comment(source *src, char *text, int dbg) {
    const size_t delim_len = strlen("/*");
    const char *body = NULL;
    size_t body_len = 0;
    comment *com = calloc(1, sizeof *com);

    if (com == NULL) {
        return NULL;
    }

    text = skip_whitespace(src, &com->wsp, 1, 1, dbg);
    com->start_pos = src->cur;
    /* Only the comment type and the src->cur update are needed here. */
    (void)skip_comment(src, NULL, &com->type, dbg);
    com->end_pos = src->cur;

    /* Guard the delimiter skip in case `text` is not at a comment opener. */
    if (text[0] == '/' && text[1] != '\0') {
        body = text + delim_len;
        switch (com->type) {
        case COMM_MULTI: {
            /* Measure from after the opener up to the terminator. */
            const char *closer = strstr(body, "*/");

            body_len = (closer != NULL) ? (size_t)(closer - body) : strlen(body);
            break;
        }
        case COMM_SINGLE:
            body_len = strcspn(body, "\r\n\f");
            break;
        default:
            break;
        }
    }

    com->text = calloc(body_len+1, sizeof(char));
    if (com->text == NULL) {
        free(com);
        return NULL;
    }
    if (body_len > 0) {
        memcpy(com->text, body, body_len);
    }
    return com;
}
/*
 * Allocate a source record for `filename` and load the file's contents.
 *
 * filename: path of the file to read; NULL is rejected.
 * tab_stop: value stored in src->tab_width.
 * dbg:      debug flag; not used by this function.
 *
 * Returns the new source (all other members zeroed by calloc), or NULL when
 * `filename` is NULL, memory cannot be allocated, or read_file() returns NULL.
 * The filename copy is allocated before the file is read so that every
 * failure path only has to release memory allocated in this function.
 */
source *make_source(const char *filename, int tab_stop, int dbg) {
    long file_size = 0;
    size_t filename_len;
    source *src;

    (void)dbg;
    if (filename == NULL) {
        return NULL;
    }
    filename_len = strlen(filename);

    src = calloc(1, sizeof *src);
    if (src == NULL) {
        return NULL;
    }

    src->filename = calloc(filename_len+1, sizeof(char));
    if (src->filename == NULL) {
        free(src);
        return NULL;
    }
    memcpy(src->filename, filename, filename_len);

    src->text = read_file(filename, &file_size);
    if (src->text == NULL) {
        printf("Error: read_file() returned NULL.\n");
        free(src->filename);
        free(src);
        return NULL;
    }

    src->tab_width = tab_stop;
    return src;
}
/*
 * bsearch() comparator: compares a plain C string key against the name of
 * the keyword that a table slot points to.
 *
 * key:  the string being searched for.
 * elem: address of a table slot, i.e. a pointer to a keyword pointer.
 *
 * Returns the strcmp() ordering of the key relative to the keyword name.
 */
int cmp_key(const void *key, const void *elem) {
    const keyword *const *slot = elem;
    const char *wanted = key;

    return strcmp(wanted, (*slot)->name);
}
/*
 * qsort() comparator: orders two table slots by the names of the keywords
 * they point to.
 *
 * key1, key2: addresses of table slots, i.e. pointers to keyword pointers.
 *
 * Returns the strcmp() ordering of the two keyword names.
 */
int cmp_keyword(const void *key1, const void *key2) {
    const keyword *const *lhs = key1;
    const keyword *const *rhs = key2;

    return strcmp((*lhs)->name, (*rhs)->name);
}
/*
 * Make a shallow copy of a NULL-terminated table of keyword pointers.
 *
 * keywords:  NULL-terminated array of keyword pointers, or NULL.
 * key_count: if not NULL, receives the number of entries copied (the NULL
 *            terminator is not counted); set to 0 whenever NULL is returned.
 * dbg:       debug flag; not used by this function.
 *
 * Returns a newly allocated, NULL-terminated array holding the same pointers
 * (the keyword objects themselves are not duplicated), or NULL when the input
 * is NULL or empty, or when memory cannot be allocated. The caller releases
 * the array with free().
 */
keyword **copy_keyword_table(keyword **keywords, size_t *key_count, int dbg) {
    size_t count = 0;
    keyword **table;

    (void)dbg;
    if (key_count != NULL) {
        *key_count = 0;
    }
    if (keywords == NULL) {
        return NULL;
    }

    while (keywords[count] != NULL) {
        ++count;
    }
    if (count == 0) {
        return NULL;
    }

    /* One extra zeroed slot keeps the copy NULL-terminated. */
    table = calloc(count+1, sizeof *table);
    if (table == NULL) {
        return NULL;
    }

    /* memcpy() takes a byte count, so scale by the element size. */
    memcpy(table, keywords, count * sizeof *table);

    if (key_count != NULL) {
        *key_count = count;
    }
    return table;
}
/*
 * Look `key` up in a NULL-terminated keyword table and run its callback.
 *
 * key:          name to search for; NULL is treated as "not found".
 * keywords:     NULL-terminated array of keyword pointers (may be NULL).
 * ctx:          passed as the first argument to the keyword's callback.
 * callback_ret: if not NULL, receives the callback's return value, or NULL
 *               when there is no match or the keyword has no callback.
 * dbg:          debug flag, forwarded to the helpers and the callback.
 *
 * The table is copied and sorted by name on every call so that bsearch() can
 * be used; the caller's table is left untouched.
 *
 * Returns the id of the matching keyword, or -1 if nothing matched.
 */
int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg) {
    int ret = -1;
    size_t key_count = 0;
    keyword **sorted_keywords;
    keyword **match;
    void *dummy = NULL;

    callback_ret = (callback_ret != NULL) ? callback_ret : &dummy;
    *callback_ret = NULL;

    if (key == NULL) {
        return ret;
    }

    sorted_keywords = copy_keyword_table(keywords, &key_count, dbg);
    if (sorted_keywords == NULL) {
        return ret;
    }

    if (key_count > 1) {
        qsort(sorted_keywords, key_count, sizeof(keyword *), cmp_keyword);
    }

    /*
     * bsearch() returns a pointer to the matching slot, or NULL when there is
     * no match; check it before dereferencing.
     */
    match = bsearch(key, sorted_keywords, key_count, sizeof(keyword *), cmp_key);
    if (match != NULL && *match != NULL) {
        keyword *k = *match;

        if (k->found_keyword) {
            *callback_ret = k->found_keyword(ctx, dbg);
        }
        ret = k->id;
    }

    free(sorted_keywords);
    return ret;
}
/*
 * Parse the operand of an include directive.
 *
 * Skips horizontal whitespace at src->text and expects a file name enclosed
 * in a pair of matching quote characters (either " or ').
 *
 * src: source being parsed. On a parse error, src->text is moved to the
 *      position reached and the error is reported through throw_error().
 * dbg: debug flag, forwarded to skip_whitespace().
 *
 * Returns a newly allocated, NUL-terminated copy of the text between the
 * quotes (the caller owns it), or NULL on a parse error or when memory cannot
 * be allocated.
 */
char *pp_include(source *src, int dbg) {
    char *text = skip_whitespace(src, NULL, 0, 1, dbg);

    if (*text == '\r' || *text == '\n' || *text == '\f') {
        throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text));
        src->text = text;
        return NULL;
    } else if (*text == '\"' || *text == '\'') {
        const char quote = *text++;
        char *closing = strchr(text, quote);

        if (closing == NULL) {
            throw_error(src, 1, "Missing terminating %c character.", quote);
            src->text = text;
            return NULL;
        } else {
            /*
             * Copy every character between the quotes and leave one extra
             * zeroed byte for the terminator; an empty operand yields "".
             */
            const size_t name_len = (size_t)(closing - text);
            char *filename = calloc(name_len+1, sizeof(char));

            if (filename != NULL && name_len > 0) {
                memcpy(filename, text, name_len);
            }
            return filename;
        }
    } else {
        throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive.");
        src->text = text;
        return NULL;
    }
}
/*
 * Node of a singly linked list of strings.
 * free_str_list() releases both the node and its `str` with free().
 */
struct str_list {
/* String carried by this node; may be NULL in a freshly zeroed node. */
char *str;
/* Next node, or NULL at the end of the list. */
struct str_list *next;
};
/*
 * Node of a singly linked list of comment records.
 * free_comment_list() releases both the node and its `com` with free().
 */
struct comment_list {
/* Comment carried by this node; may be NULL in a freshly zeroed node. */
comment *com;
/* Next node, or NULL at the end of the list. */
struct comment_list *next;
};
/*
 * Check whether `filename` names `parent` itself or any source reachable
 * through its include lists.
 *
 * parent:   root of the include tree to search; NULL yields 0.
 * filename: name to look for, compared with strcmp(); NULL yields 0.
 * dbg:      debug flag, forwarded to the recursive calls.
 *
 * A source's own filename is compared before descending into its
 * NULL-terminated include_list, so sources that have includes of their own
 * are matched as well.
 *
 * Returns 1 if a source with that filename is found, 0 otherwise.
 */
int is_included(source *parent, const char *filename, int dbg) {
    if (parent == NULL || filename == NULL) {
        return 0;
    }
    if (parent->filename != NULL && strcmp(parent->filename, filename) == 0) {
        return 1;
    }
    if (parent->include_list != NULL) {
        source **includes = parent->include_list;

        for (size_t i = 0; includes[i] != NULL; ++i) {
            if (is_included(includes[i], filename, dbg)) {
                return 1;
            }
        }
    }
    return 0;
}
/*
 * Release every node of a string list together with the string it holds.
 *
 * list: head of the list; NULL is accepted and does nothing.
 *
 * The successor is read before the node is freed so that no freed memory is
 * ever accessed.
 */
void free_str_list(struct str_list *list) {
    while (list != NULL) {
        struct str_list *next = list->next;

        free(list->str);
        free(list);
        list = next;
    }
}
/*
 * Release every node of a comment list together with the comment record it
 * points to.
 *
 * list: head of the list; NULL is accepted and does nothing.
 *
 * Only the comment structure itself is freed, not the buffers it points to.
 * The successor is read before the node is freed so that no freed memory is
 * ever accessed.
 */
void free_comment_list(struct comment_list *list) {
    while (list != NULL) {
        struct comment_list *next = list->next;

        free(list->com);
        free(list);
        list = next;
    }
}
/*
 * Preprocess every file named in `list` and collect the resulting sources.
 *
 * src:  source that contains the include directives; passed to preprocess()
 *       as the parent of each included file.
 * list: linked list of file names. Nodes whose `str` is NULL (such as an
 *       unused trailing node) are ignored.
 * dbg:  debug flag, forwarded to preprocess().
 *
 * Returns a newly allocated, NULL-terminated array of the sources that were
 * preprocessed successfully, or NULL if the array cannot be allocated. Files
 * for which preprocess() returns NULL are left out so that the array never
 * contains a NULL before its terminator.
 */
source **make_include_list(source *src, struct str_list *list, int dbg) {
    size_t inc_count = 0;
    size_t filled = 0;
    source **include_list;

    for (struct str_list *inc = list; inc != NULL; inc = inc->next) {
        if (inc->str != NULL) {
            ++inc_count;
        }
    }

    include_list = calloc(inc_count+1, sizeof *include_list);
    if (include_list == NULL) {
        return NULL;
    }

    for (struct str_list *inc = list; inc != NULL && filled < inc_count; inc = inc->next) {
        source *included;

        if (inc->str == NULL) {
            continue;
        }
        included = preprocess(src, inc->str, dbg);
        if (included != NULL) {
            include_list[filled++] = included;
        }
    }
    return include_list;
}
/*
 * Collect comment records for `src` into a NULL-terminated array.
 *
 * A temporary linked list is filled first: every loop iteration stores the
 * result of get_comment() in the current tail node and appends a fresh zeroed
 * node, so the list always ends with one unused node. The records are then
 * copied into the returned array and the temporary list is freed.
 *
 * Returns a newly allocated array of comment pointers terminated by NULL.
 *
 * NOTE(review): none of the allocations here are checked for failure, and
 * get_comment() is called on the first iteration whether or not a comment
 * starts at src->text.
 */
comment **make_comment_list(source *src, int dbg) {
int comment_count = 0;
/* Head of the temporary list; starts as a single empty node. */
struct comment_list *list = calloc(1, sizeof(struct comment_list));
struct comment_list *list_end = list;
comment **comments = NULL;
whitespace wsp = {0};
/*
 * The step expression looks for the next comment, moves to the freshly
 * appended tail node and bumps the count, in that order.
 * NOTE(review): find_comment() and get_comment() both read from src->text,
 * and nothing in this loop changes src->text - confirm how the scan is
 * meant to advance and terminate.
 */
for (char *text = src->text; text != NULL && *text != '\0'; text = find_comment(src, &wsp, NULL, dbg), list_end = list_end->next, ++comment_count) {
list_end->com = get_comment(src, text, dbg);
/* Replace the whitespace recorded by get_comment() with the counters from the preceding find_comment() call (all zero on the first pass). */
list_end->com->wsp = wsp;
list_end->next = calloc(1, sizeof(struct comment_list));
}
/* One extra zeroed slot keeps the array NULL-terminated. */
comments = calloc(comment_count+1, sizeof(comment *));
do {
struct comment_list *l = list;
for (int i = 0; l != NULL && i < comment_count; ++i, l = l->next) {
/* Shallow copy: pointer members of the record are shared with the list's copy, which free_comment_list() releases below without freeing what they point to. */
comments[i] = calloc(1, sizeof(comment));
memcpy(comments[i], l->com, sizeof(comment));
}
} while(0);
free_comment_list(list);
return comments;
}
/*
 * Load `filename`, collect its include directives and comments, and return
 * the resulting source.
 *
 * parent:   source that included this file, or NULL for a top-level file.
 * filename: path of the file to load.
 * dbg:      debug flag, forwarded to the helpers.
 *
 * Returns the new source with `parent`, `included`, `include_list` and
 * `comments` filled in, or NULL if the source could not be created (the
 * failure is reported with printf() for a top-level file and through
 * throw_error() on the parent otherwise).
 *
 * If a node of the temporary include-name list cannot be allocated, the
 * remaining include names are dropped rather than dereferencing NULL.
 *
 * NOTE(review): skip_whitespace() and skip_comment() both start from
 * src->text, and this loop never moves src->text forward - confirm how the
 * scan cursor is meant to advance before relying on this loop terminating.
 */
source *preprocess(source *parent, const char *filename, int dbg) {
    source *src = make_source(filename, 8, dbg);
    struct str_list *include_list;
    struct str_list *inc_end;
    int is_directive = 0;

    if (src == NULL) {
        if (parent == NULL) {
            printf("Error: Source struct for file \"%s\" couldn't be created.\n", filename);
        } else {
            throw_error(parent, 1, "Source struct for file \"%s\" couldn't be created.", filename);
        }
        return NULL;
    }

    /* Only touch src once it is known to be valid. */
    src->parent = parent;
    src->included = (parent != NULL);

    /* The list always ends with one empty node that receives the next name. */
    include_list = calloc(1, sizeof *include_list);
    inc_end = include_list;

    for (char *text = src->text; *text != '\0'; ++text) {
        text = skip_whitespace(src, NULL, 1, 1, dbg);
        text = skip_comment(src, NULL, NULL, dbg);
        if (*text == '\0') {
            /* Stop here so the loop's ++text cannot step past the terminator. */
            break;
        }
        if (is_directive) {
            void *key_data = NULL;
            size_t key_len = strcspn(text, " \t\v\b");
            directive_type dir_type = DIR_NONE;
            char *dir_key = calloc(key_len+1, sizeof(char));

            if (dir_key != NULL) {
                memcpy(dir_key, text, key_len);
                dir_type = find_keyword(dir_key, (keyword **)preproc_keywords, src, &key_data, dbg);
                free(dir_key);
            }
            if (key_data != NULL) {
                switch (dir_type) {
                case DIR_INCLUDE:
                    /*
                     * key_data is the file name allocated by pp_include().
                     * Release it when it is not going to be stored.
                     */
                    if (inc_end == NULL
                        || (parent != NULL && is_included(parent, (char *)key_data, dbg))) {
                        free(key_data);
                        break;
                    }
                    inc_end->str = (char *)key_data;
                    inc_end->next = calloc(1, sizeof(struct str_list));
                    /* Advance from the current tail, not from the head. */
                    inc_end = inc_end->next;
                    break;
                default:
                    break;
                }
            }
            is_directive = 0;
        }
        if (*text == '#') {
            is_directive = 1;
        }
    }

    src->include_list = make_include_list(src, include_list, dbg);
    src->comments = make_comment_list(src, dbg);
    free_str_list(include_list);
    include_list = NULL;

    return src;
}