#include #include #include #include #include "misc.h" #include "preprocessor.h" char *pp_include(source *src, int dbg); static const keyword *preproc_keywords[] = { &(const keyword) {"include", DIR_INCLUDE, (keyword_cb *)pp_include}, NULL }; char *skip_line(const char *str, int dbg) { const size_t span = strcspn(str, "\r\n\f"); /*span += strspn(&str[span], "\r\n\f");*/ return (char *)&str[span+strspn(&str[span], "\r\n\f")]; } size_t line_span(const char *str, int dbg) { return skip_line(str, dbg)-str; } size_t get_comment_span(source *src, const char *str, int dbg) { if (str[0] == '/') { if (str[1] == '*') { char *tmp = strstr(str, "*/")+strlen("*/"); if (tmp == NULL) { throw_error(src, 0, "Unterminated comment (missing \"*/\")."); } return tmp-str; } else if (str[1] == '/') { return line_span(str, dbg); } } return 0; } size_t get_whitespace_span(const char *str, int count_lines, int count_columns, int dbg) { size_t span = 0; if (count_columns) { span = strspn(str, " \t\v\b"); } if (count_lines) { span += strspn(&str[span], "\r\n\f"); } return span; } void count_whitespace(whitespace *wsp, const char *str, size_t span, int count_lines, int count_columns, int dbg) { for (size_t i = 0; i < span && str[i] != '\0'; ++i) { char c = str[i]; if (str[i+1] != '\b') { if (count_columns) { wsp->spaces += (c == ' '); wsp->vtabs += (c == '\v'); wsp->tabs += (c == '\t'); } if (count_lines) { i += (c == '\r' && str[i+1] == '\n'); wsp->lines += (c == '\r' || c == '\n' || c == '\f'); } } else { c = str[++i+1]; if (c != '\0') { i += (count_lines && (c == '\r' && str[i+2] == '\n')); } ++wsp->bspaces; } } } char *skip_whitespace(source *src, whitespace *wspace, int count_lines, int count_columns, int dbg) { char *text = src->text; size_t span = get_whitespace_span(text, count_lines, count_columns, dbg); whitespace wsp = {0}; count_whitespace(&wsp, text, span, count_lines, count_columns, dbg); if (wsp.tabs) { const int tab_stop = src->tab_width; const int extra_tabs = wsp.spaces/tab_stop; src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop); } else { src->cur.column += wsp.spaces; } src->cur.line += wsp.lines; if (wspace != NULL) { *wspace = wsp; } return &text[span]; } char *skip_comment(source *src, whitespace *wspace, comment_type *type, int dbg) { /*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/ char *text = src->text; size_t span = get_comment_span(src, text++, dbg); comment_type dummy = COMM_NONE; whitespace wsp = {0}; type = (type != NULL) ? type : &dummy; if (span) { if (*text++ == '*') { *type = COMM_MULTI; /*for (size_t i = 0; text[i] != '\0' && i < span; i = line_span(&text[i], dbg), ++wsp.lines);*/ count_whitespace(&wsp, text, span, 1, 0, dbg); count_whitespace(&wsp, &text[span], strspn(&text[span], "\r\n\f"), 0, 1, dbg); } else { *type = COMM_SINGLE; ++wsp.lines; text -= 2; } if (wspace != NULL) { *wspace = wsp; } if (wsp.tabs) { const int tab_stop = src->tab_width; const int extra_tabs = wsp.spaces/tab_stop; src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop); } else { src->cur.column += wsp.spaces; } src->cur.line += wsp.lines; } else { --text; *type = COMM_NONE; } return &text[span]; } char *find_comment(source *src, whitespace *wspace, comment_type *type, int dbg) { /*char *text = skip_whitespace(src wspace, 1, 1, dbg);*/ char *text = src->text; char *multi_comment = strstr(text, "/*"); char *single_comment = strstr(text, "//"); char *next_comment = NULL; comment_type dummy = COMM_NONE; whitespace wsp = {0}; type = (type != NULL) ? type : &dummy; if (multi_comment != NULL && single_comment != NULL) { char *first_comment = min(multi_comment, single_comment); *type = (first_comment == multi_comment) ? COMM_MULTI : COMM_SINGLE; } else if (multi_comment == NULL && single_comment == NULL) { *type = COMM_NONE; } else if (multi_comment != NULL) { *type = COMM_MULTI; } else { *type = COMM_SINGLE; } switch (*type) { case COMM_MULTI: next_comment = multi_comment; break; case COMM_SINGLE: next_comment = single_comment; break; } if (next_comment != NULL) { size_t span = next_comment-text; count_whitespace(&wsp, text, span, 1, 0, dbg); count_whitespace(&wsp, &text[span], strspn(&text[span], "\r\n\f"), 0, 1, dbg); if (wspace != NULL) { *wspace = wsp; } if (wsp.tabs) { const int tab_stop = src->tab_width; const int extra_tabs = wsp.spaces/tab_stop; src->cur.column += ((wsp.tabs+extra_tabs)*tab_stop); } else { src->cur.column += wsp.spaces; } src->cur.line += wsp.lines; return &text[span]; } else { return text; } } comment *get_comment(source *src, char *text, int dbg) { char *after_comment; size_t comment_len = 0; comment *com = calloc(1, sizeof(comment)); text = skip_whitespace(src, &com->wsp, 1, 1, dbg); com->start_pos = src->cur; after_comment = skip_comment(src, NULL, &com->type, dbg); com->end_pos = src->cur; switch (com->type) { case COMM_MULTI : comment_len = strstr(text, "*/")-text; break; case COMM_SINGLE: comment_len = strcspn(text, "\r\n\f"); break; } com->text = calloc(comment_len+1, sizeof(char)); memcpy(com->text, &text[strlen("/*")], comment_len); return com; } source *make_source(const char *filename, int tab_stop, int dbg) { long file_size = 0; size_t filename_len = strlen(filename); source *src = calloc(1, sizeof(source)); src->text = read_file(filename, &file_size); if (src->text == NULL) { printf("Error: read_file() returned NULL.\n"); free(src); return NULL; } src->tab_width = tab_stop; src->filename = calloc(filename_len+1, sizeof(char)); memcpy(src->filename, filename, filename_len); return src; } int cmp_key(const void *key, const void *elem) { const char *k = key; const keyword *e = *(const keyword **)elem; return strcmp(k, e->name); } int cmp_keyword(const void *key1, const void *key2) { const keyword *k1 = *(const keyword **)key1; const keyword *k2 = *(const keyword **)key2; return strcmp(k1->name, k2->name); } keyword **copy_keyword_table(keyword **keywords, size_t *key_count, int dbg) { size_t dummy = 0; key_count = (key_count != NULL) ? key_count : &dummy; if (keywords == NULL) { *key_count = 0; return NULL; } else { for (*key_count = 0; keywords[*key_count] != NULL; ++(*key_count)); if (!*key_count) { return NULL; } else { keyword **table = calloc(*key_count+1, sizeof(keyword *)); #if 0 for (size_t i = 0; i < *key_count; ++i) { table[i] = calloc(1, sizeof(keyword)); memcpy(table[i], keywords[i], sizeof(keyword)); } #else memcpy(table, keywords, *key_count); #endif return table; } } } int find_keyword(const char *key, keyword **keywords, void *ctx, void **callback_ret, int dbg) { int ret = -1; size_t key_count = 0; keyword *k; keyword **sorted_keywords = copy_keyword_table(keywords, &key_count, dbg); void *dummy = NULL; callback_ret = (callback_ret != NULL) ? callback_ret : &dummy; if (sorted_keywords == NULL) { *callback_ret = NULL; return ret; } if (key_count > 1) { qsort(sorted_keywords, key_count, sizeof(keyword *), cmp_keyword); } k = *(keyword **)bsearch(key, sorted_keywords, key_count, sizeof(keyword *), cmp_key); if (k != NULL) { if (k->found_keyword) { *callback_ret = k->found_keyword(ctx, dbg); } else { *callback_ret = NULL; } ret = k->id; } else { *callback_ret = NULL; } #if 0 for (size_t i = 0; i < *key_count; ++i) { free(sorted_keywords[i]); sorted_keywords[i] = NULL; } #endif free(sorted_keywords); return ret; } char *pp_include(source *src, int dbg) { char *text = skip_whitespace(src, NULL, 0, 1, dbg); if (*text == '\r' || *text == '\n' || *text == '\f') { throw_error(src, 1, "Found line separator \'%s\' before the operand of an include directive.", esc_char_to_string(*text)); src->text = text; return NULL; } else if (*text == '\"' || *text == '\'') { char c = *text++; char *tmp = strchr(text, c); if (tmp == NULL) { throw_error(src, 1, "Missing terminating %c character.", c); src->text = text; return NULL; } else { long dummy = 0; char *filename = calloc((tmp--)-text, sizeof(char)); memcpy(filename, text, tmp-text); return filename; } } else { throw_error(src, 1, "Missing quote mark at the start of the operand of an include directive."); src->text = text; return NULL; } } struct str_list { char *str; struct str_list *next; }; struct comment_list { comment *com; struct comment_list *next; }; int is_included(source *parent, const char *filename, int dbg) { if (parent->include_list != NULL) { source **includes = parent->include_list; for (int i = 0; includes[i] != NULL; ++i) { if (is_included(includes[i], filename, dbg)) { return 1; } } return 0; } return !strcmp(parent->filename, filename); } void free_str_list(struct str_list *list) { for (struct str_list *l = list, *l2 = l; l != NULL; l = l->next, l2 = l) { free(l2->str); l2->str = NULL; free(l2); } } void free_comment_list(struct comment_list *list) { for (struct comment_list *l = list, *l2 = l; l != NULL; l = l->next, l2 = l) { free(l2->com); l2->com = NULL; free(l2); } } source **make_include_list(source *src, struct str_list *list, int dbg) { int inc_count = 0; source **include_list = NULL; for (struct str_list *inc = list; inc != NULL; inc = inc->next, ++inc_count); include_list = calloc(inc_count+1, sizeof(source *)); do { struct str_list *inc = list; for (int i = 0; inc != NULL && i < inc_count; ++i, inc = inc->next) { src->include_list[i] = preprocess(src, inc->str, dbg); } } while(0); } comment **make_comment_list(source *src, int dbg) { int comment_count = 0; struct comment_list *list = calloc(1, sizeof(struct comment_list)); struct comment_list *list_end = list; comment **comments = NULL; whitespace wsp = {0}; for (char *text = src->text; text != NULL && *text != '\0'; text = find_comment(src, &wsp, NULL, dbg), list_end = list_end->next, ++comment_count) { list_end->com = get_comment(src, text, dbg); list_end->com->wsp = wsp; list_end->next = calloc(1, sizeof(struct comment_list)); } comments = calloc(comment_count+1, sizeof(comment *)); do { struct comment_list *l = list; for (int i = 0; l != NULL && i < comment_count; ++i, l = l->next) { comments[i] = calloc(1, sizeof(comment)); memcpy(comments[i], l->com, sizeof(comment)); } } while(0); free_comment_list(list); return comments; } source *preprocess(source *parent, const char *filename, int dbg) { source *src = make_source(filename, 8, dbg); src->parent = parent; if (src == NULL) { if (parent == NULL) { printf("Error: Source struct for file \"%s\" couldn't be created.\n", filename); } else { throw_error(parent, 1, "Source struct for file \"%s\" couldn't be created.", filename); } return NULL; } else { int is_directive = 0; struct str_list *include_list = calloc(1, sizeof(struct str_list)); struct str_list *inc_end = include_list; src->included = (parent != NULL); for (char *text = src->text; *text != '\0'; ++text) { text = skip_whitespace(src, NULL, 1, 1, dbg); text = skip_comment(src, NULL, NULL, dbg); if (is_directive) { char *dir_key = NULL; void *key_data = NULL; size_t key_len = strcspn(text, " \t\v\b"); directive_type dir_type = DIR_NONE; dir_key = calloc(key_len+1, sizeof(char)); memcpy(dir_key, text, key_len); dir_type = find_keyword(dir_key, (keyword **)preproc_keywords, src, &key_data, dbg); free(dir_key); if (key_data != NULL) { switch (dir_type) { case DIR_INCLUDE: if (parent != NULL) { if (is_included(parent, (char *)key_data, dbg)) { break; } } inc_end->str = (char *)key_data; inc_end->next = calloc(1, sizeof(struct str_list)); inc_end = include_list->next; break; } } is_directive = 0; } if (*text == '#') { is_directive = 1; } } src->include_list = make_include_list(src, include_list, dbg); src->comments = make_comment_list(src, dbg); free_str_list(include_list); include_list = NULL; } return src; }