#include "asmmon.h"
#include "lexer.h"

/* Type of the token most recently produced by lex(); 0xFF means "none". */
uint8_t lex_type;
/* Number of symbols created so far; also the source of new symbol IDs. */
uint16_t sym_count = 0;
token *tokens = NULL;
token *last_tok = NULL;
/* Head and tail of the current list of local (scoped) symbols. */
symbol *locals = NULL;
symbol *last_loc = NULL;
/* Symbol most recently added by add_symbol(). */
symbol *cur_sym = NULL;
/* First symbol of the struct/union that end_struct() last closed. */
symbol *struct_sym = NULL;
/* Line that opened the current struct/union, consumed by lex(). */
line *tmp_line = NULL;

/*
 * Look up a symbol by its (possibly dotted) name.
 *
 * The first '.'-separated component of `name` is matched against the
 * sibling list starting at `root`. While `depth` is non-zero the search
 * descends into the `down` list of every matching symbol, using the rest
 * of the name and `depth-1`.
 *
 * Returns the matching symbol, or NULL if there is none.
 */
symbol *find_symbol(symbol *root, const char *name, int depth, uint8_t dbg) {
	symbol *found = NULL;

	if (name == NULL) {
		return NULL;
	}

	const size_t size = strlen(name)+1;
	char *buf = malloc(size);
	if (buf == NULL) {
		return NULL;
	}
	memcpy(buf, name, size);

	/* Keep the allocation and the strtok_r() state in separate pointers,
	 * so that the copy can be freed once the search is done.
	 */
	char *rest = NULL;
	char *scope_name = strtok_r(buf, ".", &rest);

	/* A name with no component at all (empty, or only dots) matches nothing. */
	if (scope_name != NULL) {
		const size_t name_len = strlen(scope_name);
		/* Some C libraries leave `rest` NULL once the input is exhausted,
		 * others leave it pointing at the terminator; treat both alike.
		 */
		const char *next_name = (rest == NULL || *rest == '\0') ? scope_name : rest;

		for (symbol *s = root; s != NULL && found == NULL; s = s->next) {
			if (name_len != strlen(s->name) || strcmp(scope_name, s->name)) {
				continue;
			}
			if (!depth) {
				found = s;
			} else if (s->down) {
				found = find_symbol(s->down, next_name, depth-1, dbg);
			}
		}
	}

	free(buf);
	return found;
}

/*
 * Link `sym` into the symbol tree.
 *
 * root/lsym are the head and tail of the global list, rloc/lloc the head
 * and tail of the current local list, and csym the "current symbol"; all
 * of them are updated in place. `depth` is the scope depth of the new
 * symbol (0 for a global one).
 *
 * Returns 1 on success, or 0 if the walk down the tree ran out of levels.
 */
int add_symbol(symbol *sym, const char *name, symbol **root, symbol **lsym, symbol **rloc, symbol **lloc, symbol **csym, int csym_depth, int depth, uint8_t dbg) {
	symbol *s = (!depth) ? ((*lsym) ? *lsym : *root): ((*lloc) ? *lloc : *rloc);

	sym->next = NULL;
	sym->up = NULL;
	sym->down = NULL;
	sym->prev = NULL;

	/* For a scoped symbol, start from its parent rather than the list tail. */
	if (depth && depth >= csym_depth) {
		s = find_symbol(*root, name, depth-1, dbg);
		depth -= (!csym_depth) ? depth-1 : csym_depth;
	}

	int j = 0;
	int is_new_scope = 0;

	for (int i = 0; i < depth; i++) {
		if (s) {
			if (s->down != NULL) {
				s->down->up = (s->down->up == NULL) ? s : s->down->up;
				s = s->down;
			} else if (j) {
				return 0;
			} else {
				int is_first_entry = ((s->up == NULL) || (s->next == NULL && s->up));
				is_new_scope = (depth <= 1 && is_first_entry);
				j++;
			}
		} else {
			is_new_scope = 1;
		}
	}

	/* Move to the last entry of the list that we ended up in. */
	for (; s != NULL && s->next != NULL; s = s->next) {
		if (dbg) {
			printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
		}
	}

	/* `s` is NULL when the parent lookup failed; there is nothing to link
	 * the new symbol to in that case.
	 */
	if (s != NULL && ((is_new_scope && *lsym) || *lloc || *csym)) {
		if (is_new_scope && sym->depth > s->depth) {
			s->down = sym;
			s->down->up = s;
			s->down->prev = NULL;
		} else {
			s->next = sym;
		}
		if (dbg) {
			for (symbol *tmp = s; tmp && tmp->prev; tmp = tmp->prev) {
				printf("add_symbol(): s: %p, s->name: %s, s->next: %p, s->prev: %p, s->up: %p, s->down: %p\n", tmp, tmp->name, tmp->next, tmp->prev, tmp->up, tmp->down);
			}
		}
		sym->next = NULL;
		if (s->next == sym) {
			sym->prev = s;
			if (sym->depth) {
				sym->up = s->up;
			}
		}
		sym->down = NULL;
	}

	/* The head of a list never has a predecessor. */
	if ((*root && (*root)->prev)) {
		(*root)->prev = NULL;
	}
	if ((*rloc && (*rloc)->prev)) {
		(*rloc)->prev = NULL;
	}

	if (!depth) {
		if (*lsym == NULL) {
			*root = sym;
			sym->next = NULL;
			sym->prev = NULL;
			sym->up = NULL;
			sym->down = NULL;
		}
		*lsym = sym;
		*rloc = NULL;
		*lloc = NULL;
	} else {
		if (is_new_scope) {
			*lloc = NULL;
		}
		if (*lloc == NULL) {
			*rloc = sym;
			sym->next = NULL;
			sym->prev = NULL;
			sym->down = NULL;
		}
		*lloc = sym;
	}
	*csym = sym;
	return 1;
}

/*
 * Return a pointer to the last '.'-separated component of `name`
 * (the whole name if it contains no '.'). The result points into `name`.
 */
char *find_deepest_scope(const char *name) {
	const char *dot = strrchr(name, '.');
	return (char *)((dot != NULL) ? dot+1 : name);
}

/*
 * Find the symbol called `name` at `depth`, or create it.
 *
 * If the symbol exists and `def` is set, its value is updated and the
 * global `defined` flag reports whether it had already been defined.
 * Otherwise a new symbol is allocated; it is NOT linked into the tree
 * (see add_symbol()). With `use_scope` clear, only the deepest component
 * of `name` is stored as the symbol's name.
 */
symbol *mksymbol(const char *name, uint64_t val, uint8_t def, int depth, uint8_t use_scope, uint16_t id, uint8_t dbg) {
	uint16_t i = 0;
	symbol *s = find_symbol(symbols, name, depth, dbg);
	char *scope_name = (!use_scope) ? find_deepest_scope(name) : (char *)name;

	if (s) {
		if (def) {
			if (s->def) {
				if (dbg) {
					printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", scope_name);
				}
				defined = 1;
			} else {
				defined = 0;
			}
			s->def = def;
			s->val = val;
			/* NOTE(review): this resets the ID to 0 on every (re)definition,
			 * and the `id` parameter is never used; confirm this is intended.
			 */
			s->id = i;
			if (dbg) {
				printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, scope_name);
				printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id);
			}
		}
		return s;
	}

	size_t str_size = strlen(scope_name)+1;
	s = malloc(sizeof(symbol));
	s->name = malloc(str_size);
	s->def = def;
	s->val = val;
	s->count = 0;
	s->isstruct = 0;
	/* end_struct() reads this flag, so it must not be left uninitialized. */
	s->isanon = 0;
	s->depth = depth;
	s->next = NULL;
	s->up = NULL;
	s->down = NULL;
	s->prev = NULL;
	s->id = sym_count++;
	memcpy(s->name, scope_name, str_size);
	defined = 0;
	if (dbg) {
		printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, scope_name, sym_count-1);
	}
	return s;
}

/* Number of fixups registered by get_sym(). */
uint16_t fixup_cnt = 0;

/*
 * Get the symbol called `name`, creating an undefined one if needed.
 * When the symbol is not defined yet, a fixup that remembers the address
 * `val` and the token `t` (which may be NULL) is appended to the list.
 */
symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t depth, uint8_t use_scope, uint8_t dbg) {
	symbol *s = mksymbol(name, val, 0, depth, use_scope, use_scope, dbg);
	if (dbg) {
		printf("get_sym(): Symbol ID: $%X.\n", s->id);
	}
	if (!s->def) {
		if (dbg) {
			printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name);
		}
		fixup *f = malloc(sizeof(fixup));
		(last_fix) ? (last_fix->next = f) : (fixups = f);
		f->adr = val;
		f->t = t;
		f->s = s;
		f->next = NULL;
		last_fix = f;
		fixup_cnt++;
	}
	return s;
}

/*
 * Walk the fixup list, and point the token of every fixup whose symbol
 * is now defined at that symbol.
 *
 * Returns the number of fixups whose symbol is still undefined.
 */
uint16_t reslv_fixups(uint8_t dbg) {
	uint16_t unresolved = 0;

	for (fixup *f = fixups; f; f = f->next) {
		if (f->s->def) {
			if (dbg) {
				printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", f->s->id, f->s->name, f->s->val);
			}
			/* Fixups created while parsing an expression have no token. */
			if (f->t) {
				f->t->sym = f->s;
			}
		} else {
			if (dbg) {
				printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", f->s->name, f->adr);
			}
			unresolved++;
		}
	}
	return unresolved;
}

/*
 * Return the index of `com` in the comment table, adding a copy of it
 * at the end of the table if it is not there yet.
 */
uint16_t get_comment(const char *com, uint8_t dbg) {
	uint16_t i = 0;

	/* Check the bound before touching the entry. */
	for (; i < comidx && comment[i]; i++) {
		if (com[0] == comment[i][0] && !strcmp(com, comment[i])) {
			break;
		}
	}
	if (i >= comidx || comment[i] == NULL) {
		if (dbg) {
			printf("get_comment(): oof, the index $%04X is NULL.\n", i);
			printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
		}
		size_t size = strlen(com)+1;
		comment[comidx] = malloc(size);
		memcpy(comment[comidx], com, size);
		return comidx++;
	}
	if (dbg) {
		if (strcmp(com, comment[i])) {
			printf("get_comment(): oof, the comment \"%s\" is somehow not in the comment table, even though it should be at index $%04X.\n", com, i);
		}
		printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i]));
		printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i);
	}
	return i;
}

/*
 * Return the index of `str` in the string table, or 0xFFFF if it is
 * not in the table.
 */
uint16_t get_string(const char *str, uint8_t dbg) {
	uint16_t i = 0;
	uint8_t isstr = 0;

	for (; i < stridx && string[i] != NULL; i++) {
		if (str[0] == string[i][0] && !strcmp(str, string[i])) {
			/* Stop right here, so that `i` stays on the matching entry. */
			isstr = 1;
			break;
		}
	}
	if (!isstr) {
		if (dbg) {
			printf("get_string(): oof, the index $%04X is NULL.\n", i);
			printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str);
		}
		return 0xFFFF;
	}
	if (dbg) {
		printf("get_string(): Found string \"%s\", in the table, at index $%04X.\n", str, i);
	}
	return i;
}

/* Return the line whose line number is `ln`, or NULL if there is none. */
line *find_line(uint32_t ln, uint8_t dbg) {
	line *l = lines;

	for (; l && l->linenum != ln; l = l->next);
	if (dbg) {
		if (l != NULL) {
			printf("find_line(): Found line number %u.\n", ln);
		} else {
			printf("find_line(): oof, could not find line number %u.\n", ln);
		}
	}
	return l;
}

/*
 * Build the fully scoped name of `name`: the names of up to `depth`
 * enclosing symbols (found by walking `up` from `csym`), each followed
 * by a '.', and then `name` itself.
 *
 * If no enclosing symbol is available, the result is just a copy of
 * `name`. The caller owns the returned buffer.
 */
char *mk_scope_name(symbol *csym, int depth, const char *name, uint8_t dbg) {
	const size_t name_len = strlen(name);
	/* A negative depth has no scope levels to collect. */
	const int levels = (depth > 0) ? depth : 0;
	size_t len = name_len;
	symbol *s = csym;
	/* Zeroed, so that levels which could not be resolved stay NULL. */
	char **scopes = (levels) ? calloc(levels, sizeof(*scopes)) : NULL;

	for (; s && s->depth > depth; s = s->up);
	for (int i = levels; scopes && i && s; i--) {
		if (dbg) {
			printf("mk_scope_name(): s->depth: %i\n", s->depth);
		}
		s = (s->depth >= i && s->up) ? s->up : s;
		/* One more for the '.' after the scope name. */
		len += strlen(s->name)+1;
		scopes[i-1] = s->name;
	}

	char *scope_name = calloc(len+1, 1);
	if (scope_name == NULL) {
		free(scopes);
		return NULL;
	}

	char *tmp = scope_name;
	for (int i = 0; scopes && i < levels; i++) {
		if (scopes[i] == NULL) {
			continue;
		}
		size_t scope_len = strlen(scopes[i]);
		memcpy(tmp, scopes[i], scope_len);
		tmp += scope_len;
		*tmp++ = '.';
		if (dbg) {
			printf("mk_scope_name(): scope_name: %s\n", scope_name);
		}
	}
	memcpy(tmp, name, name_len);
	if (dbg) {
		printf("mk_scope_name(): scope_name: %s\n", scope_name);
	}
	free(scopes);
	return scope_name;
}

/* Nesting level of struct/union blocks, and how many of them are anonymous. */
int is_struct = 0;
int is_anon = 0;

/*
 * Create the symbol for a struct/union, or for one of its members, and
 * attach it to the token `t`.
 *
 * `lt` is the previous token; if it is a directive, `name` names the
 * struct/union itself, otherwise it names a member. `l` is remembered in
 * tmp_line when a struct/union is opened.
 */
void create_struct(symbol *c_sym, line *l, token *t, token *lt, const char *name, uint8_t dbg) {
	int depth = is_struct-is_anon;
	uint8_t ismember = !(depth == 1 && lt && lt->id == TOK_DIR);
	int is_new_scope = (lt && lt->id == TOK_DIR);

	depth -= is_new_scope;

	char *struct_name = mk_scope_name(c_sym, depth, name, dbg);
	symbol *s = mksymbol(struct_name, 0, 1, depth, 0, 0, dbg);
	add_symbol(s, struct_name, &symbols, &last_sym, &locals, &last_loc, &c_sym, 0, depth, dbg);
	t->sym = get_sym(struct_name, 0, t, depth, 0, dbg);
	if (isfixup) {
		isfixup = reslv_fixups(dbg);
	}
	if (lt && lt->id == TOK_DIR) {
		t->sym->isstruct = 1;
		t->id = (lt->type == DIR_STRUCT) ? TOK_STRUCT : TOK_UNION;
		tmp_line = l;
	} else {
		t->id = TOK_MEMBER;
		t->sym->isanon = (is_anon > 0);
	}
	isfixup += (t->sym == NULL);

	int is_top = (c_sym == NULL);
	c_sym = (!ismember && !c_sym) ? last_sym : c_sym;
	if (!ismember) {
		if (!is_top) {
			c_sym = t->sym;
			locals = NULL;
			last_loc = NULL;
		} else {
			c_sym->down = locals;
		}
	}
	cur_sym = c_sym;

	/* Every user of the scoped name made its own copy of it. */
	free(struct_name);
}

/*
 * Close the innermost struct/union block.
 *
 * Gives the members that have no parent yet a parent, records the first
 * symbol of the block in struct_sym, and moves cur_sym, locals and
 * last_loc back out to the enclosing scope when there is one.
 */
void end_struct(symbol *c_sym, symbol *s_sym, uint8_t dbg) {
	int skip = 0;

	/* Without a current symbol, there is no block to close. */
	if (c_sym == NULL) {
		return;
	}

	if (is_anon > 0) {
		if (c_sym->isanon || (c_sym->up && !c_sym->up->isanon) || (s_sym && s_sym->isanon)) {
			int depth = is_struct-is_anon;
			if ((depth > 0 || (is_struct > 1 && is_anon >= is_struct)) && c_sym->depth > depth) {
				for (; c_sym->depth > is_struct-is_anon && c_sym->up; c_sym = c_sym->up) {
					if (dbg) {
						printf("end_struct(): c_sym->depth: %i, is_struct-is_anon: %i\n", c_sym->depth, is_struct-is_anon);
					}
				}
			} else if (c_sym->depth) {
				is_anon--;
			}
		} else if (is_struct <= 0) {
			is_anon = 0;
		}
		skip = (!is_anon);
	}

	if (((is_struct-is_anon) > 0 && !skip) || (is_anon <= 0 && is_struct <= 0)) {
		for (symbol *s = locals; s; s = s->next) {
			if (s->up == NULL) {
				s->up = c_sym;
			}
			if (dbg) {
				printf("end_struct(): s: %p, s->up: %p, c_sym: %p, last_loc: %p\n", s, s->up, c_sym, last_loc);
			}
		}
	}

	if ((is_anon <= 0 || is_struct <= 0)) {
		if (dbg) {
			for (symbol *s = c_sym; s->next && !s->isanon; s = s->next) {
				printf("end_struct(): %p, s->next: %p, s->prev: %p, s->isanon: %i\n", s, s->next, s->prev, s->isanon);
			}
		}
		/* Rewind to the first symbol of the block. */
		for (s_sym = c_sym; s_sym->prev && !s_sym->isanon; s_sym = s_sym->prev) {
			if (dbg) {
				printf("end_struct(): s_sym: %p, s_sym->next: %p, s_sym->prev: %p, s_sym->isanon: %i\n", s_sym, s_sym->next, s_sym->prev, s_sym->isanon);
			}
		}
		struct_sym = s_sym;
	}

	if ((is_struct-is_anon) > 0 && !skip) {
		for (symbol *s = c_sym; s->prev; s = s->prev) {
			if (s->up == NULL && c_sym->up) {
				s->up = c_sym->up;
			}
			if (dbg) {
				printf("end_struct(): s: %p, s->up: %p, c_sym->up: %p, last_loc: %p\n", s, s->up, c_sym->up, last_loc);
			}
		}
		if (c_sym->up) {
			cur_sym = c_sym->up;
		}
		/* Go back to the local list of the enclosing scope, if any. */
		if (locals && locals->up) {
			for (locals = locals->up; locals->prev; locals = locals->prev);
			for (last_loc = locals; last_loc->next; last_loc = last_loc->next);
		}
	}
}

/*
 * Return the first fixup in the list starting at `root` whose symbol is
 * still undefined, has the depth `depth`, and is called `name`.
 * Returns NULL if there is no such fixup.
 */
fixup *find_fixup(fixup *root, const char *name, int depth, uint8_t dbg) {
	for (fixup *f = root; f; f = f->next) {
		symbol *s = f->s;
		if (s == NULL || s->name == NULL || s->depth != depth) {
			continue;
		}
		if (!strcmp(name, s->name) && !s->def) {
			return f;
		}
	}
	return NULL;
}

/*
 * Replace every undefined symbol called `name`, with the depth `depth`,
 * in the expression tree `root` with `sym`.
 */
void find_expr_sym(expr *root, const char *name, symbol *sym, int depth, uint8_t dbg) {
	if (root == NULL) {
		return;
	}
	if (root->left) {
		find_expr_sym(root->left, name, sym, depth, dbg);
	}
	if (root->right) {
		find_expr_sym(root->right, name, sym, depth, dbg);
	}
	if (root->type == EXPR_SYM && root->value.sym) {
		symbol *s = root->value.sym;
		if (!strcmp(name, s->name) && s->depth == depth && !s->def) {
			root->value.sym = sym;
		}
	}
}

/*
 * Starting at the line `l`, point every label, symbol and expression
 * token that refers to an undefined symbol called `name`, with the depth
 * `depth`, at `sym`.
 */
void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uint8_t dbg) {
	/* Iterate over the lines, rather than recursing once per line. */
	for (; l; l = l->next) {
		for (token *t = l->tok; t; t = t->next) {
			switch (t->id) {
				case TOK_LABEL:
				case TOK_SYM:
					if (t->sym && !strcmp(name, t->sym->name)) {
						if (t->sym->depth == depth && !t->sym->def) {
							t->sym = sym;
						}
					}
					break;
				case TOK_EXPR:
					find_expr_sym(t->expr, name, sym, depth, dbg);
					break;
				default:
					break;
			}
		}
	}
}

/*
 * Define the symbol `name` with the value `value` in the current scope.
 *
 * If the symbol had already been referenced, the symbol of its pending
 * fixup is reused, and every reference to it is redirected to it. The
 * symbol is then linked into the tree, and attached to `t` if `t` is
 * not NULL.
 */
void new_symbol(token *t, const char *name, uint64_t value, int depth, uint8_t dbg) {
	size_t name_len = strlen(name);
	char *scope_name = mk_scope_name(cur_sym, depth, name, dbg);
	symbol *s;
	fixup *f = find_fixup(fixups, scope_name, depth, dbg);

	if (f == NULL) {
		s = mksymbol(scope_name, value, 1, depth, 0, 0, dbg);
	} else {
		s = f->s;
		/* This has to happen while the symbol is still undefined. */
		resolve_symbol_names(lines, scope_name, s, depth, dbg);
		s->def = 1;
		s->val = value;
		free(s->name);
		s->name = NULL;
		s->name = malloc(name_len+1);
		memcpy(s->name, name, name_len+1);
	}
	add_symbol(s, scope_name, &symbols, &last_sym, &locals, &last_loc, &cur_sym, 0, depth, dbg);
	if (isfixup) {
		isfixup = reslv_fixups(dbg);
	}
	if (t) {
		t->sym = get_sym(scope_name, value, t, depth, 0, dbg);
		isfixup += (t->sym == NULL);
	}

	/* Every user of the scoped name made its own copy of it. */
	free(scope_name);
}

/*
 * Decode the escape sequence that `s` points at (`s` points at the
 * backslash), and store the resulting character in `*code`, if `code`
 * is not NULL.
 *
 * Besides the usual single character escapes, `\e` is ESC, `\$`/`\x`
 * start a hexadecimal value, `\%` a binary value, and a digit an octal
 * value. An unknown escape yields a literal backslash, and leaves the
 * character after it to the caller.
 *
 * Returns a pointer to the first character after the escape sequence.
 */
char *parse_escape(char *s, char *code) {
	char dummy;
	char *end;
	int base = 0;
	unsigned int value;

	/* Skip over the backslash. */
	s++;

	if (code == NULL) {
		code = &dummy;
	}

	switch (*s) {
		case 'a' : *code = '\a';   return s+1;
		case 'b' : *code = '\b';   return s+1;
		case 'f' : *code = '\f';   return s+1;
		case 'n' : *code = '\n';   return s+1;
		case 'r' : *code = '\r';   return s+1;
		case 't' : *code = '\t';   return s+1;
		case 'v' : *code = '\v';   return s+1;
		case '\\': *code = '\\';   return s+1;
		case '\"': *code = '\"';   return s+1;
		case '\'': *code = '\'';   return s+1;
		case 'e' : *code = '\x1B'; return s+1;
		case '$' :
		case 'x' :
		case 'X' :
			base = 16;
			s++;
			/* Falls Through. */
		case '%' :
			/* Only skip the '%' if we got here because of it. */
			if (!base) {
				base = 2;
				s++;
			}
			/* Falls Through. */
		case '0' :
		case '1' :
		case '2' :
		case '3' :
		case '4' :
		case '5' :
		case '6' :
		case '7' :
		case '8' :
		case '9' :
			base = (!base) ? 8 : base;
			value = strtoull(s, &end, base);
			*code = value;
			return end;
		default  :
			/* No valid escape sequence was found. */
			*code = '\\';
			return s;
	}
}

/*
 * Parse a quoted string or character literal, starting at `*s`.
 *
 * `delm` is the quote character (0 for none); two of them in a row
 * stand for one literal `delm`. With `get_value` set, the decoded
 * characters are stored in `buf` (and NUL terminated) if `buf` is not
 * NULL, or else packed into the returned value, first character in the
 * lowest addressed byte, up to eight of them.
 *
 * `*s` is moved past the closing delimiter. Returns the number of bytes
 * written, including the terminator, when `buf` is given, or else the
 * packed value.
 */
uint64_t parse_quote(char **s, char delm, int get_value, uint8_t *buf, uint8_t dbg) {
	uint64_t value = 0;
	uint8_t *tmp_val = (buf != NULL) ? buf : (uint8_t *)&value;
	const uint8_t has_delm = (delm && **s == delm);
	char *str = *s+has_delm;
	int i;

	for (i = 0; *str; i++) {
		char c;
		/* Are we at the start of an escape character? */
		if (*str == '\\') {
			str = parse_escape(str, &c);
		} else {
			c = *str++;
			if (delm && c == delm) {
				if (*str == delm) {
					/* Allow for multiple repeated
					 * instances of delm to be treated
					 * as a single instance of delm.
					 */
					str++;
				} else {
					break;
				}
			}
		}
		if (get_value && (buf != NULL || i < sizeof(uint64_t))) {
			tmp_val[i] = c;
		}
	}
	if (get_value && buf != NULL) {
		tmp_val[i] = '\0';
	}
	*s = str;
	return (buf != NULL) ? ++i : value;
}

/*
 * Allocate an expression node of the type `type`. The node holds `sym`
 * if it is not NULL, and `value` otherwise.
 */
expr *make_expr(int type, uint64_t value, symbol *sym, uint8_t dbg) {
	expr *new = malloc(sizeof(expr));

	new->type = type;
	new->left = NULL;
	new->right = NULL;
	if (sym) {
		new->value.sym = sym;
	} else {
		new->value.val = value;
	}
	return new;
}

/*
 * Return the register ID of the register called `str` (case does not
 * matter), or -1 if `str` is not the name of a register.
 */
int is_reg(const char *str) {
	size_t len = strlen(str);

	switch (len) {
		case 1:
			switch (*str) {
				case 'a': case 'A': return REG_A;
				case 'b': case 'B': return REG_B;
				case 'x': case 'X': return REG_X;
				case 'y': case 'Y': return REG_Y;
				case 'e': case 'E': return REG_E;
				case 'c': case 'C': return REG_C;
				case 'd': case 'D': return REG_D;
				case 's': case 'S': return REG_S;
				case 'f': case 'F': return REG_F;
			}
			break;
		case 2:
			if (str[1] == 'p' || str[1] == 'P') {
				if ((*str == 's' || *str == 'S') || (*str == 'b' || *str == 'B')) {
					return ((*str == 's' || *str == 'S')) ? REG_SP : REG_BP;
				}
			} else if (*str == 'p' || *str == 'P') {
				return (str[1] == 'c' || str[1] == 'C') ? REG_PC : -1;
			}
			break;
		case 3:
			if (*str == 'r' || *str == 'R') {
				int regnum = strtoul(str+1, NULL, 10);
				if (regnum >= REG_R11 && regnum <= REG_R15) {
					return regnum;
				}
			}
			break;
	}
	return -1;
}

#define SKIP_WHITESPACE(str, dbg) \
	/* Skip over any whitespace. */ \
	for (; isdelm(*str, dbg) & 0x10; str++)

/*
 * Read one element of an expression (an operator, a number, a character
 * literal, or a symbol) starting at `*p`, and move `*p` past it.
 *
 * A number or character value is stored in `*val` as a uint64_t, and a
 * symbol as a symbol pointer. `*found_reg` is set when the element is a
 * register name, or when a ';', a ',', or the `stop` character ends the
 * expression.
 *
 * Returns the EXPR_* type of the element, or EXPR_NONE if there is none.
 */
int get_expr_type(char **p, uint64_t address, void *val, int *found_reg, char stop, uint8_t dbg) {
	char *str = *p;
	int type = EXPR_NONE;

	SKIP_WHITESPACE(str, dbg);
	uint8_t ptok = get_ptok(*str, dbg);
	ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;

	if (ptok != PTOK_SCOLON && ptok != PTOK_COMMA && *str != stop) {
		char *tmp = NULL;
		int i = 0;
		int base = 0;
		uint64_t value = 0;
		symbol *s = NULL;
		int depth = 0;
		int scope_depth = 0;
		char *scope_name = NULL;

		switch (ptok) {
			case PTOK_PLUS  : type = EXPR_PLUS ; str++; break;
			case PTOK_MINUS : type = EXPR_MINUS; str++; break;
			case PTOK_ASTRSK: type = EXPR_MUL  ; str++; break;
			case PTOK_PIPE  : type = EXPR_OR   ; str++; break;
			/* NOTE(review): these two skip two characters even for a
			 * single '>' or '<'; confirm that this is intended.
			 */
			case PTOK_GT    : type = (get_ptok(str[1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; str += 2; break;
			case PTOK_LT    : type = (get_ptok(str[1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); str += 2; break;
			case PTOK_DOLLAR:
			case PTOK_PERCENT:
			case PTOK_NUMBER:
				switch (ptok) {
					case PTOK_DOLLAR : base = 16; type = EXPR_HEX; str++; break;
					case PTOK_PERCENT: base =  2; type = EXPR_BIN; str++; break;
					case PTOK_NUMBER : base = 10; type = EXPR_DEC; /****/ break;
				}
				/* Get the number of digits, and
				 * find the end of the number.
				 */
				for (; isxdigit((unsigned char)str[i]) && !(isdelm(str[i], dbg) & 0x03); i++);
				tmp = malloc(i+1);
				memcpy(tmp, str, i);
				tmp[i] = '\0';
				value = strtoull(tmp, NULL, base);
				free(tmp);
				*(uint64_t *)val = value;
				break;
			case PTOK_SQUOTE:
				type = EXPR_CHAR;
				value = parse_quote(&str, *str, 1, NULL, dbg);
				*(uint64_t *)val = value;
				break;
			case PTOK_AT:
				/* Increment the depth count, by the
				 * number of '@' signs before the
				 * symbol name.
				 */
				for (; *str == '@'; str++, depth++);
				/* Falls through. */
			case PTOK_ALPHA:
				/* Find the end of the symbol name.
				 * Also increment the depth count every
				 * time a '.' is found in the symbol name.
				 */
				for (; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
				/* One more byte, for the terminator. */
				tmp = malloc(i+1);
				memcpy(tmp, str, i);
				tmp[i] = '\0';
				if (is_reg(tmp) >= 0) {
					*found_reg = 1;
				} else {
					int total_depth = depth+scope_depth;
					scope_name = (!scope_depth || total_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp;
					s = get_sym(scope_name, address, NULL, total_depth, 1, dbg);
					isfixup += (s == NULL);
					type = EXPR_SYM;
					*(symbol **)val = s;
					/* get_sym() keeps its own copy of the name. */
					if (scope_name != tmp) {
						free(scope_name);
					}
				}
				free(tmp);
				break;
		}
		str += i;
	} else {
		*found_reg = 1;
	}
	*p = str;
	return type;
}

#undef SKIP_WHITESPACE

/*
 * Parse the expression starting at `*line` into a tree, and move `*line`
 * past it.
 *
 * Parsing stops at the end of the line, at a ';', a ',', or the `stop`
 * character, and after a register name; `*found_reg` (if `found_reg` is
 * not NULL) is set in all but the first case. With `is_left` set, the
 * expression is first parsed as an operand, and parsing then continues
 * from where the operand ended.
 *
 * Returns the root of the tree, or NULL if nothing was parsed.
 */
expr *parse_expr(char **line, uint64_t address, int *found_reg, int is_left, char stop, uint8_t dbg) {
	int dummy = 0;

	found_reg = (found_reg == NULL) ? &dummy : found_reg;

	expr *left = (is_left) ? parse_expr(line, address, found_reg, 0, stop, dbg) : NULL;
	/* Only read the position now, so that we continue after the operand,
	 * rather than rewinding the caller to the start of it.
	 */
	char *str = *line;
	expr *new = NULL;
	int type = EXPR_NONE;

	while (!(isdelm(*str, dbg) & 3) && !(*found_reg)) {
		int old_type = type;
		uint64_t value = 0;
		symbol *s = NULL;
		/* get_expr_type() stores either a 64-bit value or a symbol pointer. */
		union {
			uint64_t val;
			symbol *sym;
		} out = { 0 };
		char *before = str;
		int expr_type = 0;

		type = get_expr_type(&str, address, &out, found_reg, stop, dbg);

		switch (type) {
			case EXPR_NONE : expr_type = -1; break;
			case EXPR_HEX  :
			case EXPR_BIN  :
			case EXPR_DEC  :
			case EXPR_CHAR : value = out.val; break;
			case EXPR_SYM  : s = out.sym; break;
			case EXPR_PLUS :
			case EXPR_MINUS:
				/* Binary after an operand, unary otherwise. */
				switch (old_type) {
					case EXPR_HEX :
					case EXPR_BIN :
					case EXPR_DEC :
					case EXPR_CHAR:
					case EXPR_SYM : expr_type = 2; break;
					default       : expr_type = 1; break;
				}
				break;
			case EXPR_HIGH :
			case EXPR_LOW  : expr_type = 1; break;
			case EXPR_MUL  :
			case EXPR_OR   :
			case EXPR_LSHFT:
			case EXPR_RSHFT: expr_type = 2; break;
		}
		if (expr_type >= 0) {
			new = make_expr(type, value, s, dbg);
			switch (expr_type) {
				case 1:
					new->left = parse_expr(&str, address, found_reg, 1, stop, dbg);
					break;
				case 2:
					new->left = left;
					new->right = parse_expr(&str, address, found_reg, 1, stop, dbg);
					break;
			}
			left = new;
		} else if (str == before && !(*found_reg)) {
			/* Nothing was recognized, and nothing was consumed;
			 * stop here, rather than spinning forever.
			 */
			break;
		}
	}
	*line = str;
	return left;
}

/* Store the whitespace counts in `t`, or in `lt` if `t` is NULL. */
static void set_sub_whitespace(token *t, token *lt, uint8_t space, uint8_t tab) {
	token *target = (t) ? t : lt;

	if (target) {
		target->subspace = space;
		target->subtab = tab;
	}
}

/*
 * Lex one line of source code.
 *
 * `str` is the text of the line, optionally starting with a line
 * number; `address` is the current address, and `bline` is stored in
 * the line. The line is split into tokens, symbols are created for
 * labels and assignments, and, unless we are inside of a struct/union
 * block, the tokens are then handed to parse_tokens().
 *
 * Returns the address after the line.
 */
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
	/* Name of the most recent symbol; empty until one has been seen. */
	char sym[0x100] = "";
	uint16_t i = 0;
	uint16_t j = 0;
	uint16_t comid = 0;
	uint16_t strid = 0;
	uint64_t value = 0;
	lex_type = 0xFF;
	uint8_t lex_subtype = 0xFF;

	uint8_t k = 0;
	uint8_t rs = 0;
	uint8_t of = 0;

	int depth = 0;

	int is_inst = 0;
	uint8_t isop = 0;
	int islinenum;

	int16_t ln = -1;
	char lnum[6];

	uint8_t space = 0;
	uint8_t tab = 0;
	char delm = ',';

	line *l = NULL;
	token *t = NULL;
	token *lt = NULL;

	/* Read the line number, if there is one. Digits that do not fit
	 * into the buffer are skipped, rather than written past the end of it.
	 */
	while (isdigit(str[i]) && isdelm(str[i], dbg) != 16) {
		if (j < sizeof(lnum)-1) {
			lnum[j++] = str[i];
		}
		i++;
	}
	islinenum = (i != 0);
	if (i) {
		lnum[j] = '\0';
		ln = strtol(lnum, NULL, 10);
		j = 0;
		l = find_line(ln, dbg);
	} else {
		ln = linenum;
		l = NULL;
	}
	if (l) {
		address = l->addr;
	} else {
		l = malloc(sizeof(line));
		(last_line) ? (last_line->next = l) : (lines = l);
		l->tok = NULL;
		l->next = NULL;
		l->count = 0;
		l->bline = bline;
		last_line = l;
	}
	l->addr = address;

	while (isdelm(str[i], dbg) != 1) {
		j = 0;
		/* Count, and skip over the whitespace before the token. */
		for (tab = 0, space = 0; isdelm(str[i], dbg) == 16; tab += (str[i] == '\t'), space += (str[i] == ' '), i++);
		if (dbg) {
			printf("lex(): tab: %u, space: %u\n", tab, space);
		}
		uint8_t ptok = get_ptok(str[i], dbg);
		if (is_altok(ptok, dbg)) {
			/* Only treat the token as a register if it is one,
			 * and if there was an instruction before it.
			 */
			for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
			memcpy(lexeme, &str[i], j);
			lexeme[j] = '\0';
			j = 0;
			ptok = (!is_inst || is_reg(lexeme) < 0) ? PTOK_ALPHA : ptok;
			memset(lexeme, 0, strlen(lexeme)+1);
		}
		switch (ptok) {
			case PTOK_DOT:
				i++;
				for (; !(isdelm(str[i+j], dbg) & 17); j++);
				memcpy(lexeme, str+i, j);
				lexeme[j] = '\0';
				i += j;
				if (!isop) {
					/* Directive, or struct/union member. */
					for (k = 0; k < 11; k++) {
						if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
							lex_type = TOK_DIR;
							uint16_t tmp = j;
							for (j = 0; isdelm(str[i+j], dbg) & 16; j++);
							uint8_t ret = get_ptok(str[i+j], dbg);
							ret = (is_altok(ret, dbg)) ? PTOK_ALPHA : ret;
							j = tmp;
							/* A struct/union without a name is anonymous. */
							if ((k == DIR_STRUCT || k == DIR_UNION) && ret != PTOK_ALPHA) {
								is_anon++;
							}
							is_struct += (k == DIR_STRUCT || k == DIR_UNION);
							is_struct -= (k == DIR_ENDSTRUCT || k == DIR_ENDUNION);
							if ((k == DIR_ENDSTRUCT || k == DIR_ENDUNION)) {
								end_struct(cur_sym, struct_sym, dbg);
							}
							break;
						}
					}
					if (lex_type != TOK_DIR && lt && lt->id == TOK_SYM) {
						lex_type = TOK_MEMBER;
						i -= j;
					} else {
						l->count++;
						t = make_token(lex_type, k, space, tab, 0, "", NULL, NULL);
					}
				} else {
					/* Suffix of an instruction. */
					for (k = 0; !(isdelm(lexeme[k], dbg) & 17); k++) {
						lex_type = TOK_RS;
						switch (tolower(lexeme[k])) {
							case '2':
							case 'w':
								rs = 1;
								break;
							case '4':
							case 'd':
								rs = 2;
								break;
							case '8':
							case 'q':
								rs = 3;
								break;
							case 'o':
								/* Set Our token type to ortho suffix. */
								lex_type = TOK_OS;
								break;
						}
						l->count++;
						t = make_token(lex_type, rs, space, tab, 0, "", NULL, NULL);
						if (t) {
							lt = t;
							t = t->next;
						}
					}
					isop = 0;
				}
				break;
			case PTOK_DQUOTE:
				do {
					char *tmp = (str + i);
					int get_value = (ptok == PTOK_SQUOTE);
					value = parse_quote(&tmp, str[i], get_value, NULL, dbg);
					tmp--;
					i++;
					/* A lone quote at the end of the line has no contents. */
					j = (tmp > (str + i)) ? tmp - (str + i) : 0;
					memcpy(lexeme, str+i, j);
					lexeme[j] = '\0';
					i += j;
				} while (0);
				strid = get_string(lexeme, dbg);
				if (strid == 0xFFFF) {
					strid = stridx;
					string[strid] = malloc(j+1);
					memcpy(string[strid], lexeme, j+1);
					stridx++;
				}
				if (dbg) {
					printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
				}
				if (lt && lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
					incl[inc_count+inc_file] = strid;
					inc_file++;
				}
				lex_type = TOK_STRING;
				l->count++;
				t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL, NULL);
				break;
			case PTOK_SQUOTE:
			case PTOK_DOLLAR:
			case PTOK_PERCENT:
			case PTOK_NUMBER:
				l->count++;
				do {
					lex_type = TOK_EXPR;
					memset(lexeme, 0, strlen(lexeme)+1);
					char *tmp = &str[i];
					expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
					t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
					j = tmp - &str[i];
					memcpy(lexeme, &str[i], j);
					j = 0;
					i = tmp - str;
					t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
					lex_subtype = 0xFF;
				} while (0);
				break;
			case PTOK_LBRACK:
			case PTOK_HASH  :
				lex_type = TOK_MEM;
				value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM;
				l->count++;
				t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
				lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
				/* Expressions inside of brackets end at the closing bracket. */
				delm = (ptok == PTOK_LBRACK) ? ')' : delm;
				t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
				if (lex_subtype != 0xFF) {
					lex_subtype = 0xFF;
				}
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j++] = str[i];
				break;
			case PTOK_PLUS:
			case PTOK_MINUS:
			case PTOK_ASTRSK:
			case PTOK_GT:
			case PTOK_LT:
			case PTOK_PIPE:
				lex_type = TOK_EXPR;
				l->count++;
				memset(lexeme, 0, strlen(lexeme)+1);
				do {
					char *tmp = &str[i];
					expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
					t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
					j = tmp - &str[i];
					memcpy(lexeme, &str[i], j);
					j = 0;
					i = tmp - str;
					t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
					lex_subtype = 0xFF;
				} while (0);
				break;
			case PTOK_EQU:
				i++;
				lex_type = TOK_SYM;
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j] = str[i];
				if (lt) {
					lt->id = lex_type;
					lt->type = depth;
				}
				new_symbol(lt, sym, address, depth, dbg);
				set_sub_whitespace(t, lt, space, tab);
				break;
			case PTOK_RBRACK:
				i++;
				lex_type = TOK_IND;
				delm = (delm == ')') ? ',' : delm;
				lexeme[j] = ')';
				lexeme[j+1] = '\0';
				lexeme[j+2] = '\0';
				if (t && t->subtype == 0xFF) {
					t->subtype = TOK_IND;
				} else if (lt && lt->subtype == 0xFF) {
					lt->subtype = TOK_IND;
				}
				set_sub_whitespace(t, lt, space, tab);
				break;
			case PTOK_COMMA:
				i++;
				if ((lex_type != TOK_IND && lex_type != TOK_OF)) {
					lex_type = TOK_CSV;
				}
				lex_subtype = TOK_CSV;
				if (t && t->subtype == 0xFF) {
					t->subtype = TOK_CSV;
				} else if (lt && lt->subtype == 0xFF) {
					lt->subtype = TOK_CSV;
				}
				set_sub_whitespace(t, lt, space, tab);
				lexeme[j] = ',';
				lexeme[j+1] = '\0';
				lexeme[j+2] = '\0';
				break;
			case PTOK_B:
			case PTOK_E:
			case PTOK_X:
			case PTOK_Y:
			case PTOK_S:
			case PTOK_A:
			case PTOK_C:
			case PTOK_D:
			case PTOK_F:
			case PTOK_R:
				lexeme[j+0] = str[i++];
				lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0';
				lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0';
				lexeme[j+3] = '\0';
				lex_type = TOK_REG;
				value = is_reg(lexeme);
				l->count++;
				t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
				t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
				lex_subtype = 0xFF;
				break;
			case PTOK_P:
				lexeme[j] = str[i++];
				lexeme[j+1] = (str[i] != ',') ? str[i++] : '\0';
				lexeme[j+2] = '\0';
				of = 2;
				lex_type = TOK_OF;
				l->count++;
				t = make_token(lex_type, of, space, tab, 0, "", NULL, NULL);
				break;
			case PTOK_AT:
				memset(lexeme, 0, strlen(lexeme)+1);
				/* Every '@' goes one scope deeper. */
				for (char *tmp = str+i; *tmp++ == '@'; depth++);
				i += depth;
				lexeme[j] = '@';
				lex_type = TOK_LOCAL;
				set_sub_whitespace(t, lt, space, tab);
				break;
			case PTOK_COLON:
				i++;
				lexeme[j] = ':';
				lexeme[j+1] = '\0';
				lex_type = TOK_LABEL;
				if (lt) {
					lt->id = lex_type;
					lt->type = depth;
				}
				new_symbol(lt, sym, address, depth, dbg);
				depth = 0;
				if (dbg) {
					printf("lex(): isfixup: %u\n", isfixup);
				}
				break;
			case PTOK_SCOLON:
				i++;
				/* The comment runs until the end of the line. */
				for (; isdelm(str[i+j], dbg) != 1; j++);
				if (!j) {
					lexeme[j] = ' ';
					lexeme[j+1] = '\0';
				} else {
					memcpy(lexeme, str+i, j);
					lexeme[j] = '\0';
					i += j;
					comid = get_comment(lexeme, dbg);
					if (dbg) {
						printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
					}
				}
				lex_type = TOK_COMMENT;
				l->count++;
				if (j) {
					t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL, NULL);
				} else {
					t = make_token(lex_type, 0, space, tab, 0, ""            , NULL, NULL);
				}
				break;
			case PTOK_ALPHA:
				/* Get the length of the token. */
				for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
				memcpy(lexeme, str+i, j);
				lexeme[j] = '\0';
				i += j;
				isop = 0;
				/* We need to figure out if we're allowed to
				 * search for a valid instruction name.
				 *
				 * We're only allowed to do so, if:
				 *
				 *	1. The previous token wasn't a directive.
				 *	2. There wasn't an instruction before us.
				 *	3. The length of the token is at, or above
				 *	   the length of the shortest instruction.
				 *	4. The length of the token is at, or below
				 *	   the length of the longest instruction.
				 *	5. The character after the token isn't a
				 *	   label delimiter.
				 *	6. We're not within a struct/union block.
				 */
				if (!(lt && lt->id == TOK_DIR) && !is_inst && j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
					for (k = 0; k < OPNUM; k++) {
						int find_ext = (k < EXT_OPNUM);
						int find_ortho = (k < ORTHO_OPNUM);
						int upper = toupper(lexeme[0]);
						int isbase = (upper == mne[k][0]);
						int isext = (find_ext && upper == ext_mne[k][0]);
						int isortho = (find_ortho && upper == ortho_mne[k][0]);

						if (isbase || isext || isortho) {
							int is_base = !strcasecmp(lexeme, mne[k]);
							int is_ext = (find_ext && !strcasecmp(lexeme, ext_mne[k]));
							int is_ortho = (find_ortho && !strcasecmp(lexeme, ortho_mne[k]));
							if (is_base || is_ext || is_ortho) {
								lex_type = (is_base) ? TOK_OPCODE : lex_type;
								lex_type = (is_ext) ? TOK_EXTOP : lex_type;
								lex_type = (is_ortho) ? TOK_ORTHO : lex_type;
								isop = 1;
								is_inst = 1;
								l->count++;
								t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
								break;
							}
						}
					}
				}
				if (!isop) {
					if (l->tok && l->tok->id == TOK_ORTHO && l->tok->byte == SET) {
						/* Condition code of a set instruction. */
						for (k = 0; k < 8; k++) {
							int upper = toupper(lexeme[0]);
							if (upper == set_cc[k][0]) {
								if (!strcasecmp(lexeme, set_cc[k])) {
									lex_type = TOK_CC;
									l->count++;
									t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
								}
							}
						}
					} else {
						uint8_t spaces = 0;
						for (; isdelm(str[i+spaces], dbg) == 16; spaces++);
						uint8_t ret = get_ptok(str[i+spaces], dbg);
						if (ret == PTOK_COLON || ret == PTOK_EQU) {
							depth = (lex_type != TOK_LOCAL) ? 0 : depth;
						}
						/* A name that is not being defined is an expression. */
						int is_expr = (!is_struct && str[i+spaces] != ':' && str[i+spaces] != '=');
						l->count++;
						/* Remember the name, without overflowing the buffer. */
						size_t sym_len = (j < sizeof(sym)) ? j : sizeof(sym)-1;
						memcpy(sym, lexeme, sym_len);
						sym[sym_len] = '\0';
						if (is_expr) {
							/* Go back to the start of the name,
							 * including the '@'s before it.
							 */
							i -= j + (depth);
							lex_type = TOK_EXPR;
							memset(lexeme, 0, strlen(lexeme)+1);
							char *tmp = &str[i];
							expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
							j = tmp - &str[i];
							memcpy(lexeme, &str[i], j);
							i = tmp - str;
							t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
							t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
							lex_subtype = 0xFF;
						} else {
							lex_type = TOK_SYM;
							t = make_token(lex_type, depth, space, tab, 0, "", NULL, NULL);
						}
						if (is_struct) {
							create_struct(cur_sym, l, t, lt, sym, dbg);
							depth = 0;
						}
					}
				}
				break;
		}
		if (!l->tok && t) {
			l->tok = tokens;
		}
		if (dbg) {
			printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
		}
		j = 0;
		if ((lex_type == TOK_OPCODE || lex_type == TOK_EXTOP) && !isop) {
			/* Stay where we are. */
			j = 0;
		} else if (lex_type != TOK_EXPR && lex_type != TOK_LOCAL && lex_type != TOK_MEMBER && !isdelm2(str[i], dbg)) {
			i++;
		}
		switch (lex_type) {
			default:
				lex_type = 0xFF;
				/* Falls through. */
			case TOK_CSV:
			case TOK_IND:
			case TOK_LOCAL:
				memset(lexeme, 0, strlen(lexeme)+1);
				/* Falls through. */
			case TOK_SYM:
				break;
		}
		if (t) {
			lt = t;
			t = t->next;
		}
	}

	if (i) {
		l->tok = tokens;
		token *tok = tokens;
		/* A symbol followed by expressions gets the value of the last one.
		 * A line of nothing but whitespace has no tokens at all.
		 */
		if (tok && (tok->id == TOK_SYM || tok->id == TOK_LABEL) && tok->next) {
			symbol *s = tok->sym;
			for (; tok; tok = tok->next) {
				if (s != NULL && tok->id == TOK_EXPR) {
					s->val = get_val(tok->expr, address, 3, 0, dbg);
				}
			}
		}
		tokens = NULL;
		last_tok = NULL;
		bytecount dummy;
		if (!is_struct) {
			l = (tmp_line) ? tmp_line : l;
			address = parse_tokens(l->tok, &l, &dummy, 0, address, dbg);
			if (tmp_line) {
				tmp_line = NULL;
			}
		}
		if (dbg) {
			printf("lex(): Next address: $%"PRIX64"\n", address);
		}
		if (ln > linenum || islinenum) {
			l->linenum = ln;
			if (ln > linenum) {
				linenum+=(10+(ln & 10));
			}
		} else if (!islinenum) {
			l->linenum = linenum;
			linenum += 10;
		}
	}
	return address;
}