#include "asmmon.h"
#include "lexer.h"

uint8_t lex_type;
uint16_t sym_count = 0;
token *tokens = NULL;
token *last_tok = NULL;
symbol *locals = NULL;
symbol *last_loc = NULL;
symbol *cur_sym = NULL;
symbol *struct_sym = NULL;
line *tmp_line = NULL;

/*
 * Look up a symbol, creating it when it does not exist yet.
 *
 * The search runs over the global list (`symbols`) or, when `islocal` is
 * non-zero, over the local list (`locals`). Entries are matched by `id` when
 * `useid` is non-zero, otherwise by `name`.
 *
 * When a match is found and `def` is non-zero, the symbol's `def` and `val`
 * are overwritten, its `id` is set to its position in the list, and the
 * global `defined` flag is set to 1 if the symbol had already been defined
 * (0 otherwise). When no match is found a new symbol is appended to the
 * searched list, `defined` is cleared, and `sym_count` is advanced.
 *
 * Returns the matching or newly created symbol.
 */
symbol *mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t islocal, uint8_t useid, uint16_t id, uint8_t dbg) {
	uint16_t i = 0;
	symbol *s = (!islocal) ? symbols : locals;

	for (; s; s = s->next, i++) {
		/* Cheap first-character reject before the full string compare. */
		if (!useid && name[0] != s->name[0]) {
			continue;
		}
		uint8_t match = (useid) ? (id == s->id) : !strcmp(name, s->name);
		if (!match) {
			continue;
		}
		if (def) {
			if (s->def) {
				if (dbg) {
					printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name);
				}
				defined = 1;
			} else {
				defined = 0;
			}
			s->def = def;
			s->val = val;
			s->id = i;
			if (dbg) {
				printf("mksymbol(): def: %u, val: $%016" PRIX64 ", name: %s\n", def, val, name);
				printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id);
			}
		}
		return s;
	}

	/* Not found: create the symbol and append it to the list that was searched. */
	size_t str_size = strlen(name)+1;
	symbol **head = (!islocal) ? &symbols : &locals;
	symbol *tail = (!islocal) ? last_sym : last_loc;

	s = malloc(sizeof(symbol));
	if (tail) {
		tail->next = s;
		s->prev = tail;
		s->up = tail->up;
	} else {
		*head = s;
		s->prev = NULL;
		/* Fix: the first local used to be left with an uninitialised `up`,
		 * which lex() later compares against NULL. */
		s->up = NULL;
	}
	s->down = NULL;
	s->next = NULL;

	s->name = malloc(str_size);
	memcpy(s->name, name, str_size);
	s->def = def;
	s->val = val;
	s->count = 0;
	s->isstruct = 0;
	/* Fix: lex() reads `isanon` on arbitrary symbols, so it must start out defined. */
	s->isanon = 0;
	s->id = sym_count++;

	if (!islocal) {
		last_sym = s;
	} else {
		last_loc = s;
		/* Fix: a local created before any global symbol has no parent to count it. */
		if (cur_sym) {
			cur_sym->count++;
		}
	}
	defined = 0;
	if (dbg) {
		printf("mksymbol(): def: %u, val: $%016" PRIX64 ", name: %s, id: $%04X\n", def, val, name, sym_count-1);
	}
	return s;
}

uint16_t fixup_cnt = 0;

/*
 * Fetch the symbol called `name`, creating an undefined placeholder when needed.
 *
 * If the symbol is already defined it is returned. Otherwise a fixup record
 * holding the address `val`, the referencing token `t` and the placeholder
 * symbol is appended to the `fixups` list, `fixup_cnt` is incremented, and
 * NULL is returned.
 */
symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t islocal, uint8_t dbg) {
	symbol *s = mksymbol(name, 0, 0, islocal, 0, 0, dbg);

	if (dbg) {
		printf("get_sym(): Symbol ID: $%X.\n", s->id);
	}
	if (s->def) {
		return s;
	}
	if (dbg) {
		printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name);
	}

	fixup *f = malloc(sizeof(fixup));
	if (last_fix) {
		last_fix->next = f;
	} else {
		fixups = f;
	}
	f->adr = val;
	f->t = t;
	f->s = s;
	f->next = NULL;
	last_fix = f;
	fixup_cnt++;
	return NULL;
}

/*
 * Search the member lists hanging below `root` for a symbol named `name`.
 *
 * If `root` has no `down` list but has an `up` link, the search starts from
 * `root->up` instead. Each level's sibling list is scanned; the walk then
 * descends through the first sibling that has its own `down` list.
 *
 * Returns the matching member, or NULL when there is none.
 * `dbg` is accepted for signature compatibility and is not used.
 */
symbol *find_member(char *name, symbol* root, uint8_t dbg) {
	(void)dbg;
	if (name == NULL || root == NULL) {
		return NULL;
	}

	symbol *s = root;
	if (s->down == NULL && s->up != NULL) {
		s = s->up;
	}
	do {
		s = s->down;
		/* Fix: a root with neither members nor a parent used to be dereferenced here. */
		if (s == NULL) {
			return NULL;
		}
		for (symbol *m = s; m; m = m->next) {
			if (!strcmp(name, m->name)) {
				return m;
			}
		}
		/* Advance to the first sibling that has a nested list (or the last sibling). */
		for (; s->next && !s->down; s = s->next);
	} while (s->down);
	return NULL;
}

/*
 * Walk the `fixups` list and attach every symbol that has since been defined
 * to the token that referenced it.
 *
 * Returns the number of fixups whose symbol is still undefined.
 */
uint16_t reslv_fixups(uint8_t dbg) {
	uint16_t unresolved = 0;

	for (fixup *f = fixups; f; f = f->next) {
		if (f->s->def) {
			if (dbg) {
				printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%" PRIX64 ".\n", f->s->id, f->s->name, f->s->val);
			}
			/* Defensive: get_sym() stores whatever token pointer it was handed. */
			if (f->t) {
				f->t->sym = f->s;
			}
		} else {
			if (dbg) {
				printf("reslv_fixups(): oof, undefined reference to '%s', at $%016" PRIX64 ".\n", f->s->name, f->adr);
			}
			unresolved++;
		}
	}
	return unresolved;
}

/*
 * Return the index of `com` in the `comment` table, adding a copy of it at
 * index `comidx` (and advancing `comidx`) when it is not present yet.
 */
uint16_t get_comment(const char *com, uint8_t dbg) {
	uint16_t i = 0;

	/* Fix: test the bound before touching comment[i]. */
	for (; i < comidx && comment[i]; i++) {
		if (com[0] == comment[i][0] && !strcmp(com, comment[i])) {
			break;
		}
	}

	/* Fix: "not found" no longer depends on reading the slot at comment[comidx]. */
	if (i >= comidx || comment[i] == NULL) {
		if (dbg) {
			printf("get_comment(): oof, the index $%04X is NULL.\n", i);
			printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
		}
		size_t size = strlen(com)+1;
		comment[comidx] = malloc(size);
		memcpy(comment[comidx], com, size);
		return comidx++;
	}

	if (dbg) {
		if (strcmp(com, comment[i])) {
			printf("get_comment(): oof, the comment \"%s\" is somehow not in the comment table, even though it should be at index $%04X.\n", com, i);
		}
		printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i]));
		printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i);
	}
	return i;
}

/*
 * Return the index of `str` in the `string` table, or 0xFFFF when it is not
 * present. The scan stops at `stridx` or at the first NULL slot.
 */
uint16_t get_string(const char *str, uint8_t dbg) {
	uint16_t i = 0;

	for (; i < stridx && string[i] != NULL; i++) {
		/* Fix: return at the matching index. The previous loop incremented `i`
		 * once more before breaking, yielding index+1, or "not found" when the
		 * match was the last entry. */
		if (str[0] == string[i][0] && !strcmp(str, string[i])) {
			if (dbg) {
				printf("get_string(): Found string \"%s\", in the table, at index $%04X.\n", str, i);
			}
			return i;
		}
	}

	if (dbg) {
		printf("get_string(): oof, the index $%04X is NULL.\n", i);
		printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str);
	}
	return 0xFFFF;
}

/*
 * Return the entry of the `lines` list whose `linenum` equals `ln`, or NULL
 * when there is no such line.
 */
line *find_line(uint32_t ln, uint8_t dbg) {
	line *l = lines;

	for (; l && l->linenum != ln; l = l->next);

	if (dbg) {
		if (l) {
			printf("find_line(): Found line number %u.\n", ln);
		} else {
			printf("find_line(): oof, could not find line number %u.\n", ln);
		}
	}
	return l;
}

uint8_t is_struct = 0;
uint8_t is_anon = 0;

/*
 * Store the whitespace counts that preceded a punctuation-only lexeme on the
 * token being built, or on the previous token when none is being built.
 * Does nothing when neither token exists (punctuation at the start of a line).
 */
static void lex_set_subspacing(token *t, token *lt, uint8_t space, uint8_t tab) {
	token *target = (t) ? t : lt;

	if (target) {
		target->subspace = space;
		target->subtab = tab;
	}
}

/*
 * Tokenize one source line and hand the resulting tokens to parse_tokens().
 *
 * str     - text of the line; an optional leading decimal line number selects
 *           (via find_line()) an existing line record to reuse.
 * address - address for the line; replaced by the stored address when an
 *           existing line record is reused.
 * bline   - stored in the `bline` field of a newly created line record.
 * dbg     - non-zero enables diagnostic output.
 *
 * Returns the address produced by parse_tokens(); the incoming address is
 * returned unchanged when nothing was consumed from `str` or while
 * `is_struct` is non-zero.
 *
 * NOTE(review): the numeric codes compared against isdelm()/isdelm2() are
 * defined elsewhere. From their use here, 16 looks like horizontal whitespace
 * and 1 like end of line; confirm against isdelm().
 */
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
	/* Fix: zero-initialised, since the ':' case passes `sym` to mksymbol()
	 * even when no identifier has been lexed on this line. */
	char sym[0x100] = {0};
	uint16_t i = 0;
	uint16_t j = 0;
	uint16_t comid = 0;
	uint16_t strid = 0;
	uint64_t value = 0;
	lex_type = 0xFF;
	uint8_t k = 0;
	union reg ch;
	ch.u64 = 0;
	uint8_t rs = 0;
	uint8_t of = 0;
	uint8_t base = 0;
	uint8_t islocal = 0;
	uint8_t isop = 0;
	uint8_t isesc = 0;
	uint8_t islinenum;
	int16_t ln = -1;
	char lnum[6];
	uint8_t space = 0;
	uint8_t tab = 0;
	line *l = NULL;
	token *t = NULL;
	token *lt = NULL;
	symbol *tmp_sym = NULL;

	/* Optional leading line number. */
	while (isdigit((unsigned char)str[i]) && isdelm(str[i], dbg) != 16) {
		/* Fix: keep at most 5 digits; further digits are consumed but not
		 * stored, so lnum[] can no longer overflow. */
		if (j < sizeof(lnum)-1) {
			lnum[j++] = str[i];
		}
		i++;
	}
	/* Fix: assigning `i` directly truncated to 8 bits. */
	islinenum = (i != 0);
	if (i) {
		lnum[j] = '\0';
		ln = (int16_t)strtol(lnum, NULL, 10);
		j = 0;
		l = find_line(ln, dbg);
	} else {
		ln = linenum;
		l = NULL;
	}

	if (l) {
		address = l->addr;
	} else {
		l = malloc(sizeof(line));
		if (last_line) {
			last_line->next = l;
		} else {
			lines = l;
		}
		l->tok = NULL;
		l->next = NULL;
		l->count = 0;
		l->bline = bline;
		last_line = l;
	}
	l->addr = address;

	while (isdelm(str[i], dbg) != 1) {
		uint8_t offset = 0;
		base = 0;
		space = 0;
		tab = 0;

		/* Count the whitespace in front of the next lexeme, then skip it. */
		while (isdelm(str[i+j], dbg) == 16) {
			tab += str[i+j] == '\t';
			space += str[i+j] == ' ';
			j++;
		}
		j = 0;
		if (dbg) {
			printf("lex(): tab: %u, space: %u\n", tab, space);
		}
		while (isdelm(str[i], dbg) == 16) {
			i++;
		}

		uint8_t ptok = get_ptok(str[i], dbg);
		if (is_altok(ptok, dbg)) {
			/* A register-like letter followed by more identifier characters
			 * is an ordinary identifier. */
			offset++;
			if ((ptok == PTOK_S && toupper((unsigned char)str[i+1]) == 'P') || (ptok == PTOK_P && toupper((unsigned char)str[i+1]) == 'C')) {
				offset++;
			}
			switch (get_ptok(str[i+offset], dbg)) {
				case PTOK_B :
				case PTOK_X :
				case PTOK_Y :
				case PTOK_S :
				case PTOK_P :
				case PTOK_ALPHA :
				case PTOK_NUMBER:
					ptok = PTOK_ALPHA;
					break;
			}
			if ((ptok == PTOK_S && toupper((unsigned char)str[i+1]) != 'P') || (ptok == PTOK_P && toupper((unsigned char)str[i+1]) != 'C')) {
				ptok = PTOK_ALPHA;
			}
		}

		switch (ptok) {
			case PTOK_DOT:
				i++;
				for (; !(isdelm(str[i+j], dbg) & 17); j++);
				memcpy(lexeme, str+i, j);
				lexeme[j] = '\0';
				i += j;
				if (!isop) {
					for (k = 0; k < 11; k++) {
						if (tolower((unsigned char)lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
							lex_type = TOK_DIR;

							/* Peek at the first non-blank character after the directive. */
							uint16_t tmp = j;
							for (j = 0; isdelm(str[i+j], dbg) & 16; j++);
							uint8_t ret = get_ptok(str[i+j], dbg);
							j = tmp;

							const uint8_t is_open = (k == DIR_STRUCT || k == DIR_UNION);
							const uint8_t is_close = (k == DIR_ENDSTRUCT || k == DIR_ENDUNION);

							if (is_open && (ret != PTOK_ALPHA || (is_anon && ret == PTOK_ALPHA))) {
								is_anon++;
							}
							is_struct += is_open;
							/* Fix: an unmatched end directive used to wrap the 8-bit
							 * nesting counter around to 255. */
							if (is_close && is_struct > 0) {
								is_struct--;
							}

							if (is_close) {
								int skip = 0;
								if ((int)is_anon > 0) {
									/* Fix: cur_sym->up was read without checking cur_sym. */
									if ((cur_sym && cur_sym->isanon) || (cur_sym && cur_sym->up && !cur_sym->up->isanon) || (struct_sym && struct_sym->isanon)) {
										is_anon--;
									} else if ((int)is_struct <= 0) {
										is_anon = 0;
									}
									skip = (!is_anon);
								}
								/* Fix: every step below dereferences cur_sym, so an end
								 * directive with no current symbol is skipped. */
								if (cur_sym) {
									if (((int)(is_struct-is_anon) > 0 && !skip) || ((int)is_anon <= 0 && (int)is_struct <= 0)) {
										symbol *s;
										for (s = locals; s; s = s->next) {
											if (s->up == NULL) {
												s->up = cur_sym;
											}
											if (dbg) {
												printf("s: %p, s->up: %p, cur_sym: %p, last_loc: %p\n", (void *)s, (void *)s->up, (void *)cur_sym, (void *)last_loc);
											}
										}
										if (cur_sym->down == NULL) {
											cur_sym->down = locals;
										}
									}
									if (((int)is_anon <= 0 || (int)is_struct <= 0)) {
										for (struct_sym = cur_sym; struct_sym->prev && !struct_sym->isanon; struct_sym = struct_sym->prev);
									}
									if ((int)(is_struct-is_anon) > 0 && !skip) {
										symbol *s = cur_sym;
										for (; s->prev; s = s->prev) {
											if (s->up == NULL && cur_sym->up) {
												s->up = cur_sym->up;
											}
											if (dbg) {
												printf("s: %p, s->up: %p, cur_sym->up: %p, last_loc: %p\n", (void *)s, (void *)s->up, (void *)cur_sym->up, (void *)last_loc);
											}
										}
										if (cur_sym->up) {
											cur_sym = cur_sym->up;
										}
										/* NOTE(review): assumes `locals` and `locals->up` are
										 * non-NULL here; an empty nested struct may violate
										 * that. Left unchanged. */
										for (locals = locals->up; locals->prev; locals = locals->prev);
										for (last_loc = locals; last_loc->next; last_loc = last_loc->next);
									}
								}
							}
							break;
						}
					}
					l->count++;
					if (lex_type != TOK_DIR && lt && lt->id == TOK_SYM) {
						/* "sym.member": rewind so the member name is lexed on the next pass. */
						lex_type = TOK_MEMBER;
						i -= j;
					} else {
						t = make_token(lex_type, k, space, tab, 0, "", NULL);
					}
				} else {
					/* A suffix directly after an opcode selects the register size. */
					lex_type = TOK_RS;
					switch (tolower((unsigned char)lexeme[j-1])) {
						case '2':
						case 'w':
							rs = 1;
							break;
						case '4':
						case 'd':
							rs = 2;
							break;
						case '8':
						case 'q':
							rs = 3;
							break;
					}
					l->count++;
					t = make_token(lex_type, rs, space, tab, 0, "", NULL);
					isop = 0;
				}
				break;

			case PTOK_DQUOTE:
				i++;
				for (; isdelm(str[i+j], dbg) != 4 || isesc; j++) {
					isesc = (str[i+j] == '\\' && str[i+(j-1)] != '\\');
				}
				memcpy(lexeme, str+i, j);
				lexeme[j] = '\0';
				i += j;
				strid = get_string(lexeme, dbg);
				if (strid == 0xFFFF) {
					/* Not in the table yet: append a copy. */
					strid = stridx;
					string[strid] = malloc(j+1);
					memcpy(string[strid], lexeme, j+1);
					stridx++;
				}
				if (dbg) {
					printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
				}
				/* Fix: `lt` is NULL when the string is the first token on the line. */
				if (lt && lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
					incl[inc_count+inc_file] = strid;
					inc_file++;
				}
				lex_type = TOK_STRING;
				l->count++;
				t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL);
				break;

			case PTOK_DOLLAR:
			case PTOK_PERCENT:
			case PTOK_NUMBER:
				value = 0;
				switch (ptok) {
					case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break;
					case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break;
					case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; break;
				}
				for (; isxdigit((unsigned char)str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++);
				memcpy(lexeme, str+i, j);
				lexeme[j] = '\0';
				i += j;
				value = strtoull(lexeme, NULL, base);
				/* Fix: `lt` was dereferenced before its NULL check. */
				if (lt && lt->id == TOK_SYM) {
					/* "sym = value": define the symbol with this value. */
					mksymbol(sym, value, 1, islocal, 0, 0, dbg);
					lt->sym = get_sym(sym, address, lt, islocal, dbg);
					if (!islocal) {
						cur_sym = last_sym;
					}
					islocal = 0;
					isfixup += (lt->sym == NULL);
					if (dbg) {
						printf("lex(): isfixup: %u\n", isfixup);
					}
				}
				l->count++;
				t = make_token(lex_type, 0, space, tab, value, "", NULL);
				t->digits = (!lt || lt->id != TOK_SYM) ? j : 0;
				break;

			case PTOK_SQUOTE:
				i++;
				k = 0;
				j = 0;
				while (isdelm(str[i], dbg) != 8 || isesc) {
					isesc = (str[i] == '\\' && str[i-1] != '\\');
					lexeme[j++] = str[i++];
				}
				isesc = 0;
				lexeme[j] = '\0';
				/* Decode escape sequences into the bytes of `ch`. */
				for (j = 0; lexeme[k] != '\0' && j < 7; k++) {
					switch (lexeme[k]) {
						case '\\':
							switch (lexeme[++k]) {
								case 'n' : ch.u8[j++] = '\n'; break;
								case 'r' : ch.u8[j++] = '\r'; break;
								case 't' : ch.u8[j++] = '\t'; break;
								case 'b' : ch.u8[j++] = '\b'; break;
								case '\'': ch.u8[j++] = '\''; break;
								case '\"': ch.u8[j++] = '\"'; break;
								case '\\': ch.u8[j++] = '\\'; break;
							}
							/* Fix: a trailing backslash left `k` on the terminator,
							 * and the loop increment then stepped past it. */
							if (lexeme[k] == '\0') {
								k--;
							}
							break;
						default:
							ch.u8[j++] = lexeme[k];
					}
				}
				lex_type = TOK_CHAR;
				l->count++;
				t = make_token(lex_type, 0, space, tab, ch.u64, "", NULL);
				break;

			case PTOK_LBRACK:
			case PTOK_HASH :
				/* Fix: the line has no first token yet when it starts with '(' or '#'. */
				if (l->tok) {
					l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM;
				}
				lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j++] = str[i];
				lex_set_subspacing(t, lt, space, tab);
				break;

			case PTOK_PLUS:
			case PTOK_MINUS:
			case PTOK_GT:
			case PTOK_LT:
			case PTOK_PIPE:
				lex_type = TOK_EXPR;
				switch (ptok) {
					case PTOK_PLUS : value = EXPR_PLUS ; break;
					case PTOK_MINUS: value = EXPR_MINUS; break;
					case PTOK_PIPE : value = EXPR_OR ; break;
					case PTOK_GT : value = (get_ptok(str[i+1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; break;
					case PTOK_LT : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break;
				}
				l->count++;
				t = make_token(lex_type, value, space, tab, 0, "", NULL);
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j++] = str[i];
				if (value == EXPR_LSHFT || value == EXPR_RSHFT) {
					/* Two-character operator: consume the second character too. */
					lexeme[j++] = str[++i];
				}
				break;

			case PTOK_EQU:
				i++;
				lex_type = TOK_SYM;
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j] = str[i];
				lex_set_subspacing(t, lt, space, tab);
				break;

			case PTOK_RBRACK:
				i++;
				lex_type = TOK_IND;
				lexeme[j] = ')';
				lexeme[j+1] = '\0';
				lexeme[j+2] = '\0';
				break;

			case PTOK_COMMA:
				i++;
				if (lex_type != TOK_IND && lex_type != TOK_OF) {
					lex_type = TOK_CSV;
				}
				lexeme[j] = ',';
				lexeme[j+1] = '\0';
				lexeme[j+2] = '\0';
				break;

			case PTOK_B:
				lexeme[j] = str[i++];
				lexeme[j+1] = '\0';
				lexeme[j+2] = '\0';
				lex_type = TOK_BREG;
				if (l->tok) {
					l->tok->type = BREG;
				}
				lex_set_subspacing(t, lt, space, tab);
				break;

			case PTOK_X:
			case PTOK_Y:
				lexeme[j] = str[i++];
				lexeme[j+1] = '\0';
				lexeme[j+2] = '\0';
				if (lex_type != TOK_IND && lex_type != TOK_CSV) {
					break;
				}
				if (l->tok) {
					switch (ptok) {
						case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break;
						case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break;
					}
				}
				break;

			case PTOK_S:
			case PTOK_P:
				lexeme[j] = str[i++];
				if (str[i] != ',') {
					lexeme[j+1] = str[i++];
				} else {
					lexeme[j+1] = '\0';
				}
				lexeme[j+2] = '\0';
				switch (ptok) {
					case PTOK_S: of = 1; break;
					case PTOK_P: of = 2; break;
				}
				lex_type = TOK_OF;
				t = make_token(lex_type, of, space, tab, 0, "", NULL);
				break;

			case PTOK_AT:
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j] = '@';
				islocal = 1;
				lex_type = TOK_LOCAL;
				lex_set_subspacing(t, lt, space, tab);
				break;

			case PTOK_COLON:
				i++;
				lexeme[j] = ':';
				lexeme[j+1] = '\0';
				lex_type = TOK_LABEL;
				/* Define the label at the current address. */
				mksymbol(sym, address, 1, islocal, 0, 0, dbg);
				if (isfixup) {
					isfixup = reslv_fixups(dbg);
				}
				if (lt) {
					lt->id = lex_type;
					lt->type = islocal;
					lt->sym = get_sym(sym, address, t, islocal, dbg);
					isfixup += (lt->sym == NULL);
				}
				if (!islocal) {
					/* A new global label starts a fresh local scope. */
					cur_sym = last_sym;
					locals = NULL;
					last_loc = NULL;
				} else if (cur_sym && cur_sym->down == NULL && cur_sym == last_sym) {
					/* First local under this global: hook the local list below it.
					 * Fix: cur_sym is NULL for a local label before any global one. */
					cur_sym->down = locals;
					cur_sym->down->up = cur_sym;
				}
				islocal = 0;
				if (dbg) {
					printf("lex(): isfixup: %u\n", isfixup);
				}
				break;

			case PTOK_SCOLON:
				i++;
				for (; isdelm(str[i+j], dbg) != 1; j++);
				if (!j) {
					lexeme[j] = ' ';
					lexeme[j+1] = '\0';
				} else {
					memcpy(lexeme, str+i, j);
					lexeme[j] = '\0';
					i += j;
					comid = get_comment(lexeme, dbg);
					if (dbg) {
						printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
					}
				}
				lex_type = TOK_COMMENT;
				l->count++;
				if (j) {
					t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL);
				} else {
					t = make_token(lex_type, 0, space, tab, 0, "", NULL);
				}
				break;

			case PTOK_ALPHA:
				for (; !isdelm2(str[i+j], dbg); j++);
				memcpy(lexeme, str+i, j);
				lexeme[j] = '\0';
				i += j;
				isop = 0;
				/* Three-letter words outside a struct may be mnemonics. */
				if (j == 3 && str[i] != ':' && !is_struct) {
					for (k = 0; k < OPNUM; k++) {
						if (toupper((unsigned char)lexeme[0]) == mne[k][0] && !strcasecmp(lexeme, mne[k])) {
							lex_type = TOK_OPCODE;
							isop = 1;
							l->count++;
							t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
							break;
						}
					}
				}
				if (!isop) {
					uint8_t spaces = 0;
					for (; isdelm(str[i+spaces], dbg) == 16; spaces++);
					uint8_t ret = get_ptok(str[i+spaces], dbg);
					if (ret == PTOK_COLON || ret == PTOK_EQU) {
						islocal = (lex_type == TOK_LOCAL);
					}
					lex_type = TOK_SYM;
					l->count++;
					t = make_token(lex_type, islocal, space, tab, 0, "", NULL);

					/* Fix: clamp the copy so long identifiers cannot overflow sym[]. */
					size_t sym_len = (j < sizeof(sym)) ? j : sizeof(sym)-1;
					memcpy(sym, lexeme, sym_len);
					sym[sym_len] = '\0';

					if (dbg) {
						printf("lex(): spaces: %u\n", spaces);
					}
					if (is_struct) {
						/* Only the name of the outermost struct/union is global. */
						islocal = !(is_struct == 1 && lt && lt->id == TOK_DIR);
						mksymbol(sym, 0, 1, islocal, 0, 0, dbg);
						if (isfixup) {
							isfixup = reslv_fixups(dbg);
						}
						t->sym = get_sym(sym, 0, t, islocal, dbg);
						if (lt && lt->id == TOK_DIR) {
							t->sym->isstruct = 1;
							t->id = (lt->type == DIR_STRUCT) ? TOK_STRUCT : TOK_UNION;
							tmp_line = l;
						} else {
							t->id = TOK_MEMBER;
							t->sym->isanon = (is_anon > 0);
						}
						isfixup += (t->sym == NULL);
						int is_top = (cur_sym == NULL);
						cur_sym = (!islocal && !cur_sym) ? last_sym : cur_sym;
						if (!islocal) {
							if (!is_top) {
								cur_sym = t->sym;
								locals = NULL;
								last_loc = NULL;
							} else {
								cur_sym->down = locals;
							}
						} else if (lt && lt->id == TOK_DIR) {
							if (lt->type == DIR_UNION || lt->type == DIR_STRUCT) {
								/* Nested struct/union: descend into it and start a
								 * fresh member list. */
								cur_sym->down = locals;
								cur_sym->down->up = cur_sym;
								last_loc->up = cur_sym;
								cur_sym = last_loc;
								locals = NULL;
								last_loc = NULL;
							}
						}
						islocal = 0;
					} else if ((str[i+spaces] != ':' && str[i+spaces] != '=')) {
						/* A reference rather than a definition. */
						if (tmp_sym) {
							t->sym = find_member(lexeme, tmp_sym, dbg);
							tmp_sym = NULL;
						} else {
							t->sym = get_sym(lexeme, address, t, islocal, dbg);
						}
						isfixup += (t && t->sym == NULL);
						islocal = 0;
						if (dbg) {
							printf("lex(): isfixup: %u\n", isfixup);
						}
					}
					/* Remember a struct symbol so a following ".member" resolves against it. */
					if (!is_struct && t && t->sym && t->sym->isstruct) {
						tmp_sym = t->sym;
					}
				}
				break;
		}

		if (!l->tok && t) {
			l->tok = tokens;
		}
		if (dbg) {
			printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
		}
		j = 0;
		if (lex_type == TOK_OPCODE && !isop) {
			/* Deliberately do not advance. */
			j = 0;
		} else if (lex_type == TOK_EXPR || (lex_type != TOK_MEMBER && !isdelm2(str[i], dbg))) {
			i++;
		}
		/* Only CSV, IND, LOCAL and SYM survive into the next iteration. */
		switch (lex_type) {
			default:
				lex_type = 0xFF;
				/* fallthrough */
			case TOK_CSV:
			case TOK_IND:
			case TOK_LOCAL:
				memset(lexeme, 0, strlen(lexeme)+1);
				/* fallthrough */
			case TOK_SYM:
				break;
		}
		if (t) {
			lt = t;
			t = t->next;
		}
	}

	if (i) {
		l->tok = tokens;
		token *tok = tokens;
		/* "sym <expr>": evaluate the expression into the leading symbol.
		 * Fix: `tokens` is NULL when the line produced no tokens, and the
		 * symbol is NULL when it is still an unresolved fixup. */
		if (tok && tok->id == TOK_SYM && tok->next && tok->sym) {
			symbol *s = tok->sym;
			for (; tok; tok = tok->next) {
				switch (tok->id) {
					case TOK_HEX :
					case TOK_BIN :
					case TOK_DEC :
					case TOK_CHAR:
					case TOK_EXPR:
						s->val = get_val(tok, address, 3, dbg);
						if (tok->next) {
							tok = skip_expr(tok, dbg);
						}
						break;
				}
			}
		}
		tokens = NULL;
		last_tok = NULL;
		bytecount dummy;
		if (!is_struct) {
			/* After a struct closes, parse from the line that opened it. */
			l = (tmp_line) ? tmp_line : l;
			address = parse_tokens(l->tok, &l, &dummy, 0, address, dbg);
			tmp_line = NULL;
		}
		if (dbg) {
			printf("lex(): Next address: $%" PRIX64 "\n", address);
		}
		if (ln > linenum || islinenum) {
			l->linenum = ln;
			if (ln > linenum) {
				linenum+=(10+(ln & 10));
			}
		} else {
			l->linenum = linenum;
			linenum += 10;
		}
	}
	return address;
}