/*
 * Assembler back end: expression evaluation, struct/union layout,
 * directive and opcode encoding, symbol tree fix-up, and cleanup of
 * the token/line/symbol/fixup lists.
 */
#include "asmmon.h"

token *tok_global;

/*
 * Returns 1 if `type` is one of the expression operators that get_val()
 * and skip_expr() chain on, 0 otherwise. `dbg` is unused.
 */
uint8_t isexpr(uint8_t type, uint8_t dbg) {
	(void)dbg;
	switch (type) {
		case EXPR_PLUS:
		case EXPR_MINUS:
		case EXPR_LOW:
		case EXPR_HIGH:
		case EXPR_OR:
		case EXPR_LSHFT:
		case EXPR_RSHFT:
			return 1;
		default:
			return 0;
	}
}

/*
 * Returns the type field of a TOK_RS token, or 0xFF if `t` is not a
 * TOK_RS token. `dbg` is unused.
 */
uint8_t get_rs(token *t, uint8_t dbg) {
	(void)dbg;
	if (t->id == TOK_RS) {
		return t->type;
	} else {
		return 0xFF;
	}
}

/*
 * Returns the type field of a TOK_OF token, or 0xFF if `t` is not a
 * TOK_OF token. `dbg` is unused.
 */
uint8_t get_of(token *t, uint8_t dbg) {
	(void)dbg;
	if (t->id == TOK_OF) {
		return t->type;
	} else {
		return 0xFF;
	}
}

/*
 * Computes an index into ind_ops[] for an indirect addressing mode.
 *
 * The index is a per-mnemonic base (<MNE>_IND) plus 0 for IND, 1 for INDY,
 * or 2 for INDX. Mnemonics and modes that are not listed contribute 0.
 * `dbg` is unused.
 */
uint8_t get_ind(uint8_t mne, uint8_t am, uint8_t dbg) {
	uint8_t base_idx = 0;
	uint8_t offset = 0;
	(void)dbg;
	switch (mne) {
		case CMP: base_idx = CMP_IND; break;
		case CPB: base_idx = CPB_IND; break;
		case JMP: base_idx = JMP_IND; break;
		case JSR: base_idx = JSR_IND; break;
		case LDA: base_idx = LDA_IND; break;
		case LDB: base_idx = LDB_IND; break;
		case LDX: base_idx = LDX_IND; break;
		case LDY: base_idx = LDY_IND; break;
		case STA: base_idx = STA_IND; break;
		case STB: base_idx = STB_IND; break;
		case STX: base_idx = STX_IND; break;
		case STY: base_idx = STY_IND; break;
		default : break;
	}
	switch (am) {
		case IND : offset = 0; break;
		case INDY: offset = 1; break;
		case INDX: offset = 2; break;
		default  : break;
	}
	return base_idx + offset;
}

/*
 * Evaluates the expression that starts at token `t`.
 *
 * t    - first token of the expression (an operand, or a leading operator).
 * addr - value substituted for a symbol/label token whose `sym` is NULL.
 * size - operand size selector used by EXPR_LOW/EXPR_HIGH:
 *        0 -> 8-bit, 1 -> 16-bit, anything else -> 32-bit split.
 * dbg  - non-zero prints every intermediate step.
 *
 * Operators are applied strictly left to right; there is no precedence.
 * Returns the computed value, or 0 when `t` is NULL. A dangling trailing
 * operator ends the evaluation with the value computed so far.
 */
uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t dbg) {
	uint64_t value = 0;
	uint64_t tmp_val = 0;
	uint8_t type = EXPR_NONE;
	uint8_t isstart = 1;

	if (t == NULL) {
		return 0;
	}
	do {
		if (t->id == TOK_EXPR) {
			type = t->type;
			t = t->next;
			/* An operator with nothing after it: stop instead of dereferencing NULL. */
			if (t == NULL) {
				break;
			}
		}
		switch (t->id) {
			case TOK_HEX:
			case TOK_DEC:
			case TOK_BIN:
			case TOK_CHAR:
				tmp_val = t->qword;
				t = t->next;
				break;
			case TOK_SYM:
			case TOK_LABEL:
				/* Walk a struct member chain down to its last symbol token. */
				for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next);
				tmp_val = (t->sym) ? t->sym->val : addr;
				t = t->next;
				break;
			default:
				break;
		}
		switch (type) {
			case EXPR_PLUS :
				if (isstart) {
					value = tmp_val;
				} else {
					value += tmp_val;
				}
				break;
			case EXPR_MINUS:
				if (isstart) {
					value = -tmp_val;
				} else {
					value -= tmp_val;
				}
				break;
			case EXPR_OR   : value |= tmp_val; break;
			/* Shifting a 64-bit value by 64 or more is undefined in C; the result is defined as 0 here. */
			case EXPR_LSHFT: value = (tmp_val < 64) ? (value << tmp_val) : 0; break;
			case EXPR_RSHFT: value = (tmp_val < 64) ? (value >> tmp_val) : 0; break;
			case EXPR_LOW  :
				value = tmp_val;
				switch (size) {
					default:
					case 2 : value &= 0xFFFFFFFF; break;
					case 1 : value &= 0x0000FFFF; break;
					case 0 : value &= 0x000000FF; break;
				}
				break;
			case EXPR_HIGH :
				value = tmp_val;
				switch (size) {
					default:
					case 2 : value >>= 0x20; break;
					case 1 : value >>= 0x10; break;
					case 0 : value >>= 0x08; break;
				}
				break;
			case EXPR_NONE : value = tmp_val; break;
			default        : break;
		}
		isstart = 0;
		if (dbg) {
			printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val);
		}
	} while (t && t->id == TOK_EXPR && isexpr(t->type, dbg));
	return value;
}

/*
 * Skips over the expression that starts at `t`, mirroring the token
 * consumption of get_val() without evaluating anything.
 *
 * Returns the first token after the expression, or NULL if the token
 * list ends (or `t` is NULL).
 */
token *skip_expr(token *t, uint8_t dbg) {
	if (t == NULL) {
		return NULL;
	}
	do {
		if (t->id == TOK_EXPR) {
			t = t->next;
			if (t == NULL) {
				break;
			}
		}
		switch (t->id) {
			case TOK_HEX  :
			case TOK_DEC  :
			case TOK_BIN  :
			case TOK_CHAR :
			case TOK_SYM  :
			case TOK_LABEL: t = t->next; break;
			default       : break;
		}
	} while (t && t->id == TOK_EXPR && isexpr(t->type, dbg));
	return t;
}

/*
 * Maps a data directive to its size selector (log2 of the byte count):
 * DIR_QWORD -> 3, DIR_DWORD -> 2, DIR_WORD -> 1, DIR_BYTE and anything
 * else -> 0. `dbg` is unused.
 */
uint8_t get_directivesize(uint8_t type, uint8_t dbg) {
	(void)dbg;
	switch (type) {
		case DIR_QWORD: return 3;
		case DIR_DWORD: return 2;
		case DIR_WORD : return 1;
		case DIR_BYTE : return 0;
		default       : return 0;
	}
}

/*
 * Lays out the struct or union whose opening directive is on line *ln.
 *
 * ln      - in: line holding the struct/union directive.
 *           out: line holding the matching end directive, or the last
 *           line visited if the definition is never terminated.
 * address - current address, forwarded to get_val() and nested calls.
 * offset  - offset of the first member; nested structs inherit the
 *           running offset of their parent.
 * dbg     - debug flag, forwarded.
 *
 * Each member symbol's val is set to its offset. For a struct the member
 * sizes are summed; for a union the largest member size is taken.
 * The total is stored in the struct's own symbol (if one was found) and
 * returned.
 */
uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg) {
	uint8_t is_struct = 0;
	uint8_t done = 0;
	uint8_t ismember = 0;
	uint16_t size = 0;
	uint16_t member_size = 0;
	line *l = *ln;
	line *last = l;
	symbol *strct = NULL;
	token *tok = (l != NULL) ? l->tok : NULL;

	/*
	 * Find the opening directive. The loop increment runs once more after
	 * `found` is set, so `tok` ends up on the token following it.
	 */
	for (uint8_t found = 0; tok && !found; tok = tok->next) {
		switch (tok->id) {
			case TOK_DIR:
				is_struct = (tok->type == DIR_STRUCT);
				found = (tok->type == DIR_STRUCT || tok->type == DIR_UNION);
				break;
			case TOK_STRUCT: is_struct = 1; /* Falls through. */
			case TOK_UNION : found = 1; break;
			default        : break;
		}
	}
	if (tok != NULL) {
		strct = tok->sym;
	}
	if (l && l->next) {
		l = l->next;
	}
	for (; l && !done; l = l->next) {
		token *t = l->tok;
		/* Reset per line; previously this was read while uninitialized. */
		symbol *member = NULL;

		last = l;
		for (; t && !done; t = t->next) {
			switch (t->id) {
				case TOK_MEMBER:
					ismember = 1;
					member = t->sym;
					break;
				case TOK_DIR:
					/* A nested struct/union directive counts as a member even without a member token. */
					ismember = (t->type == DIR_STRUCT || t->type == DIR_UNION) ? 1 : ismember;
					done = ((is_struct && t->type == DIR_ENDSTRUCT) || (!is_struct && t->type == DIR_ENDUNION));
					if (!done && ismember) {
						switch (t->type) {
							case DIR_BYTE  : member_size = 1; break;
							case DIR_WORD  : member_size = 2; break;
							case DIR_DWORD : member_size = 4; break;
							case DIR_QWORD : member_size = 8; break;
							case DIR_UNION :
							case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break;
							/*
							 * NOTE(review): get_val() is handed the directive token itself
							 * rather than its operand (parse_tokens() advances first), so
							 * this appears to always yield 0. Confirm the intent before
							 * changing it.
							 */
							case DIR_RES   : member_size = get_val(t, address, 3, dbg); t = skip_expr(t, dbg); break;
							default        : break;
						}
						if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) {
							member->val = offset;
						}
						if (is_struct) {
							size += member_size;
							offset += member_size;
						} else if (size < member_size) {
							size = member_size;
						}
					}
					ismember = 0;
					break;
				default:
					break;
			}
		}
		if (done) {
			break;
		}
	}
	/*
	 * Never hand NULL back: callers advance with `l = l->next` right after
	 * this returns, which would dereference NULL on an unterminated struct.
	 */
	if (l == NULL) {
		l = last;
	}
	*ln = l;
	if (strct != NULL) {
		strct->val = size;
	}
	return size;
}

/*
 * Processes a data directive (.byte/.word/.dword/.qword) and its operands.
 *
 * t       - the directive token; its operands follow in the list.
 * bc      - byte counters; datasize grows by the number of bytes produced.
 * isasm   - non-zero writes the bytes into addr[]; zero only counts them.
 * address - address of the first byte.
 * dbg     - debug flag, forwarded.
 *
 * Numeric operands are written least significant byte first, taken from
 * val.u8[0..n-1]. String operands are only honoured for DIR_BYTE; they are
 * unescaped and followed by a terminating NUL byte.
 * Returns the address following the last byte produced.
 */
uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
	union reg val;
	uint8_t c = 0;
	uint8_t tmp = 0;
	uint8_t type = t->type;
	uint64_t tmpaddr = address;

	t = t->next;
	for (; t; t = t->next) {
		tmp = 0;
		switch (t->id) {
			case TOK_HEX:
			case TOK_DEC:
			case TOK_BIN:
			case TOK_CHAR:
			case TOK_SYM:
			case TOK_LABEL:
				val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), dbg);
				/* Each case writes its upper bytes and falls into the next smaller size. */
				switch (type) {
					case DIR_QWORD: /* Falls through. */
						if (isasm) {
							addr[tmpaddr+7] = val.u8[7];
							addr[tmpaddr+6] = val.u8[6];
							addr[tmpaddr+5] = val.u8[5];
							addr[tmpaddr+4] = val.u8[4];
						}
						tmp += 4;
					case DIR_DWORD: /* Falls through. */
						if (isasm) {
							addr[tmpaddr+3] = val.u8[3];
							addr[tmpaddr+2] = val.u8[2];
						}
						tmp += 2;
					case DIR_WORD: /* Falls through. */
						if (isasm) {
							addr[tmpaddr+1] = val.u8[1];
						}
						tmp++;
					case DIR_BYTE:
						if (isasm) {
							addr[tmpaddr  ] = val.u8[0];
						}
						tmp++;
						tmpaddr += tmp;
						bc->datasize += tmp;
						break;
					default:
						break;
				}
				if (t->next && t->next->id == TOK_EXPR && isexpr(t->next->type, dbg)) {
					t = skip_expr(t, dbg);
				}
				break;
			case TOK_STRING:
				if (type == DIR_BYTE) {
					for (uint16_t k = 0; t->str[k] != '\0'; k++) {
						switch (t->str[k]) {
							case '\\':
								switch (t->str[k+1]) {
									case 'n' : c = '\n'; break;
									case 'r' : c = '\r'; break;
									case 't' : c = '\t'; break;
									case '\"': c = '\"'; break;
									case '\'': c = '\''; break;
									case '\\': c = '\\'; break;
									case '0' : c = '\0'; break;
									/* Trailing backslash: emit it literally. */
									case '\0': c = '\\'; break;
									/* Unknown escape: emit the escaped character instead of a stale byte. */
									default  : c = t->str[k+1]; break;
								}
								/* Only step over the escaped character if there is one, so the loop cannot run past the NUL. */
								if (t->str[k+1] != '\0') {
									k++;
								}
								break;
							default:
								c = t->str[k];
								break;
						}
						if (isasm) {
							addr[tmpaddr] = c;
						}
						tmpaddr++;
						bc->datasize++;
					}
					if (isasm) {
						addr[tmpaddr] = '\0';
					}
					tmpaddr++;
					bc->datasize++;
				}
				break;
			default:
				break;
		}
		/* skip_expr() may have run off the end of the list. */
		if (t == NULL) {
			break;
		}
	}
	return tmpaddr;
}

/*
 * Encodes the instruction that starts at opcode token `t`.
 *
 * t       - the TOK_OPCODE token; optional TOK_RS / TOK_OF tokens and the
 *           operand expression follow it.
 * bc      - byte counters; progsize grows by the number of bytes produced.
 * isasm   - non-zero writes the bytes into addr[]; zero only counts them.
 * address - address of the first byte of the instruction.
 * dbg     - debug flag, forwarded.
 *
 * Returns the address following the encoded instruction.
 */
uint64_t handle_opcode(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
	union reg val;
	uint8_t opsize;
	uint8_t instr = 0;
	uint8_t opcode;
	uint8_t type = 0;
	uint16_t am = 0;
	uint8_t tmp = 0;
	uint8_t prefix = 0;
	uint8_t rs = 0;
	uint8_t of = 0;
	uint8_t tmp_prefix = 0;

	val.u64 = 0;
	for (; t; t = t->next) {
		if (t->id == TOK_OPCODE) {
			instr = t->byte;
			type = t->type;
		} else {
			break;
		}
		tmp = 0;
		opsize = 1;
		opcode = 0;
		/* Consume the optional TOK_RS and TOK_OF tokens that may follow the mnemonic. */
		if (t->next) {
			rs = get_rs(t->next, dbg);
			t = (rs != 0xFF) ? t->next : t;
			if (t->next) {
				of = get_of(t->next, dbg);
				t = (of != 0xFF) ? t->next : t;
			}
		}
		if (rs != 0xFF || of != 0xFF) {
			tmp_prefix = (rs != 0xFF) ? (rs << 0) : tmp_prefix;
			tmp_prefix |= (of != 0xFF) ? (of << 2) : tmp_prefix;
		}
		prefix = (tmp_prefix) ? ((tmp_prefix << 4) | 3) : 0;
		uint8_t isimplied = (!t->next || (t->next->id == TOK_COMMENT));
		am = inst[instr].am;
		if ((am & AM_IMPL) && isimplied) {
			type = IMPL;
		} else {
			if (inst[instr].am & AM_REL) {
				type = REL;
			}
			if (t->next) {
				t = t->next;
			}
			val.u64 = get_val(t, address, (rs != 0xFF) ? rs : 0, dbg);
		}
		/* Special case for TXS. */
		if (instr == TXS) {
			if (type == IMM) {
				rs = 1;
			} else {
				type = IMPL;
			}
		}
		opcode = inst[instr].op;
		uint64_t max_val = 0;
		uint8_t i = 0;
		uint8_t j = 1;
		switch (type) {
			case BREG:
			case IMPL:
				if (instr == CPS) {
					rs = 0;
				}
				if ((am & (AM_IMPL|AM_BREG))) {
					if ((am & AM_IMPL) && (prefix)) {
						if (isasm) {
							addr[address] = prefix;
						}
						address++;
						bc->progsize++;
					}
					if (isasm) {
						addr[address] = (am & AM_BREG) ? opcode+0x14 : opcode;
					}
					address++;
					bc->progsize++;
				}
				break;
			case REL:
			case IMM:
				if (am & (AM_IMM|AM_REL)) {
					rs = (rs != 0xFF) ? rs : 0;
					tmp = (1 << rs);
					if (type == REL) {
						uint64_t max_sign = 0;
						uint8_t offset = 1;
						uint64_t tmp_val;

						/* Displacement = target - (instruction length + instruction address). */
						tmp_val = val.u64;
						offset += (prefix != 0);
						tmp_val -= offset+tmp;
						tmp_val -= address;
						/* Most negative displacement for the current operand size, sign-extended to 64 bits. */
						switch (rs) {
							default: max_sign = (uint64_t)(int64_t)INT8_MIN ; break;
							case 1 : max_sign = (uint64_t)(int64_t)INT16_MIN; break;
							case 2 : max_sign = (uint64_t)(int64_t)INT32_MIN; break;
							case 3 : max_sign = (uint64_t)INT64_MIN; break;
						}
						/* Out of range: widen the operand by one step and recompute. */
						if ((int64_t)tmp_val > ~(int64_t)max_sign || (int64_t)tmp_val < (int64_t)max_sign) {
							offset += (!rs);
							rs += (rs <= 3);
							tmp = (1 << rs);
							tmp_val = val.u64;
							tmp_val -= offset+tmp;
							tmp_val -= address;
							prefix = ((rs << 4) | 3);
						}
						val.u64 = tmp_val;
					}
					if (prefix) {
						if (isasm) {
							addr[address] = prefix;
						}
						address++;
						bc->progsize++;
					}
					if (isasm) {
						addr[address] = opcode;
					}
					address++;
					bc->progsize++;
					if (isasm) {
						setreg(addr, +, address, val.u8, +, 0, tmp-1);
					}
				}
				break;
			default:
				/* Pick the smallest operand size, in bytes, that holds the value. */
				if (of != 0xFF) {
					/*
					 * NOTE(review): this condition is true on the first pass for
					 * every value, so opsize is always 1 here. Confirm whether a
					 * signed range check was intended.
					 */
					i = 8;
					for (; i <= 64; i += 8, j++) {
						max_val |= ((uint64_t)1 << (i-1));
						if ((int64_t)val.u64 >= ~(int64_t)(max_val) || (int64_t)val.u64 <= (int64_t)(max_val)) {
							opsize = j;
							break;
						}
					}
				} else {
					/*
					 * The mask must be built in 64 bits: `0xFF << i` is an int shift,
					 * which overflows at i == 24 and is undefined from i == 32 on.
					 * At i == 56 the mask is all ones, so the loop always terminates
					 * via the break.
					 */
					for (; i < 64; i += 8, j++) {
						max_val |= ((uint64_t)0xFF << i);
						if (val.u64 <= max_val) {
							opsize = j;
							break;
						}
					}
				}
				if (type == 0xFF) {
					switch (opsize-1) {
						case 0: case 2: case 5: case 3: type = ZM ; break;
						case 1: case 4: case 6: case 7: type = ABS; break;
					}
				}
				if (opsize) {
					if (type != ABS) {
						switch (opsize) {
							case 2: opsize = 3; break;
							case 5: opsize = 6; break;
						}
					}
					prefix |= amp[opsize-1];
				}
				if (am & (AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_INDX2|AM_ZM2)) {
					switch (type) {
						case ZM:
							if (am & AM_ZM) {
								opcode += 0x04;
							} else if (am & AM_ZM2) {
								opcode += 0x20;
							}
							break;
						case ZMX:
							if (am & AM_ZMX) {
								opcode += 0x06;
							}
							break;
						case ZMY:
							if (am & AM_ZMY) {
								opcode += 0x14;
							}
							break;
						case ABS:
							if (am & AM_ABS) {
								opcode += 0x10;
							}
							break;
						case INDX:
							if (am & AM_INDX) {
								opcode += 0x16;
								break;
							}
							/* Falls Through. */
						case IND:
						case INDY:
							if (am & (AM_IND|AM_INDY|AM_INDX2)) {
								opcode = ind_ops[get_ind(instr, type, dbg)];
							}
							break;
					}
					if (prefix) {
						if (isasm) {
							addr[address] = prefix;
						}
						address++;
						bc->progsize++;
					}
					if (isasm) {
						addr[address] = opcode;
					}
					address++;
					bc->progsize++;
					if (isasm) {
						/* Write the operand bytes, highest first; every case falls through. */
						switch (opsize-1) {
							case 7: addr[address+7] = val.u8[7]; /* Falls through. */
							case 6: addr[address+6] = val.u8[6]; /* Falls through. */
							case 5: addr[address+5] = val.u8[5]; /* Falls through. */
							case 4: addr[address+4] = val.u8[4]; /* Falls through. */
							case 3: addr[address+3] = val.u8[3]; /* Falls through. */
							case 2: addr[address+2] = val.u8[2]; /* Falls through. */
							case 1: addr[address+1] = val.u8[1]; /* Falls through. */
							case 0: addr[address  ] = val.u8[0];
						}
					}
					tmp = opsize;
				}
				break;
		}
		address += tmp;
		bc->progsize += tmp;
	}
	return address;
}

/*
 * Walks the tokens of one line and dispatches directives and opcodes.
 *
 * t       - first token of the line.
 * l       - current line; advanced past a struct/union body by
 *           handle_struct().
 * bc      - byte counters, forwarded.
 * isasm   - non-zero emits bytes, zero only computes sizes.
 * address - address at the start of the line.
 * dbg     - debug flag, forwarded.
 *
 * Returns the address after the line has been processed.
 */
uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
	for (; t; t = t->next) {
		switch (t->id) {
			case TOK_DIR:
				switch (t->type) {
					case DIR_STRUCT:
					case DIR_UNION :
						handle_struct(l, address, 0, dbg);
						break;
					case DIR_RES:
						/* A directive without an operand leaves the address unchanged. */
						if (t->next == NULL) {
							return address;
						}
						t = t->next;
						address += get_val(t, address, 3, dbg);
						break;
					case DIR_ORG:
						if (t->next == NULL) {
							return address;
						}
						t = t->next;
						address = get_val(t, address, 3, dbg);
						break;
					case DIR_BYTE:
					case DIR_WORD:
					case DIR_DWORD:
					case DIR_QWORD:
						address = handle_directive(t, bc, isasm, address, dbg);
						break;
					default:
						break;
				}
				break;
			case TOK_OPCODE:
				address = handle_opcode(t, bc, isasm, address, dbg);
				break;
			case TOK_COMMENT:
				break;
			default:
				break;
		}
	}
	return address;
}

/*
 * Allocates a token and appends it to the global token list
 * (tokens / last_tok).
 *
 * Exactly one payload is stored: `s` if non-NULL, otherwise `str` if it
 * is a non-empty string, otherwise `value`. The string is not copied.
 * Returns the new token, or NULL if the allocation fails; in that case
 * the list is left untouched.
 */
token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) {
	token *new_tok = malloc(sizeof *new_tok);

	if (new_tok == NULL) {
		return NULL;
	}
	if (last_tok) {
		last_tok->next = new_tok;
	} else {
		tokens = new_tok;
	}
	new_tok->id = id;
	new_tok->type = type;
	new_tok->tab = tab;
	new_tok->space = space;
	new_tok->subtab = 0;
	new_tok->subspace = 0;
	new_tok->digits = 0;
	if (s) {
		new_tok->sym = s;
	} else if (str && str[0]) {
		new_tok->str = str;
	} else {
		new_tok->qword = value;
	}
	new_tok->next = NULL;
	last_tok = new_tok;
	return new_tok;
}

/*
 * Assembles the line list `ln`.
 *
 * Two sizing passes (isasm == 0) record each line's address and update
 * label symbols to match; a final pass (isasm == 1) emits the bytes. The
 * byte counters in `bc` are reset before the final pass so they only
 * reflect emitted output.
 */
void assemble(line *ln, bytecount *bc, uint8_t dbg) {
	uint64_t address = 0;
	line *l = ln;

	for (int i = 0; i < 2; i++) {
		l = ln;
		for (; l; l = l->next) {
			l->addr = address;
			token *t = l->tok;
			address = parse_tokens(t, &l, bc, 0, address, dbg);
			if (l == NULL) {
				break;
			}
		}
		l = ln;
		for (; l; l = l->next) {
			address = l->addr;
			token *t = l->tok;
			for (; t; t = t->next) {
				/* get_val() tolerates label tokens without a symbol, so do the same here. */
				if (t->id == TOK_LABEL && t->sym != NULL && t->sym->val != address) {
					t->sym->val = l->addr;
				}
			}
		}
	}
	l = ln;
	bc->progsize = 0;
	bc->datasize = 0;
	/*
	 * NOTE(review): `address` is not reset here; it still holds the addr of
	 * the last line from the loop above. Presumably the source is expected
	 * to set the address with an .org first - confirm.
	 */
	for (; l; l = l->next) {
		address = parse_tokens(l->tok, &l, bc, 1, address, dbg);
		if (l == NULL) {
			break;
		}
	}
}

/*
 * Debug helper: reports any symbol node that is reachable more than once
 * through the top-level `next` chain (nodes are compared by pointer).
 */
static void find_dupsym() {
	symbol *root = symbols;
	symbol *s = symbols;

	for (; s; s = s->next) {
		root = symbols;
		for (int i = 0; root; root = root->next) {
			if (root == s) {
				i++;
			}
			if (i > 1) {
				printf("Found duplicate symbol, s->name: %s, root->name: %s\n", s->name, root->name);
				i = 0;
			}
		}
	}
}

/* Returns the symbol of the fixup entry that refers to token `t`, or NULL if there is none. */
static symbol *find_fixup(token *t) {
	fixup *f = fixups;

	for (; f && t != f->t; f = f->next);
	return (f && t == f->t) ? f->s : NULL;
}

/* Debug helper: prints name and value of every named symbol, children before their parent. */
static void print_symval(symbol *s) {
	for (; s; s = s->next) {
		if (s->down) {
			print_symval(s->down);
		}
		if (s->name) {
			printf("s->name: %s, s->val: $%"PRIX64"\n", s->name, s->val);
		}
	}
}

/* Debug helper: prints the symbol tree, indenting each level by `depth`. */
static void print_symtree(symbol *s, int depth) {
	for (; s; s = s->next) {
		if (s->name != NULL) {
			for (int i = depth; i; i--) {
				printf("|%s", (i > 1) ? " " : "--");
			}
			printf("%s: $%"PRIX64"\n", s->name, s->val);
		}
		if (s->down != NULL) {
			print_symtree(s->down, depth+1);
		}
	}
}

/*
 * Rebuilds the symbol tree links (next/prev/up/down) from the order in
 * which symbols appear in the token stream of `l`.
 *
 * The global `symbols` list is reset and re-linked in place; `last_sym`,
 * `last_loc` and `locals` are used as running cursors. The statement order
 * below is significant and is kept exactly as it was.
 */
void fix_symtree(line *l) {
	symbol *s = symbols;
	symbol *cur_sym = NULL;
	symbol *sym_struct = NULL;
	symbols = NULL;
	last_sym = NULL;
	int islocal = 0;
	int isanon = 0;
	int is_struct = 0;
	int is_structsym = 0;
	for (; l; l = l->next) {
		token *t = l->tok;
		token *lt = NULL; /* Previous token on this line. */
		for (; t; t = t->next) {
			int ismember = (t->id == TOK_MEMBER);
			switch (t->id) {
				case TOK_STRUCT:
				case TOK_UNION : islocal = !(is_struct == 1 && lt && lt->id == TOK_DIR);
					/* Falls through. */
				case TOK_SYM   :
					/* A plain symbol is only considered when it is the first token of the line. */
					if (t->id == TOK_SYM && t != l->tok) {
						break;
					}
					/* Falls through. */
				case TOK_MEMBER:
				case TOK_LABEL :
					if (symbols) {
						(!islocal && s && !s->up) ? (last_sym = s) : (last_loc = s);
					}
					if (((t->type == 1 || ismember) && !islocal) || (islocal && ismember && is_structsym)) {
						/* First child of the current symbol: start a new local list under it. */
						is_structsym = 0;
						last_loc = NULL;
						islocal = 1;
						cur_sym = s;
						s->down = t->sym;
						s->down->up = s;
						s = s->down;
						if (s) {
							s->next = NULL;
							s->prev = NULL;
							s->down = NULL;
						}
						locals = s;
					} else if ((islocal || t->type == 0)) {
						if (t->type == 0 && !is_struct && islocal && !ismember) {
							/* Leaving the local scope: go back up to the parent. */
							islocal = 0;
							if (s) {
								s->up->down = locals;
								s = s->up;
							}
						}
						symbol *tmp = s;
						s = t->sym;
						if (s) {
							s->prev = (tmp && tmp != s) ? tmp : NULL;
							s->up = (s->prev) ? s->prev->up : s->up;
						}
						if (s && s->next) {
							s->next = NULL;
						}
					}
					if (!islocal) {
						last_loc = NULL;
						(last_sym) ? (last_sym->next = s) : (symbols = s);
						cur_sym = s;
						if (last_sym) {
							last_sym->next->prev = last_sym;
							last_sym->next->up = last_sym->up;
							last_sym->next->down = NULL;
						}
					} else {
						(last_loc) ? (last_loc->next = s) : (locals = s);
						if (last_loc) {
							last_loc->next->prev = last_loc;
							last_loc->next->up = last_loc->up;
							last_loc->next->down = NULL;
						} else {
							locals->prev = NULL;
							locals->down = NULL;
						}
					}
					break;
				case TOK_DIR:
					if (t->type == DIR_STRUCT || t->type == DIR_UNION) {
						is_struct++;
						is_structsym = (t->next && (t->next->id == TOK_STRUCT || t->next->id == TOK_UNION));
						if ((!is_structsym) || (isanon && is_structsym)) {
							isanon++;
						}
					} else if (t->type == DIR_ENDSTRUCT || t->type == DIR_ENDUNION) {
						is_struct--;
						int skip = 0;
						if (isanon > 0) {
							if ((cur_sym->up && !cur_sym->up->isanon) || (sym_struct && sym_struct->isanon)) {
								isanon--;
							}
							skip = (!isanon);
						}
						if ((int)(is_struct-isanon) > 0 && !skip && cur_sym->up) {
							for (sym_struct = s->up; sym_struct->prev && !sym_struct->isanon; sym_struct = sym_struct->prev);
							s = s->up;
							cur_sym = (cur_sym->up != NULL) ? cur_sym->up : s;
						}
					}
					break;
			}
			lt = t;
		}
	}
}

/* Frees every token in the list starting at `t`. Iterative, so list length does not grow the stack. */
static inline void free_tokens(token *t) {
	while (t != NULL) {
		token *next = t->next;
		free(t);
		t = next;
	}
}

/* Frees every line in the list starting at `l`, together with each line's tokens. */
void free_lines(line *l) {
	while (l != NULL) {
		line *next = l->next;
		free_tokens(l->tok);
		free(l);
		l = next;
	}
}

/*
 * Frees a symbol tree: for every node its children (`down`) first, then
 * its name, then the node itself. Siblings are walked iteratively; only
 * nesting depth uses recursion.
 */
static void free_symbols(symbol *s) {
	while (s != NULL) {
		symbol *next;

		if (s->down) {
			free_symbols(s->down);
		}
		free(s->name);
		s->name = NULL;
		next = s->next;
		free(s);
		s = next;
	}
}

/* Frees every fixup entry in the list starting at `f`. */
static inline void free_fixups(fixup *f) {
	while (f != NULL) {
		fixup *next = f->next;
		free(f);
		f = next;
	}
}

/* Returns the number of bytes occupied by the token structs of the list starting at `t`. */
uint64_t get_tokmem(token *t) {
	uint64_t i = 0;

	for (; t; t = t->next, i++);
	return i*sizeof(token);
}

/* Prints the size of a line struct, of a token struct, and the total size of the line table. */
void get_linemem(line *l) {
	uint64_t i = 0;
	uint64_t j = 0;

	for (; l; j += get_tokmem(l->tok), l = l->next, i++);
	/* sizeof yields size_t; cast so the arguments match the PRIu64 conversions. */
	printf("Bytes per line: %"PRIu64", Bytes per token: %"PRIu64", Total size of line table in bytes: %"PRIu64"\n", (uint64_t)sizeof(line), (uint64_t)sizeof(token), (uint64_t)(j+(i*sizeof(line))));
}

/*
 * Releases the global line, symbol and fixup lists and the entries of
 * string[] and comment[] below stridx / comidx, leaving the list heads
 * and freed slots set to NULL.
 */
void cleanup() {
	/* Must start at 0; it was previously read while uninitialized. */
	uint16_t i = 0;

	if (lines) {
		/*get_linemem(lines);*/
		/*fix_symtree(lines);*/
		free_lines(lines);
		lines = NULL;
	}
	if (symbols) {
		/*print_symtree(symbols, 0);*/
		free_symbols(symbols);
		symbols = NULL;
	}
	if (fixups) {
		free_fixups(fixups);
		fixups = NULL;
	}
	while (i < stridx || i < comidx) {
		if (i < stridx && string[i]) {
			free(string[i]);
			string[i] = NULL;
		}
		if (i < comidx && comment[i]) {
			free(comment[i]);
			comment[i] = NULL;
		}
		i++;
	}
}