#include "asmmon.h"
/* File-scope token pointer. None of the functions visible in this part of the
 * file read or write it.  NOTE(review): confirm whether it is still used elsewhere. */
token *tok_global;
/*
 * isexpr - tell whether a token type is one of the expression operators.
 *
 * type: value compared against the EXPR_* operator constants.
 * dbg:  debug flag; not consulted by this function.
 *
 * Returns 1 for EXPR_PLUS, EXPR_MINUS, EXPR_LOW, EXPR_HIGH, EXPR_OR,
 * EXPR_LSHFT and EXPR_RSHFT; returns 0 for anything else.
 */
uint8_t isexpr(uint8_t type, uint8_t dbg) {
    const uint8_t is_operator =
        (type == EXPR_PLUS)  ||
        (type == EXPR_MINUS) ||
        (type == EXPR_LOW)   ||
        (type == EXPR_HIGH)  ||
        (type == EXPR_OR)    ||
        (type == EXPR_LSHFT) ||
        (type == EXPR_RSHFT);

    return is_operator;
}
/*
 * get_rs - fetch the type field of a TOK_RS token.
 *
 * t:   token to inspect (must not be NULL).
 * dbg: debug flag; not consulted by this function.
 *
 * Returns t->type when t is a TOK_RS token, otherwise the sentinel 0xFF.
 */
uint8_t get_rs(token *t, uint8_t dbg) {
    return (t->id == TOK_RS) ? t->type : 0xFF;
}
/*
 * get_of - fetch the type field of a TOK_OF token.
 *
 * t:   token to inspect (must not be NULL).
 * dbg: debug flag; not consulted by this function.
 *
 * Returns t->type when t is a TOK_OF token, otherwise the sentinel 0xFF.
 */
uint8_t get_of(token *t, uint8_t dbg) {
    return (t->id == TOK_OF) ? t->type : 0xFF;
}
/*
 * get_ind - compute an index for an instruction's indirect addressing form.
 *
 * mne: instruction mnemonic id; the twelve mnemonics handled below map to
 *      their *_IND base index, any other value maps to base 0.
 * am:  addressing mode; IND adds 0, INDY adds 1, INDX adds 2, anything else 0.
 * dbg: debug flag; not consulted by this function.
 *
 * Returns base + mode step, truncated to uint8_t.
 */
uint8_t get_ind(uint8_t mne, uint8_t am, uint8_t dbg) {
    uint8_t step;
    uint8_t first;

    /* Step within the instruction's group of indirect entries. */
    if (am == INDY) {
        step = 1;
    } else if (am == INDX) {
        step = 2;
    } else {
        step = 0;
    }

    /* First entry of the instruction's group. */
    switch (mne) {
        case CMP: first = CMP_IND; break;
        case CPB: first = CPB_IND; break;
        case JMP: first = JMP_IND; break;
        case JSR: first = JSR_IND; break;
        case LDA: first = LDA_IND; break;
        case LDB: first = LDB_IND; break;
        case LDX: first = LDX_IND; break;
        case LDY: first = LDY_IND; break;
        case STA: first = STA_IND; break;
        case STB: first = STB_IND; break;
        case STX: first = STX_IND; break;
        case STY: first = STY_IND; break;
        default : first = 0;       break;
    }

    return first + step;
}
/*
 * get_val - evaluate the expression that starts at token t.
 *
 * t:    first token of the expression (an operator or an operand).
 * addr: value substituted for a symbol/label token that has no symbol attached.
 * size: width selector for the low/high operators:
 *       0 -> mask 0xFF / shift 8, 1 -> mask 0xFFFF / shift 16,
 *       anything else -> mask 0xFFFFFFFF / shift 32.
 * dbg:  when non-zero, prints each intermediate result.
 *
 * Operands are folded left to right; there is no operator precedence.
 * Evaluation continues while the next token is a TOK_EXPR accepted by isexpr().
 *
 * Returns the computed value.  A NULL t yields 0, and an operator that is not
 * followed by an operand ends the evaluation with the value computed so far
 * (both cases previously dereferenced a NULL pointer).
 */
uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t dbg) {
    uint64_t value = 0;
    uint64_t tmp_val = 0;
    uint8_t type = EXPR_NONE;
    uint8_t isstart = 1;

    if (t == NULL) {
        return 0;
    }

    do {
        /* Optional operator in front of the operand. */
        if (t->id == TOK_EXPR) {
            type = t->type;
            t = t->next;
            if (t == NULL) {
                /* Dangling operator at the end of the token list. */
                break;
            }
        }

        /* Fetch the operand; other token kinds leave tmp_val untouched. */
        switch (t->id) {
            case TOK_HEX:
            case TOK_DEC:
            case TOK_BIN:
            case TOK_CHAR:
                tmp_val = t->qword;
                t = t->next;
                break;
            case TOK_SYM:
            case TOK_LABEL:
                /* Walk through a chain of struct symbols to the last symbol token. */
                for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next);
                tmp_val = (t->sym) ? t->sym->val : addr;
                t = t->next;
                break;
            default:
                break;
        }

        /* Apply the operator. */
        switch (type) {
            case EXPR_PLUS:
                if (isstart) {
                    value = tmp_val;
                } else {
                    value += tmp_val;
                }
                break;
            case EXPR_MINUS:
                if (isstart) {
                    value = -tmp_val;
                } else {
                    value -= tmp_val;
                }
                break;
            case EXPR_OR:
                value |= tmp_val;
                break;
            case EXPR_LSHFT:
                /* Shifting a 64-bit value by 64 or more is undefined in C. */
                value = (tmp_val < 64) ? (value << tmp_val) : 0;
                break;
            case EXPR_RSHFT:
                value = (tmp_val < 64) ? (value >> tmp_val) : 0;
                break;
            case EXPR_LOW:
                value = tmp_val;
                switch (size) {
                    default:
                    case 2 : value &= 0xFFFFFFFF; break;
                    case 1 : value &= 0x0000FFFF; break;
                    case 0 : value &= 0x000000FF; break;
                }
                break;
            case EXPR_HIGH:
                value = tmp_val;
                switch (size) {
                    default:
                    case 2 : value >>= 0x20; break;
                    case 1 : value >>= 0x10; break;
                    case 0 : value >>= 0x08; break;
                }
                break;
            case EXPR_NONE:
                value = tmp_val;
                break;
            default:
                break;
        }
        isstart = 0;

        if (dbg) {
            printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val);
        }
    } while (t && t->id == TOK_EXPR && isexpr(t->type, dbg));

    return value;
}
/*
 * skip_expr - step over an expression without evaluating it.
 *
 * t:   first token of the expression (an operator or an operand).
 * dbg: debug flag, forwarded to isexpr().
 *
 * Consumes optional-operator/operand pairs for as long as the following token
 * is a TOK_EXPR accepted by isexpr().
 *
 * Returns the first token after the expression, or NULL when the token list
 * ends.  A NULL t, or an operator with nothing after it, returns NULL
 * (both cases previously dereferenced a NULL pointer).
 */
token *skip_expr(token *t, uint8_t dbg) {
    if (t == NULL) {
        return NULL;
    }

    do {
        if (t->id == TOK_EXPR) {
            t = t->next;
            if (t == NULL) {
                /* Dangling operator at the end of the token list. */
                break;
            }
        }
        switch (t->id) {
            case TOK_HEX  :
            case TOK_DEC  :
            case TOK_BIN  :
            case TOK_CHAR :
            case TOK_SYM  :
            case TOK_LABEL:
                t = t->next;
                break;
            default:
                break;
        }
    } while (t && t->id == TOK_EXPR && isexpr(t->type, dbg));

    return t;
}
/*
 * get_directivesize - map a data directive to its size selector.
 *
 * type: directive type.
 * dbg:  debug flag; not consulted by this function.
 *
 * Returns 3 for DIR_QWORD, 2 for DIR_DWORD, 1 for DIR_WORD, and 0 for
 * DIR_BYTE or any other value.
 */
uint8_t get_directivesize(uint8_t type, uint8_t dbg) {
    if (type == DIR_QWORD) {
        return 3;
    }
    if (type == DIR_DWORD) {
        return 2;
    }
    if (type == DIR_WORD) {
        return 1;
    }
    /* DIR_BYTE and everything else. */
    return 0;
}
/*
 * handle_struct - compute the size of a struct/union body and assign member offsets.
 *
 * ln:      in/out line cursor.  On entry *ln is the line holding the struct or
 *          union directive; on return it is the line on which the matching end
 *          directive was found, or the last line visited if none was found.
 * address: current assembly address, forwarded to get_val() and nested calls.
 * offset:  offset given to the first member.
 * dbg:     debug flag, forwarded to the helpers.
 *
 * For a struct the member sizes are summed and each member gets the running
 * offset; for a union the size is that of the largest member and the offset
 * is not advanced.  Nested struct/union directives are handled recursively.
 * The resulting size is also stored in the symbol attached to the token that
 * follows the opening directive, when there is one.
 *
 * Returns the computed size (0 when ln or *ln is NULL).
 *
 * Fixes over the previous version: `member` is no longer read uninitialized,
 * *ln is checked before it is dereferenced, and *ln is never set to NULL
 * (callers continue with `l = l->next`).
 */
uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg) {
    uint8_t is_struct = 0;
    uint8_t done = 0;
    uint8_t ismember = 0;
    uint16_t size = 0;
    uint16_t member_size = 0;
    symbol *strct = NULL;
    /* Lives as long as ismember, which also carries over from line to line. */
    symbol *member = NULL;

    if (ln == NULL || *ln == NULL) {
        return 0;
    }

    line *l = *ln;
    line *last = l;
    token *tok = l->tok;

    /* Find the opening directive; the loop increment leaves tok one token past it. */
    for (uint8_t found = 0; tok && !found; tok = tok->next) {
        switch (tok->id) {
            case TOK_DIR:
                is_struct = (tok->type == DIR_STRUCT);
                found = (tok->type == DIR_STRUCT || tok->type == DIR_UNION);
                break;
            case TOK_STRUCT: is_struct = 1; /* fallthrough */
            case TOK_UNION : found = 1; break;
            default: break;
        }
    }
    if (tok != NULL) {
        strct = tok->sym;
    }

    /* The body starts on the following line, when there is one. */
    if (l->next) {
        l = l->next;
    }

    for (; l && !done; last = l, l = l->next) {
        for (token *t = l->tok; t && !done; t = t->next) {
            switch (t->id) {
                case TOK_MEMBER:
                    ismember = 1;
                    member = t->sym;
                    break;
                case TOK_DIR:
                    ismember = (t->type == DIR_STRUCT || t->type == DIR_UNION) ? 1 : ismember;
                    done = ((is_struct && t->type == DIR_ENDSTRUCT) || (!is_struct && t->type == DIR_ENDUNION));
                    if (!done && ismember) {
                        switch (t->type) {
                            case DIR_BYTE  : member_size = 1; break;
                            case DIR_WORD  : member_size = 2; break;
                            case DIR_DWORD : member_size = 4; break;
                            case DIR_QWORD : member_size = 8; break;
                            case DIR_UNION :
                            case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break;
                            /* NOTE(review): get_val()/skip_expr() receive the directive
                             * token itself here, not the token after it - confirm intended. */
                            case DIR_RES   : member_size = get_val(t, address, 3, dbg); t = skip_expr(t, dbg); break;
                            default: break;
                        }
                        if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) {
                            member->val = offset;
                        }
                        if (is_struct) {
                            size += member_size;
                            offset += member_size;
                        } else if (size < member_size) {
                            size = member_size;
                        }
                    }
                    ismember = 0;
                    break;
                default:
                    break;
            }
        }
        if (done) {
            /* Stay on the line that holds the end directive. */
            break;
        }
    }

    /* Ran off the end of the line list: report the last line visited instead of NULL. */
    *ln = (l != NULL) ? l : last;
    if (strct != NULL) {
        strct->val = size;
    }
    return size;
}
/*
 * handle_directive - emit the data of a byte/word/dword/qword directive.
 *
 * t:       the directive token; its operands follow in t->next.
 * bc:      byte counters; bc->datasize grows by the number of bytes produced.
 * isasm:   when non-zero the bytes are stored into addr[]; when zero only the
 *          sizes are accounted for.
 * address: address of the first byte to produce.
 * dbg:     debug flag, forwarded to the helpers.
 *
 * Numeric/symbol operands are evaluated with get_val() and stored least
 * significant byte first (val.u8[0] at the lowest address).  String operands
 * are only handled for DIR_BYTE: each character is emitted with the escapes
 * \n \r \t \" \' \\ \0 translated, followed by a terminating zero byte.
 *
 * Returns the address following the last byte produced.
 *
 * Fixes over the previous version: a lone backslash at the end of a string no
 * longer moves the index past the terminator (out-of-bounds read), and an
 * unrecognised escape emits the escaped character instead of a stale byte.
 */
uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
    union reg val;
    uint8_t c = 0;
    uint8_t tmp = 0;
    uint8_t type = t->type;
    uint64_t tmpaddr = address;

    t = t->next;
    for (; t; t = t->next) {
        tmp = 0;
        switch (t->id) {
            case TOK_HEX:
            case TOK_DEC:
            case TOK_BIN:
            case TOK_CHAR:
            case TOK_SYM:
            case TOK_LABEL:
                val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), dbg);
                /* Each wider type stores its upper bytes, then falls into the narrower one. */
                switch (type) {
                    case DIR_QWORD:
                        if (isasm) {
                            addr[tmpaddr+7] = val.u8[7];
                            addr[tmpaddr+6] = val.u8[6];
                            addr[tmpaddr+5] = val.u8[5];
                            addr[tmpaddr+4] = val.u8[4];
                        }
                        tmp += 4;
                        /* fallthrough */
                    case DIR_DWORD:
                        if (isasm) {
                            addr[tmpaddr+3] = val.u8[3];
                            addr[tmpaddr+2] = val.u8[2];
                        }
                        tmp += 2;
                        /* fallthrough */
                    case DIR_WORD:
                        if (isasm) {
                            addr[tmpaddr+1] = val.u8[1];
                        }
                        tmp++;
                        /* fallthrough */
                    case DIR_BYTE:
                        if (isasm) {
                            addr[tmpaddr  ] = val.u8[0];
                        }
                        tmp++;
                        tmpaddr += tmp;
                        bc->datasize += tmp;
                        break;
                    default:
                        break;
                }
                if (t->next && t->next->id == TOK_EXPR && isexpr(t->next->type, dbg)) {
                    t = skip_expr(t, dbg);
                }
                break;
            case TOK_STRING:
                if (type == DIR_BYTE && t->str != NULL) {
                    for (uint16_t k = 0; t->str[k] != '\0'; k++) {
                        if (t->str[k] == '\\') {
                            const char esc = t->str[k+1];
                            if (esc == '\0') {
                                /* Lone trailing backslash: emit it and stay in bounds. */
                                c = '\\';
                            } else {
                                switch (esc) {
                                    case 'n' : c = '\n'; break;
                                    case 'r' : c = '\r'; break;
                                    case 't' : c = '\t'; break;
                                    case '\"': c = '\"'; break;
                                    case '\'': c = '\''; break;
                                    case '\\': c = '\\'; break;
                                    case '0' : c = '\0'; break;
                                    default  : c = (uint8_t)esc; break;
                                }
                                k++;
                            }
                        } else {
                            c = (uint8_t)t->str[k];
                        }
                        if (isasm) {
                            addr[tmpaddr] = c;
                        }
                        tmpaddr++;
                        bc->datasize++;
                    }
                    /* Terminating zero byte. */
                    if (isasm) {
                        addr[tmpaddr] = '\0';
                    }
                    tmpaddr++;
                    bc->datasize++;
                }
                break;
            default:
                break;
        }
        /* skip_expr() may have reached the end of the token list. */
        if (t == NULL) {
            break;
        }
    }
    return tmpaddr;
}
/*
 * handle_opcode - encode the instruction that starts at token t.
 *
 * t:       a TOK_OPCODE token; optional TOK_RS / TOK_OF tokens and the operand
 *          expression follow it.
 * bc:      byte counters; bc->progsize grows by the number of bytes produced.
 * isasm:   when non-zero the bytes are stored into addr[]; when zero only the
 *          sizes are accounted for.
 * address: address of the first byte to produce.
 * dbg:     debug flag, forwarded to the helpers.
 *
 * Produces an optional prefix byte, the opcode byte and the operand bytes
 * (least significant byte at the lowest address).
 *
 * Returns the address following the last byte produced.
 *
 * Fixes over the previous version:
 *  - the unsigned operand-width scan built its limit with `0xFF << i`, which is
 *    evaluated in int and overflows from i == 24 on, so operands wider than
 *    four bytes were never selected; the mask is now built in 64 bits;
 *  - the 32-bit relative limit used `1 << 31` (signed overflow); the INTn_MIN
 *    constants are used instead;
 *  - unused locals were removed.
 */
uint64_t handle_opcode(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
    union reg val;
    uint8_t opsize;
    uint8_t instr;
    uint8_t opcode;
    uint8_t type;
    uint16_t am = 0;
    uint8_t tmp = 0;
    uint8_t prefix = 0;
    uint8_t rs = 0;
    uint8_t of = 0;
    uint8_t tmp_prefix = 0;

    val.u64 = 0;

    for (; t; t = t->next) {
        if (t->id == TOK_OPCODE) {
            instr = t->byte;
            type = t->type;
        } else {
            break;
        }
        tmp = 0;
        opsize = 1;
        opcode = 0;

        /* Optional register-size and offset tokens directly after the mnemonic. */
        if (t->next) {
            rs = get_rs(t->next, dbg);
            t = (rs != 0xFF) ? t->next : t;
            if (t->next) {
                of = get_of(t->next, dbg);
                t = (of != 0xFF) ? t->next : t;
            }
        }
        if (rs != 0xFF || of != 0xFF) {
            tmp_prefix = (rs != 0xFF) ? (rs << 0) : tmp_prefix;
            tmp_prefix |= (of != 0xFF) ? (of << 2) : tmp_prefix;
        }
        prefix = (tmp_prefix) ? ((tmp_prefix << 4) | 3) : 0;

        uint8_t isimplied = (!t->next || (t->next->id == TOK_COMMENT));
        am = inst[instr].am;
        if ((am & AM_IMPL) && isimplied) {
            type = IMPL;
        } else {
            if (inst[instr].am & AM_REL) {
                type = REL;
            }
            if (t->next) {
                t = t->next;
            }
            val.u64 = get_val(t, address, (rs != 0xFF) ? rs : 0, dbg);
        }
        if (instr == TXS) {
            if (type == IMM) {
                rs = 1;
            } else {
                type = IMPL;
            }
        }
        opcode = inst[instr].op;

        uint64_t max_val = 0;
        uint8_t i = 0;
        uint8_t j = 1;

        switch (type) {
            case BREG:
            case IMPL:
                if (instr == CPS) {
                    rs = 0;
                }
                if ((am & (AM_IMPL|AM_BREG))) {
                    if ((am & AM_IMPL) && (prefix)) {
                        if (isasm) {
                            addr[address] = prefix;
                        }
                        address++;
                        bc->progsize++;
                    }
                    if (isasm) {
                        addr[address] = (am & AM_BREG) ? opcode+0x14 : opcode;
                    }
                    address++;
                    bc->progsize++;
                }
                break;
            case REL:
            case IMM:
                if (am & (AM_IMM|AM_REL)) {
                    rs = (rs != 0xFF) ? rs : 0;
                    tmp = (1 << rs);
                    if (type == REL) {
                        uint64_t max_sign = 0;
                        uint8_t offset = 1;
                        uint64_t tmp_val;

                        /* Displacement relative to the end of the instruction. */
                        tmp_val = val.u64;
                        offset += (prefix != 0);
                        tmp_val -= offset+tmp;
                        tmp_val -= address;

                        /* Most negative value of the operand width, sign-extended. */
                        switch (rs) {
                            default: max_sign = (uint64_t)(int64_t)INT8_MIN;  break;
                            case 1 : max_sign = (uint64_t)(int64_t)INT16_MIN; break;
                            case 2 : max_sign = (uint64_t)(int64_t)INT32_MIN; break;
                            case 3 : max_sign = (uint64_t)INT64_MIN;          break;
                        }
                        /* Out of range: widen the operand by one step and recompute. */
                        if ((int64_t)tmp_val > ~(int64_t)max_sign || (int64_t)tmp_val < (int64_t)max_sign) {
                            offset += (!rs);
                            rs += (rs <= 3);
                            tmp = (1 << rs);
                            tmp_val = val.u64;
                            tmp_val -= offset+tmp;
                            tmp_val -= address;
                            prefix = ((rs << 4) | 3);
                        }
                        val.u64 = tmp_val;
                    }
                    if (prefix) {
                        if (isasm) {
                            addr[address] = prefix;
                        }
                        address++;
                        bc->progsize++;
                    }
                    if (isasm) {
                        addr[address] = opcode;
                    }
                    address++;
                    bc->progsize++;
                    if (isasm) {
                        setreg(addr, +, address, val.u8, +, 0, tmp-1);
                    }
                }
                break;
            default:
                if (of != 0xFF) {
                    /* NOTE(review): this condition is always true (a value is always
                     * >= ~max_val or <= max_val), so opsize is always 1 on this path.
                     * Left unchanged; confirm the intended signed range check. */
                    i = 8;
                    for (; i <= 64; i += 8, j++) {
                        max_val |= ((uint64_t)1 << (i-1));
                        if ((int64_t)val.u64 >= ~(int64_t)(max_val) || (int64_t)val.u64 <= (int64_t)(max_val)) {
                            opsize = j;
                            break;
                        }
                    }
                } else {
                    /* Smallest byte count whose all-ones limit holds the value. */
                    for (; i < 64; i += 8, j++) {
                        max_val |= ((uint64_t)0xFF << i);
                        if (val.u64 <= max_val) {
                            opsize = j;
                            break;
                        }
                    }
                }
                if (type == 0xFF) {
                    switch (opsize-1) {
                        case 0: case 2: case 5: case 3: type = ZM ; break;
                        case 1: case 4: case 6: case 7: type = ABS; break;
                    }
                }
                if (opsize) {
                    if (type != ABS) {
                        switch (opsize) {
                            case 2: opsize = 3; break;
                            case 5: opsize = 6; break;
                        }
                    }
                    prefix |= amp[opsize-1];
                }
                if (am & (AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_INDX2|AM_ZM2)) {
                    switch (type) {
                        case ZM:
                            if (am & AM_ZM) {
                                opcode += 0x04;
                            } else if (am & AM_ZM2) {
                                opcode += 0x20;
                            }
                            break;
                        case ZMX:
                            if (am & AM_ZMX) {
                                opcode += 0x06;
                            }
                            break;
                        case ZMY:
                            if (am & AM_ZMY) {
                                opcode += 0x14;
                            }
                            break;
                        case ABS:
                            if (am & AM_ABS) {
                                opcode += 0x10;
                            }
                            break;
                        case INDX:
                            if (am & AM_INDX) {
                                opcode += 0x16;
                                break;
                            }
                            /* fallthrough: no AM_INDX form, use the ind_ops table */
                        case IND:
                        case INDY:
                            if (am & (AM_IND|AM_INDY|AM_INDX2)) {
                                opcode = ind_ops[get_ind(instr, type, dbg)];
                            }
                            break;
                    }
                    if (prefix) {
                        if (isasm) {
                            addr[address] = prefix;
                        }
                        address++;
                        bc->progsize++;
                    }
                    if (isasm) {
                        addr[address] = opcode;
                    }
                    address++;
                    bc->progsize++;
                    if (isasm) {
                        /* Store opsize operand bytes, highest first, falling through. */
                        switch (opsize-1) {
                            case 7: addr[address+7] = val.u8[7]; /* fallthrough */
                            case 6: addr[address+6] = val.u8[6]; /* fallthrough */
                            case 5: addr[address+5] = val.u8[5]; /* fallthrough */
                            case 4: addr[address+4] = val.u8[4]; /* fallthrough */
                            case 3: addr[address+3] = val.u8[3]; /* fallthrough */
                            case 2: addr[address+2] = val.u8[2]; /* fallthrough */
                            case 1: addr[address+1] = val.u8[1]; /* fallthrough */
                            case 0: addr[address  ] = val.u8[0];
                        }
                    }
                    tmp = opsize;
                }
                break;
        }
        /* Account for the operand bytes. */
        address += tmp;
        bc->progsize += tmp;
    }
    return address;
}
/*
 * parse_tokens - process the tokens of one line and advance the address.
 *
 * t:       first token of the line.
 * l:       line cursor, forwarded to handle_struct(), which may move it.
 * bc:      byte counters updated by the directive/opcode handlers.
 * isasm:   forwarded to the handlers; non-zero means bytes are written.
 * address: address at the start of the line.
 * dbg:     debug flag, forwarded to the helpers.
 *
 * Struct/union directives are measured, DIR_RES adds its operand to the
 * address, DIR_ORG replaces the address with its operand, data directives and
 * opcodes are encoded.  Other tokens are ignored.
 *
 * Returns the address after the line.
 *
 * Fix over the previous version: DIR_RES / DIR_ORG without an operand token no
 * longer dereference a NULL pointer; such a directive is ignored.
 */
uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
    for (; t; t = t->next) {
        switch (t->id) {
            case TOK_DIR:
                switch (t->type) {
                    case DIR_STRUCT:
                    case DIR_UNION :
                        handle_struct(l, address, 0, dbg);
                        break;
                    case DIR_RES:
                        if (t->next) {
                            t = t->next;
                            address += get_val(t, address, 3, dbg);
                        }
                        break;
                    case DIR_ORG:
                        if (t->next) {
                            t = t->next;
                            address = get_val(t, address, 3, dbg);
                        }
                        break;
                    case DIR_BYTE:
                    case DIR_WORD:
                    case DIR_DWORD:
                    case DIR_QWORD:
                        address = handle_directive(t, bc, isasm, address, dbg);
                        break;
                    default:
                        break;
                }
                break;
            case TOK_OPCODE:
                address = handle_opcode(t, bc, isasm, address, dbg);
                break;
            case TOK_COMMENT:
                break;
            default:
                break;
        }
    }
    return address;
}
/*
 * make_token - allocate a token and append it to the global token list.
 *
 * id, type: token id and type.
 * space, tab: stored in the token's space/tab fields.
 * value: stored in qword when neither a symbol nor a non-empty string is given.
 * str:   stored (pointer, not copied) when s is NULL and str is non-empty.
 * s:     symbol to attach; takes priority over str and value.
 *
 * The new token becomes the list head (`tokens`) when the list is empty,
 * otherwise it is linked after `last_tok`; `last_tok` then points to it.
 *
 * Returns the new token, or NULL when allocation fails (the list is left
 * untouched in that case).
 *
 * Fixes over the previous version: the allocation result is checked, a NULL
 * str is treated like an empty one instead of being dereferenced, and the
 * token is zero-initialised so no field is left indeterminate.
 */
token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) {
    token *new_tok = calloc(1, sizeof *new_tok);

    if (new_tok == NULL) {
        return NULL;
    }

    if (last_tok) {
        last_tok->next = new_tok;
    } else {
        tokens = new_tok;
    }

    new_tok->id = id;
    new_tok->type = type;
    new_tok->tab = tab;
    new_tok->space = space;
    new_tok->subtab = 0;
    new_tok->subspace = 0;
    new_tok->digits = 0;

    /* Exactly one payload is stored: symbol, then string, then numeric value. */
    if (s) {
        new_tok->sym = s;
    } else if (str != NULL && str[0]) {
        new_tok->str = str;
    } else {
        new_tok->qword = value;
    }

    new_tok->next = NULL;
    last_tok = new_tok;
    return new_tok;
}
/*
 * assemble - size the program, resolve label addresses, then emit the bytes.
 *
 * ln:  first line of the program.
 * bc:  byte counters; reset before the emitting pass, so on return they
 *      describe that pass only.
 * dbg: debug flag, forwarded to parse_tokens().
 *
 * Two sizing rounds are run.  Each records every line's start address
 * (parse_tokens() with isasm == 0) and then stores that address in the symbol
 * of every label token on the line.  A final pass calls parse_tokens() with
 * isasm == 1 to write the bytes.
 *
 * Fixes over the previous version:
 *  - every pass now starts at address 0; before, the second sizing round and
 *    the emitting pass started at the previous round's last line address, so
 *    label values and emitted code disagreed unless the source began with an
 *    origin directive;
 *  - the line cursor is checked after parse_tokens(), which receives it by
 *    reference and may change it;
 *  - label tokens without a symbol are skipped instead of dereferenced.
 */
void assemble(line *ln, bytecount *bc, uint8_t dbg) {
    uint64_t address = 0;
    line *l;

    for (int pass = 0; pass < 2; pass++) {
        /* Sizing: record where every line starts. */
        address = 0;
        l = ln;
        while (l) {
            l->addr = address;
            address = parse_tokens(l->tok, &l, bc, 0, address, dbg);
            if (l == NULL) {
                break;
            }
            l = l->next;
        }

        /* Give every label the start address of its line. */
        for (l = ln; l; l = l->next) {
            for (token *t = l->tok; t; t = t->next) {
                if (t->id == TOK_LABEL && t->sym != NULL) {
                    t->sym->val = l->addr;
                }
            }
        }
    }

    /* Emitting pass. */
    bc->progsize = 0;
    bc->datasize = 0;
    address = 0;
    l = ln;
    while (l) {
        address = parse_tokens(l->tok, &l, bc, 1, address, dbg);
        if (l == NULL) {
            break;
        }
        l = l->next;
    }
}
/*
 * find_dupsym - report symbol nodes that occur more than once in the global
 * `symbols` list.
 *
 * For every node the whole list is scanned and pointer-equal nodes are
 * counted; each time the count exceeds one a message is printed and the
 * count restarts from zero.
 */
static void find_dupsym() {
    symbol *candidate = symbols;

    while (candidate) {
        int hits = 0;
        symbol *probe = symbols;

        while (probe) {
            if (probe == candidate) {
                hits++;
            }
            if (hits > 1) {
                printf("Found duplicate symbol, s->name: %s, root->name: %s\n", candidate->name, probe->name);
                hits = 0;
            }
            probe = probe->next;
        }
        candidate = candidate->next;
    }
}
/*
 * find_fixup - look up the fixup entry recorded for a token.
 *
 * t: token to search for (compared by pointer) in the global `fixups` list.
 *
 * Returns the symbol of the first matching entry, or NULL when none matches.
 */
static symbol *find_fixup(token *t) {
    for (fixup *entry = fixups; entry != NULL; entry = entry->next) {
        if (entry->t == t) {
            return entry->s;
        }
    }
    return NULL;
}
/*
 * print_symval - print name and value of every named symbol reachable from s.
 *
 * s: first symbol of a chain (may be NULL).
 *
 * For each node the `down` subtree is printed first, then the node itself
 * (only when it has a name), then the walk moves on to `next`.
 */
static void print_symval(symbol *s) {
    for (symbol *node = s; node != NULL; node = node->next) {
        if (node->down) {
            print_symval(node->down);
        }
        if (node->name) {
            printf("s->name: %s, s->val: $%"PRIX64"\n", node->name, node->val);
        }
    }
}
/*
 * print_symtree - print the symbol tree rooted at s, indented by depth.
 *
 * s:     first symbol of a chain (may be NULL).
 * depth: nesting level; one "|"-prefixed segment is printed per level, the
 *        last one ending in "--".
 *
 * For each node the node itself is printed (only when it has a name), then
 * its `down` subtree one level deeper, then the walk moves on to `next`.
 */
static void print_symtree(symbol *s, int depth) {
    for (symbol *node = s; node != NULL; node = node->next) {
        if (node->name != NULL) {
            int remaining = depth;
            while (remaining) {
                printf("|%s", (remaining > 1) ? " " : "--");
                remaining--;
            }
            printf("%s: $%"PRIX64"\n", node->name, node->val);
        }
        if (node->down != NULL) {
            print_symtree(node->down, depth+1);
        }
    }
}
/*
 * fix_symtree - relink the symbol nodes by walking every token of every line.
 *
 * l: first line to walk.
 *
 * The globals `symbols` and `last_sym` are reset to NULL first (the old list
 * head is kept in the local `s`).  Struct/union tokens, symbol tokens that are
 * the first token of their line, member tokens and label tokens then have
 * their t->sym linked either into the top-level chain (through
 * last_sym/symbols) or into a local chain (through last_loc/locals), using the
 * next/prev/up/down pointers.  Struct/union nesting is tracked through the
 * DIR_STRUCT/DIR_UNION and DIR_ENDSTRUCT/DIR_ENDUNION directives.
 *
 * NOTE(review): the exact tree shape depends on statement order in several
 * places; the comments below only restate what each step does.
 */
void fix_symtree(line *l) {
symbol *s = symbols;
symbol *cur_sym = NULL;
symbol *sym_struct = NULL;
symbols = NULL;
last_sym = NULL;
/* islocal: non-zero while symbols are being linked into the local chain. */
int islocal = 0;
/* isanon / is_struct: counters adjusted by struct/union begin and end directives. */
int isanon = 0;
int is_struct = 0;
/* is_structsym: the last struct/union directive was followed by a TOK_STRUCT/TOK_UNION token. */
int is_structsym = 0;
for (; l; l = l->next) {
token *t = l->tok;
/* lt: previous token on this line (NULL for the first token). */
token *lt = NULL;
for (; t; t = t->next) {
int ismember = (t->id == TOK_MEMBER);
switch (t->id) {
case TOK_STRUCT:
/* Falls through into the TOK_SYM / TOK_MEMBER / TOK_LABEL handling below. */
case TOK_UNION : islocal = !(is_struct == 1 && lt && lt->id == TOK_DIR);
case TOK_SYM :
/* A symbol token is only processed when it is the first token of the line. */
if (t->id == TOK_SYM && t != l->tok) {
break;
}
case TOK_MEMBER:
case TOK_LABEL :
if (symbols) {
(!islocal && s && !s->up) ? (last_sym = s) : (last_loc = s);
}
if (((t->type == 1 || ismember) && !islocal) || (islocal && ismember && is_structsym)) {
/* Start a new local chain below the current symbol. */
/* NOTE(review): s is dereferenced here without a NULL check - confirm it cannot be NULL. */
is_structsym = 0;
last_loc = NULL;
islocal = 1;
cur_sym = s;
s->down = t->sym;
s->down->up = s;
s = s->down;
if (s) {
s->next = NULL;
s->prev = NULL;
s->down = NULL;
}
locals = s;
} else if ((islocal || t->type == 0)) {
if (t->type == 0 && !is_struct && islocal && !ismember) {
/* Leave the local chain: reattach it to the parent and move back up. */
islocal = 0;
if (s) {
s->up->down = locals;
s = s->up;
}
}
symbol *tmp = s;
s = t->sym;
if (s) {
s->prev = (tmp && tmp != s) ? tmp : NULL;
s->up = (s->prev) ? s->prev->up : s->up;
}
if (s && s->next) {
s->next = NULL;
}
}
if (!islocal) {
/* Append s to the top-level chain. */
last_loc = NULL;
(last_sym) ? (last_sym->next = s) : (symbols = s);
cur_sym = s;
if (last_sym) {
last_sym->next->prev = last_sym;
last_sym->next->up = last_sym->up;
last_sym->next->down = NULL;
}
} else {
/* Append s to the local chain. */
(last_loc) ? (last_loc->next = s) : (locals = s);
if (last_loc) {
last_loc->next->prev = last_loc;
last_loc->next->up = last_loc->up;
last_loc->next->down = NULL;
} else {
locals->prev = NULL;
locals->down = NULL;
}
}
break;
case TOK_DIR:
if (t->type == DIR_STRUCT || t->type == DIR_UNION) {
is_struct++;
is_structsym = (t->next && (t->next->id == TOK_STRUCT || t->next->id == TOK_UNION));
if ((!is_structsym) || (isanon && is_structsym)) {
isanon++;
}
} else if (t->type == DIR_ENDSTRUCT || t->type == DIR_ENDUNION) {
is_struct--;
int skip = 0;
if (isanon > 0) {
/* NOTE(review): cur_sym is dereferenced here and below without a NULL check. */
if ((cur_sym->up && !cur_sym->up->isanon) || (sym_struct && sym_struct->isanon)) {
isanon--;
}
skip = (!isanon);
}
if ((int)(is_struct-isanon) > 0 && !skip && cur_sym->up) {
/* Walk back along prev from the parent until an isanon node or the chain start. */
for (sym_struct = s->up; sym_struct->prev && !sym_struct->isanon; sym_struct = sym_struct->prev);
s = s->up;
cur_sym = (cur_sym->up != NULL) ? cur_sym->up : s;
}
}
break;
}
lt = t;
}
}
}
/*
 * free_tokens - release every token of a list.
 *
 * t: first token (may be NULL).  Only the token nodes themselves are freed.
 */
static inline void free_tokens(token *t) {
    while (t != NULL) {
        token *following = t->next;
        free(t);
        t = following;
    }
}
/*
 * free_lines - release every line of a list together with its tokens.
 *
 * l: first line (may be NULL).  For each line its token list is freed with
 *    free_tokens() before the line node itself.
 */
void free_lines(line *l) {
    while (l != NULL) {
        line *following;

        free_tokens(l->tok);
        following = l->next;
        free(l);
        l = following;
    }
}
/*
 * free_symbols - release a symbol chain and everything below it.
 *
 * s: first symbol of a chain (may be NULL).
 *
 * For each node the `down` subtree is freed first, then the node's name
 * (when it has one), then the node; the walk continues with `next`.
 */
static void free_symbols(symbol *s) {
    while (s != NULL) {
        symbol *following;

        if (s->down) {
            free_symbols(s->down);
        }
        if (s->name != NULL) {
            free(s->name);
        }
        following = s->next;
        free(s);
        s = following;
    }
}
/*
 * free_fixups - release every entry of a fixup list.
 *
 * f: first entry (may be NULL).  Only the list nodes themselves are freed.
 */
static inline void free_fixups(fixup *f) {
    while (f != NULL) {
        fixup *following = f->next;
        free(f);
        f = following;
    }
}
/*
 * get_tokmem - memory taken by the token nodes of a list.
 *
 * t: first token (may be NULL).
 *
 * Returns the number of tokens in the list multiplied by sizeof(token).
 */
uint64_t get_tokmem(token *t) {
    uint64_t count = 0;

    while (t) {
        count++;
        t = t->next;
    }
    return count*sizeof(token);
}
/*
 * get_linemem - print the memory taken by a line list and its tokens.
 *
 * l: first line (may be NULL).
 *
 * Prints sizeof(line), sizeof(token), and the total of all line nodes plus
 * all token nodes (as reported by get_tokmem()).
 *
 * Fix over the previous version: the sizeof results are size_t and were passed
 * to PRIu64 conversions without a cast, which is a format/argument mismatch
 * wherever size_t is not 64 bits wide.
 */
void get_linemem(line *l) {
    uint64_t line_count = 0;
    uint64_t token_bytes = 0;

    for (; l; l = l->next) {
        token_bytes += get_tokmem(l->tok);
        line_count++;
    }
    printf("Bytes per line: %"PRIu64", Bytes per token: %"PRIu64", Total size of line table in bytes: %"PRIu64"\n",
           (uint64_t)sizeof(line), (uint64_t)sizeof(token),
           (uint64_t)(token_bytes+(line_count*sizeof(line))));
}
/*
 * cleanup - release the global line, symbol and fixup lists and the stored
 * string and comment buffers.
 *
 * Each list head is set to NULL after it is freed.  Entries 0..stridx-1 of
 * string[] and 0..comidx-1 of comment[] are freed and set to NULL.
 *
 * Fix over the previous version: the loop index was used without being
 * initialised, so the string/comment loop started at an indeterminate index.
 */
void cleanup() {
    uint16_t i = 0;

    if (lines) {
        free_lines(lines);
        lines = NULL;
    }
    if (symbols) {
        free_symbols(symbols);
        symbols = NULL;
    }
    if (fixups) {
        free_fixups(fixups);
        fixups = NULL;
    }
    /* One index walks both tables until the longer one is exhausted. */
    while (i < stridx || i < comidx) {
        if (i < stridx && string[i]) {
            free(string[i]);
            string[i] = NULL;
        }
        if (i < comidx && comment[i]) {
            free(comment[i]);
            comment[i] = NULL;
        }
        i++;
    }
}