diff options
Diffstat (limited to 'assemble.c')
-rw-r--r-- | assemble.c | 378 |
1 files changed, 378 insertions, 0 deletions
diff --git a/assemble.c b/assemble.c new file mode 100644 index 0000000..869e6b8 --- /dev/null +++ b/assemble.c @@ -0,0 +1,378 @@ +#include "asmmon.h" + +token *tok_global; + +uint8_t isexpr(uint8_t type, uint8_t dbg) { + switch (type) { + case EXPR_PLUS: + case EXPR_MINUS: + return 1; + default: + return 0; + } +} + +uint8_t get_rs(token *t, uint8_t inst, uint8_t dbg) { + if (t->id == TOK_RS) { + return t->type; + } else { + return 0xFF; + } +} + +uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t dbg) { + uint64_t value = 0; + uint64_t tmp_val = 0; + uint16_t expr_count = 0; + uint8_t type = EXPR_NONE; + do { + if (expr_count) { + type = t->type; + t = t->next; + } + switch (t->id) { + case TOK_HEX: + case TOK_DEC: + case TOK_BIN: + case TOK_CHAR: + tmp_val = t->qword; + t = t->next; + break; + case TOK_SYM: + case TOK_LABEL: + tmp_val = use_symbol("", t->word, addr, 1, dbg); + t = t->next; + break; + } + switch (type) { + case EXPR_PLUS : value += tmp_val; break; + case EXPR_MINUS: value -= tmp_val; break; + case EXPR_LOW : + switch (size) { + default: + case 2 : value &= 0xFFFFFFFF; break; + case 1 : value &= 0x0000FFFF; break; + case 0 : value &= 0x000000FF; break; + } + break; + case EXPR_HIGH : + switch (size) { + default: + case 2 : value >>= 0x20; break; + case 1 : value >>= 0x10; break; + case 0 : value >>= 0x08; break; + } + break; + case EXPR_NONE : value = tmp_val; break; + } + expr_count++; + if (!t) { + break; + } + } while (t->id == TOK_EXPR && isexpr(t->type, dbg)); + return value; +} + +uint8_t get_dirsize(uint8_t type, uint8_t dbg) { + uint8_t size = 0; + switch (type) { + case DIR_QWORD: size = 3; break; + case DIR_DWORD: size = 2; break; + case DIR_WORD : size = 1; break; + case DIR_BYTE : size = 0; break; + } + return size; +} + +uint64_t handle_dir(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { + union reg val; + uint8_t c = 0; + uint8_t tmp = 0; + uint8_t type = t->type; + uint64_t tmpaddr = address; + t = t->next; + for (; t; t = t->next) { + tmp = 0; + switch (t->id) { + case TOK_HEX: + case TOK_DEC: + case TOK_BIN: + case TOK_CHAR: + case TOK_SYM: + case TOK_LABEL: + val.u64 = get_val(t, tmpaddr, get_dirsize(type, dbg), dbg); + switch (type) { + case DIR_QWORD: + if (isasm) { + addr[tmpaddr+7] = val.u8[7]; + addr[tmpaddr+6] = val.u8[6]; + addr[tmpaddr+5] = val.u8[5]; + addr[tmpaddr+4] = val.u8[4]; + } + tmp += 4; + case DIR_DWORD: + if (isasm) { + addr[tmpaddr+3] = val.u8[3]; + addr[tmpaddr+2] = val.u8[2]; + } + tmp += 2; + case DIR_WORD: + if (isasm) { + addr[tmpaddr+1] = val.u8[1]; + } + tmp++; + case DIR_BYTE: + if (isasm) { + addr[tmpaddr ] = val.u8[0]; + } + tmp++; + tmpaddr += tmp; + bc->datasize += tmp; + break; + } + break; + case TOK_STRING: + if (type == DIR_BYTE) { + for (uint16_t k = 0; t->str[k] != '\0'; k++) { + switch (t->str[k]) { + case '\\': + switch (t->str[k+1]) { + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case '0': c = '\0'; break; + } + k++; + break; + default: c = t->str[k]; break; + } + if (isasm) { + addr[tmpaddr] = c; + } + tmpaddr++; + bc->datasize++; + } + if (isasm) { + addr[tmpaddr] = '\0'; + } + tmpaddr++; + bc->datasize++; + } + break; + } + } + return tmpaddr; +} + +uint64_t handle_opcode(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { + union reg val; + uint8_t opsize; + uint8_t inst; + uint8_t opcode; + uint8_t type; + uint8_t tmp = 0; + uint8_t prefix = 0; + uint8_t rs = 0; + + for (; t; t = t->next) { + if (t->id == TOK_OPCODE) { + inst = t->byte; + type = t->type; + } else { + break; + } + tmp = 0; + opsize = 0; + opcode = 0; + if (t->next) { + rs = get_rs(t->next, inst, dbg); + t = (rs != 0xFF) ? t->next : t; + } + prefix = (rs != 0xFF) ? ((rs << 4) | 3) : 0; + if (opcodes[inst][IMPL] != 0xFF && (!t->next || t->next->id == TOK_COMMENT)) { + type = IMPL; + } else { + if (t->next) { + t = t->next; + } + val.u64 = get_val(t, address, (rs != 0xFF) ? rs : 0, dbg); + } + opcode = opcodes[inst][type]; + if (inst == 80) { + if (type == IMM) { + rs = 1; + } else { + type = IMPL; + opcode = opcodes[inst][IMM]; + } + } + switch (type) { + case IMPL: + if (isasm) { + addr[address] = opcode; + } + address++; + bc->progsize++; + break; + case IMM: + if (prefix) { + if (isasm) { + addr[address] = prefix; + } + address++; + } + if (isasm) { + addr[address] = opcode; + } + address++; + bc->progsize++; + switch (rs) { + case 3: + if (isasm) { + addr[address+7] = val.u8[7]; + addr[address+6] = val.u8[6]; + addr[address+5] = val.u8[5]; + addr[address+4] = val.u8[4]; + } + tmp += 4; + case 2: + if (isasm) { + addr[address+3] = val.u8[3]; + addr[address+2] = val.u8[2]; + } + tmp += 2; + case 1 : + if (isasm) { + addr[address+1] = val.u8[1]; + } + tmp++; + default: + if (isasm) { + addr[address ] = val.u8[0]; + } + tmp++; + } + break; + default: + opsize = (val.u64 <= 0x00000000000000FF) ? 1 : opsize; + opsize = (val.u64 > 0x00000000000000FF) ? 2 : opsize; + opsize = (val.u64 > 0x000000000000FFFF) ? 3 : opsize; + opsize = (val.u64 > 0x0000000000FFFFFF) ? 4 : opsize; + opsize = (val.u64 > 0x00000000FFFFFFFF) ? 5 : opsize; + opsize = (val.u64 > 0x000000FFFFFFFFFF) ? 6 : opsize; + opsize = (val.u64 > 0x0000FFFFFFFFFFFF) ? 7 : opsize; + opsize = (val.u64 > 0x00FFFFFFFFFFFFFF) ? 8 : opsize; + if (type == 0xFF) { + switch (opsize-1) { + case 0: case 2: case 5: case 3: type = ZM ; break; + case 1: case 4: case 6: case 7: type = ABS; break; + } + } + if (opsize) { + if (type != ABS) { + switch (opsize) { + case 2: opsize = 3; break; + case 5: opsize = 6; break; + } + } + prefix |= amp[opsize-1]; + } + if (prefix) { + if (isasm) { + addr[address] = prefix; + } + address++; + bc->progsize++; + } + opcode = opcodes[inst][type]; + if (isasm) { + addr[address] = opcode; + } + address++; + bc->progsize++; + if (isasm) { + switch (opsize-1) { + case 7: addr[address+7] = val.u8[7]; + case 6: addr[address+6] = val.u8[6]; + case 5: addr[address+5] = val.u8[5]; + case 4: addr[address+4] = val.u8[4]; + case 3: addr[address+3] = val.u8[3]; + case 2: addr[address+2] = val.u8[2]; + case 1: addr[address+1] = val.u8[1]; + case 0: addr[address ] = val.u8[0]; + } + } + tmp = opsize; + break; + } + address += tmp; + bc->progsize += tmp; + } + return address; +} + +uint64_t parse_tokens(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { + for (; t; t = t->next) { + switch (t->id) { + case TOK_DIR: + switch (t->type) { + case DIR_ORG: t = t->next; address = get_val(t, address, 3, dbg); break; + case DIR_BYTE: + case DIR_WORD: + case DIR_DWORD: + case DIR_QWORD: address = handle_dir(t, bc, isasm, address, dbg); break; + } + break; + case TOK_OPCODE: address = handle_opcode(t, bc, isasm, address, dbg); break; + case TOK_COMMENT: break; + } + } + return address; +} + +token *make_token(uint8_t id, uint8_t type, uint64_t value, char *str) { + token *new_tok = malloc(sizeof(token)); + (last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok); + new_tok->id = id; + new_tok->type = type; + if (!str[0]) { + new_tok->qword = value; + } else { + new_tok->str = str; + } + new_tok->next = NULL; + last_tok = new_tok; + return new_tok; +} + +void assemble(line *ln, bytecount *bc, uint8_t dbg) { + uint64_t address = 0; + for (; ln; ln = ln->next) { + address = parse_tokens(ln->tok, bc, 1, address, dbg); + } +} + +void free_tokens(token *t, uint16_t count) { + token *tok; + while (t != NULL) { + if (count--) { + break; + } + tok = t; + free(tok); + t = t->next; + } +} + +void free_lines() { + line *l = lines; + line *ln; + for (; l != NULL; l = l->next) { + if (l < ln) { + break; + } + free_tokens(l->tok, l->count); + ln = l; + free(ln); + } +} |