summary | refs | log | tree | commit | diff
path: root/assemble.c
diff options
context:
space:
mode:
Diffstat (limited to 'assemble.c')
-rw-r--r-- assemble.c 378
1 files changed, 378 insertions, 0 deletions
diff --git a/assemble.c b/assemble.c
new file mode 100644
index 0000000..869e6b8
--- /dev/null
+++ b/assemble.c
@@ -0,0 +1,378 @@
+#include "asmmon.h"
+
+token *tok_global; /* Global token pointer; none of the functions visible in this file read or write it. */
+
+/*
+ * Tell whether `type` is an additive expression operator.
+ *
+ * Returns 1 when `type` is EXPR_PLUS or EXPR_MINUS and 0 for anything else.
+ * `dbg` is accepted but not used by this function.
+ */
+uint8_t isexpr(uint8_t type, uint8_t dbg) {
+    if (type == EXPR_PLUS || type == EXPR_MINUS) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Read the type field of a TOK_RS token.
+ *
+ * Returns t->type when t->id is TOK_RS, otherwise the sentinel 0xFF.
+ * `t` must not be NULL. `inst` and `dbg` are accepted but not used here.
+ */
+uint8_t get_rs(token *t, uint8_t inst, uint8_t dbg) {
+    return (t->id == TOK_RS) ? t->type : 0xFF;
+}
+
+/*
+ * Evaluate the value expression that starts at token `t`.
+ *
+ * The first token supplies the initial value. Each following TOK_EXPR token
+ * accepted by isexpr() combines the operand after it into the result with
+ * + or -. Numeric and character tokens contribute t->qword; TOK_SYM and
+ * TOK_LABEL tokens are resolved through use_symbol().
+ *
+ *  t    - first token of the expression; must not be NULL
+ *  addr - address handed to use_symbol() for symbol/label operands
+ *  size - size selector, consulted only by the EXPR_LOW / EXPR_HIGH cases
+ *  dbg  - debug flag forwarded to isexpr() and use_symbol()
+ *
+ * Returns the computed value. A token that is not a value contributes 0.
+ */
+uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t dbg) {
+    uint64_t value = 0;
+    uint64_t tmp_val = 0;
+    uint16_t expr_count = 0;
+    uint8_t type = EXPR_NONE;
+    do {
+        if (expr_count) {
+            /* t is the operator token here; its operand is the next token. */
+            type = t->type;
+            t = t->next;
+            if (!t) {
+                /* Operator with no operand after it: keep what we have. */
+                break;
+            }
+        }
+        /* Reset so a non-value token cannot re-apply the previous operand. */
+        tmp_val = 0;
+        switch (t->id) {
+            case TOK_HEX:
+            case TOK_DEC:
+            case TOK_BIN:
+            case TOK_CHAR:
+                tmp_val = t->qword;
+                t = t->next;
+                break;
+            case TOK_SYM:
+            case TOK_LABEL:
+                tmp_val = use_symbol("", t->word, addr, 1, dbg);
+                t = t->next;
+                break;
+            default:
+                break;
+        }
+        switch (type) {
+            case EXPR_PLUS : value += tmp_val; break;
+            case EXPR_MINUS: value -= tmp_val; break;
+            /*
+             * NOTE(review): `type` only ever holds EXPR_NONE or an operator
+             * accepted by isexpr(), so the LOW/HIGH cases look unreachable
+             * from this loop as written.
+             */
+            case EXPR_LOW  :
+                switch (size) {
+                    default:
+                    case 2 : value &= 0xFFFFFFFF; break;
+                    case 1 : value &= 0x0000FFFF; break;
+                    case 0 : value &= 0x000000FF; break;
+                }
+                break;
+            case EXPR_HIGH :
+                switch (size) {
+                    default:
+                    case 2 : value >>= 0x20; break;
+                    case 1 : value >>= 0x10; break;
+                    case 0 : value >>= 0x08; break;
+                }
+                break;
+            case EXPR_NONE : value = tmp_val; break;
+            default:
+                break;
+        }
+        expr_count++;
+        if (!t) {
+            /* Ran off the end of the token list. */
+            break;
+        }
+    } while (t->id == TOK_EXPR && isexpr(t->type, dbg));
+    return value;
+}
+
+/*
+ * Map a data directive type to its size code.
+ *
+ * Returns 3 for DIR_QWORD, 2 for DIR_DWORD, 1 for DIR_WORD, and 0 for
+ * DIR_BYTE or any other value. `dbg` is accepted but not used here.
+ */
+uint8_t get_dirsize(uint8_t type, uint8_t dbg) {
+    switch (type) {
+        case DIR_QWORD: return 3;
+        case DIR_DWORD: return 2;
+        case DIR_WORD : return 1;
+        case DIR_BYTE : return 0;
+        default       : return 0;
+    }
+}
+
+/*
+ * Emit the data that follows a DIR_BYTE / DIR_WORD / DIR_DWORD / DIR_QWORD
+ * directive token.
+ *
+ *  t       - the directive token; its operands are the tokens after it
+ *  bc      - byte counters; datasize grows by every byte accounted for
+ *  isasm   - when non-zero the bytes are stored into addr[]; when zero only
+ *            the address and counters advance
+ *  address - address of the first byte to emit
+ *  dbg     - debug flag forwarded to the helpers
+ *
+ * Value tokens are evaluated with get_val() and stored so that val.u8[i]
+ * lands at addr[tmpaddr + i]: 8 bytes for QWORD, 4 for DWORD, 2 for WORD,
+ * 1 for BYTE. String tokens are only handled for DIR_BYTE: each character
+ * is stored, followed by one terminating '\0'.
+ *
+ * Returns the address just past the last emitted byte.
+ */
+uint64_t handle_dir(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
+    union reg val;
+    uint8_t c = 0;
+    uint8_t tmp = 0;
+    uint8_t type = t->type;
+    uint64_t tmpaddr = address;
+    t = t->next;
+    for (; t; t = t->next) {
+        tmp = 0;
+        switch (t->id) {
+            case TOK_HEX:
+            case TOK_DEC:
+            case TOK_BIN:
+            case TOK_CHAR:
+            case TOK_SYM:
+            case TOK_LABEL:
+                val.u64 = get_val(t, tmpaddr, get_dirsize(type, dbg), dbg);
+                /* Wider directives fall through the narrower ones on purpose. */
+                switch (type) {
+                    case DIR_QWORD:
+                        if (isasm) {
+                            addr[tmpaddr+7] = val.u8[7];
+                            addr[tmpaddr+6] = val.u8[6];
+                            addr[tmpaddr+5] = val.u8[5];
+                            addr[tmpaddr+4] = val.u8[4];
+                        }
+                        tmp += 4;
+                        /* fallthrough */
+                    case DIR_DWORD:
+                        if (isasm) {
+                            addr[tmpaddr+3] = val.u8[3];
+                            addr[tmpaddr+2] = val.u8[2];
+                        }
+                        tmp += 2;
+                        /* fallthrough */
+                    case DIR_WORD:
+                        if (isasm) {
+                            addr[tmpaddr+1] = val.u8[1];
+                        }
+                        tmp++;
+                        /* fallthrough */
+                    case DIR_BYTE:
+                        if (isasm) {
+                            addr[tmpaddr  ] = val.u8[0];
+                        }
+                        tmp++;
+                        tmpaddr += tmp;
+                        bc->datasize += tmp;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case TOK_STRING:
+                if (type == DIR_BYTE) {
+                    for (uint16_t k = 0; t->str[k] != '\0'; k++) {
+                        c = t->str[k];
+                        /*
+                         * Only treat the backslash as an escape when a
+                         * character follows it; a trailing backslash is
+                         * stored literally so k never steps past the
+                         * terminator.
+                         */
+                        if (t->str[k] == '\\' && t->str[k+1] != '\0') {
+                            k++;
+                            switch (t->str[k]) {
+                                case 'n': c = '\n'; break;
+                                case 'r': c = '\r'; break;
+                                case 't': c = '\t'; break;
+                                case '0': c = '\0'; break;
+                                /* Any other escape stands for the character itself. */
+                                default : c = t->str[k]; break;
+                            }
+                        }
+                        if (isasm) {
+                            addr[tmpaddr] = c;
+                        }
+                        tmpaddr++;
+                        bc->datasize++;
+                    }
+                    /* Terminate the emitted string. */
+                    if (isasm) {
+                        addr[tmpaddr] = '\0';
+                    }
+                    tmpaddr++;
+                    bc->datasize++;
+                }
+                break;
+            default:
+                break;
+        }
+    }
+    return tmpaddr;
+}
+
+uint64_t handle_opcode(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { /*
+ * Encode the instruction(s) starting at token t.
+ *
+ *  t       - expected to be a TOK_OPCODE token; the loop ends at the first
+ *            token that is not one
+ *  bc      - byte counters; progsize grows as bytes are accounted for
+ *  isasm   - when non-zero the bytes are stored into addr[]; when zero only
+ *            address and the counters advance
+ *  address - address of the first byte to emit
+ *  dbg     - debug flag forwarded to get_rs() and get_val()
+ *
+ * Each instruction is emitted as an optional prefix byte, one opcode byte
+ * taken from opcodes[inst][type], and zero or more operand bytes taken
+ * from val.u8[]. Returns the address just past the last emitted byte.
+ */
+ union reg val;
+ uint8_t opsize;
+ uint8_t inst;
+ uint8_t opcode;
+ uint8_t type;
+ uint8_t tmp = 0;
+ uint8_t prefix = 0;
+ uint8_t rs = 0; /* NOTE(review): starts at 0, not 0xFF, and is only refreshed when t->next exists */
+
+ for (; t; t = t->next) {
+ if (t->id == TOK_OPCODE) { /* only TOK_OPCODE tokens are handled here */
+ inst = t->byte;
+ type = t->type;
+ } else {
+ break; /* any other token ends the loop */
+ }
+ tmp = 0;
+ opsize = 0;
+ opcode = 0;
+ if (t->next) {
+ rs = get_rs(t->next, inst, dbg); /* 0xFF unless the next token is a TOK_RS */
+ t = (rs != 0xFF) ? t->next : t; /* step onto the TOK_RS token when one was found */
+ }
+ prefix = (rs != 0xFF) ? ((rs << 4) | 3) : 0; /* rs goes in the high nibble, low bits set to 3 */
+ if (opcodes[inst][IMPL] != 0xFF && (!t->next || t->next->id == TOK_COMMENT)) { /* an IMPL encoding exists and nothing but a comment follows */
+ type = IMPL;
+ } else {
+ if (t->next) {
+ t = t->next; /* move to the first operand token */
+ }
+ val.u64 = get_val(t, address, (rs != 0xFF) ? rs : 0, dbg); /* evaluate the operand expression */
+ }
+ opcode = opcodes[inst][type];
+ if (inst == 80) { /* NOTE(review): 80 is a hard-coded instruction index; which instruction it is cannot be seen here */
+ if (type == IMM) {
+ rs = 1; /* makes the IMM case below emit a two-byte operand */
+ } else {
+ type = IMPL;
+ opcode = opcodes[inst][IMM]; /* the IMM opcode byte is emitted through the IMPL case, with no operand */
+ }
+ }
+ switch (type) {
+ case IMPL: /* a single opcode byte */
+ if (isasm) {
+ addr[address] = opcode;
+ }
+ address++;
+ bc->progsize++;
+ break;
+ case IMM: /* optional prefix, opcode, then an operand sized by rs */
+ if (prefix) {
+ if (isasm) {
+ addr[address] = prefix;
+ }
+ address++; /* NOTE(review): unlike the default case, this prefix byte is not added to bc->progsize */
+ }
+ if (isasm) {
+ addr[address] = opcode;
+ }
+ address++;
+ bc->progsize++;
+ switch (rs) { /* cases fall through: 3 gives 8 operand bytes, 2 gives 4, 1 gives 2, anything else 1 */
+ case 3:
+ if (isasm) {
+ addr[address+7] = val.u8[7];
+ addr[address+6] = val.u8[6];
+ addr[address+5] = val.u8[5];
+ addr[address+4] = val.u8[4];
+ }
+ tmp += 4;
+ case 2:
+ if (isasm) {
+ addr[address+3] = val.u8[3];
+ addr[address+2] = val.u8[2];
+ }
+ tmp += 2;
+ case 1 :
+ if (isasm) {
+ addr[address+1] = val.u8[1];
+ }
+ tmp++;
+ default:
+ if (isasm) {
+ addr[address ] = val.u8[0];
+ }
+ tmp++;
+ }
+ break;
+ default: /* every other addressing mode: operand length is derived from the value */
+ opsize = (val.u64 <= 0x00000000000000FF) ? 1 : opsize; /* after this chain opsize is the smallest byte count, 1 to 8, that holds val.u64 */
+ opsize = (val.u64 > 0x00000000000000FF) ? 2 : opsize;
+ opsize = (val.u64 > 0x000000000000FFFF) ? 3 : opsize;
+ opsize = (val.u64 > 0x0000000000FFFFFF) ? 4 : opsize;
+ opsize = (val.u64 > 0x00000000FFFFFFFF) ? 5 : opsize;
+ opsize = (val.u64 > 0x000000FFFFFFFFFF) ? 6 : opsize;
+ opsize = (val.u64 > 0x0000FFFFFFFFFFFF) ? 7 : opsize;
+ opsize = (val.u64 > 0x00FFFFFFFFFFFFFF) ? 8 : opsize;
+ if (type == 0xFF) { /* mode left open by the token: choose ZM or ABS from the operand size */
+ switch (opsize-1) {
+ case 0: case 2: case 5: case 3: type = ZM ; break;
+ case 1: case 4: case 6: case 7: type = ABS; break;
+ }
+ }
+ if (opsize) {
+ if (type != ABS) { /* for every mode except ABS, sizes 2 and 5 are widened to 3 and 6 */
+ switch (opsize) {
+ case 2: opsize = 3; break;
+ case 5: opsize = 6; break;
+ }
+ }
+ prefix |= amp[opsize-1]; /* amp[] is defined elsewhere; presumably it holds the prefix bits for each operand size */
+ }
+ if (prefix) {
+ if (isasm) {
+ addr[address] = prefix;
+ }
+ address++;
+ bc->progsize++;
+ }
+ opcode = opcodes[inst][type]; /* type may have changed above, so the opcode is looked up again */
+ if (isasm) {
+ addr[address] = opcode;
+ }
+ address++;
+ bc->progsize++;
+ if (isasm) {
+ switch (opsize-1) { /* falls through: stores operand bytes opsize-1 down to 0 */
+ case 7: addr[address+7] = val.u8[7];
+ case 6: addr[address+6] = val.u8[6];
+ case 5: addr[address+5] = val.u8[5];
+ case 4: addr[address+4] = val.u8[4];
+ case 3: addr[address+3] = val.u8[3];
+ case 2: addr[address+2] = val.u8[2];
+ case 1: addr[address+1] = val.u8[1];
+ case 0: addr[address ] = val.u8[0];
+ }
+ }
+ tmp = opsize; /* operand length, added to address and progsize below */
+ break;
+ }
+ address += tmp; /* skip over the operand bytes (tmp stays 0 for IMPL) */
+ bc->progsize += tmp;
+ }
+ return address;
+}
+
+/*
+ * Walk one token list and dispatch directives and opcodes.
+ *
+ *  t       - first token of the list (may be NULL)
+ *  bc      - byte counters passed on to handle_dir() / handle_opcode()
+ *  isasm   - passed on unchanged to handle_dir() / handle_opcode()
+ *  address - current address on entry
+ *  dbg     - debug flag forwarded to the helpers
+ *
+ * DIR_ORG replaces the address with the value of the token that follows it.
+ * The data directives go to handle_dir(), TOK_OPCODE goes to
+ * handle_opcode(); every other token is skipped.
+ *
+ * Returns the address after processing the list.
+ */
+uint64_t parse_tokens(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) {
+    for (; t; t = t->next) {
+        switch (t->id) {
+            case TOK_DIR:
+                switch (t->type) {
+                    case DIR_ORG:
+                        /*
+                         * An .org with nothing after it leaves the address
+                         * alone rather than handing NULL to get_val().
+                         */
+                        if (t->next) {
+                            t = t->next;
+                            address = get_val(t, address, 3, dbg);
+                        }
+                        break;
+                    case DIR_BYTE:
+                    case DIR_WORD:
+                    case DIR_DWORD:
+                    case DIR_QWORD:
+                        address = handle_dir(t, bc, isasm, address, dbg);
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case TOK_OPCODE:
+                address = handle_opcode(t, bc, isasm, address, dbg);
+                break;
+            case TOK_COMMENT:
+                break;
+            default:
+                break;
+        }
+    }
+    return address;
+}
+
+/*
+ * Allocate a token and append it to the global token chain.
+ *
+ *  id    - stored in the token's id field
+ *  type  - stored in the token's type field
+ *  value - stored in qword when no string is supplied
+ *  str   - when non-NULL and non-empty, the pointer itself is stored in the
+ *          token (the text is not copied, so it must outlive the token);
+ *          NULL or "" selects `value` instead
+ *
+ * The new token becomes `tokens` when the chain is empty, otherwise it is
+ * linked after `last_tok`; in both cases `last_tok` is updated.
+ *
+ * Returns the new token, or NULL when the allocation fails, in which case
+ * the chain is left untouched.
+ */
+token *make_token(uint8_t id, uint8_t type, uint64_t value, char *str) {
+    token *new_tok = malloc(sizeof *new_tok);
+    if (!new_tok) {
+        return NULL;
+    }
+    new_tok->id = id;
+    new_tok->type = type;
+    if (!str || !str[0]) {
+        new_tok->qword = value;
+    } else {
+        new_tok->str = str;
+    }
+    new_tok->next = NULL;
+    if (last_tok) {
+        last_tok->next = new_tok;
+    } else {
+        tokens = new_tok;
+    }
+    last_tok = new_tok;
+    return new_tok;
+}
+
+/*
+ * Run parse_tokens() over every line in the list, with isasm set to 1.
+ *
+ *  ln  - first line of the list (may be NULL)
+ *  bc  - byte counters passed on to parse_tokens()
+ *  dbg - debug flag passed on to parse_tokens()
+ *
+ * The address starts at 0 and the value returned for one line is fed into
+ * the next.
+ */
+void assemble(line *ln, bytecount *bc, uint8_t dbg) {
+    uint64_t address = 0;
+    while (ln != NULL) {
+        address = parse_tokens(ln->tok, bc, 1, address, dbg);
+        ln = ln->next;
+    }
+}
+
+/*
+ * Free up to `count` tokens, starting at `t` and following the next links.
+ *
+ *  t     - first token to free (may be NULL)
+ *  count - maximum number of tokens to release; presumably the number of
+ *          tokens owned by one line, so the walk stops before tokens that
+ *          belong to someone else
+ *
+ * The next pointer is read before its token is freed, so no freed memory
+ * is touched.
+ */
+void free_tokens(token *t, uint16_t count) {
+    while (t != NULL && count > 0) {
+        token *next = t->next;
+        free(t);
+        t = next;
+        count--;
+    }
+}
+
+/*
+ * Free every line in the global `lines` list.
+ *
+ * For each line, free_tokens() is called with the line's token pointer and
+ * count, and then the line itself is freed. The next pointer is saved
+ * before the line is released, so the walk never reads freed memory.
+ */
+void free_lines() {
+    line *l = lines;
+    while (l != NULL) {
+        line *next = l->next;
+        free_tokens(l->tok, l->count);
+        free(l);
+        l = next;
+    }
+}