From 04b29166fd226e2464bcfacf6839e3274ff68cc6 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Mon, 8 Feb 2021 23:03:31 -0500 Subject: Completely rewrote the expression handler. It's now separate from the lexer, and was designed to make it both easier to understand, and easier to parse. --- assemble.c | 257 +++++++++++++++++++++++++------------------------------------ 1 file changed, 106 insertions(+), 151 deletions(-) (limited to 'assemble.c') diff --git a/assemble.c b/assemble.c index f4de278..ce35e78 100644 --- a/assemble.c +++ b/assemble.c @@ -113,98 +113,50 @@ static void write_value(uint64_t value, uint64_t address, uint8_t size) { } } -uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) { +uint64_t get_val(expr *tree, uint64_t addr, uint8_t size, int depth, uint8_t dbg) { uint64_t value = 0; - uint64_t tmp_val = 0; - uint8_t type = EXPR_NONE; - uint8_t isstart = 1; - int done = 0; - do { - if (t->id == TOK_EXPR) { - type = t->type; - t = t->next; - } - if (stop_comma && t->subtype == TOK_CSV) { - done = 1; - } - if (stop_comma && t->id == TOK_REG) { - break; - } - switch (t->id) { - case TOK_HEX: - case TOK_DEC: - case TOK_BIN: - case TOK_CHAR: tmp_val = t->qword; t = t->next; break; - case TOK_SYM: - case TOK_LABEL: - for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next); - tmp_val = (t->sym) ? t->sym->val : addr; - t = t->next; - break; - default: tmp_val = 0; - } - if (end_expr != 0xFF && type == end_expr) { - break; - } - switch (type) { - case EXPR_PLUS : (isstart) ? (value = tmp_val) : (value += tmp_val); break; - case EXPR_MINUS: (isstart) ? 
(value = -tmp_val) : (value -= tmp_val); break; - case EXPR_OR : value |= tmp_val; break; - case EXPR_LSHFT: value <<= tmp_val; break; - case EXPR_RSHFT: value >>= tmp_val; break; - case EXPR_LOW : - value = tmp_val; - switch (size) { - default: - case 2 : value &= 0xFFFFFFFF; break; - case 1 : value &= 0x0000FFFF; break; - case 0 : value &= 0x000000FF; break; - } - break; - case EXPR_HIGH : - value = tmp_val; - switch (size) { - default: - case 2 : value >>= 0x20; break; - case 1 : value >>= 0x10; break; - case 0 : value >>= 0x08; break; - } - break; - case EXPR_NONE : value = tmp_val; break; - } - isstart = 0; - if (dbg) { - printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val); - } - } while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg)); - return value; -} + uint64_t lvalue = 0; + uint64_t rvalue = 0; + uint64_t expr_val = 0; + int type; -token *skip_expr(token *t, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) { - int done = 0; - int is_reg = 0; - do { - is_reg = (t && t->next && t->next->id == TOK_REG); - t = (t->id == TOK_EXPR && !is_reg) ? t->next : t; - if (is_reg || (stop_comma && (t->subtype == TOK_CSV))) { - done = 1; - } - switch (t->id) { - case TOK_HEX : - case TOK_DEC : - case TOK_BIN : - case TOK_CHAR : - case TOK_SYM : - case TOK_LABEL: t = t->next; break; - } - if (end_expr != 0xFF && t->id == TOK_EXPR && t->type == end_expr) { - break; - } - } while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg)); - return t; -} + int is_start = (!depth && tree->left && tree->right == NULL); + + if (tree->left) { + lvalue = get_val(tree->left, addr, size, depth+1, dbg); + } + + if (tree->right) { + rvalue = get_val(tree->right, addr, size, depth+1, dbg); + } + type = tree->type; + expr_val = tree->value.val; + switch (type) { + case EXPR_HEX: + case EXPR_DEC: + case EXPR_BIN: + case EXPR_CHAR: value = expr_val; break; + case EXPR_SYM: value = (tree->value.sym) ? 
tree->value.sym->val : addr; break; + case EXPR_PLUS: value = lvalue + rvalue; break; + case EXPR_MINUS: (is_start) ? (value = -lvalue) : (value = lvalue - rvalue); break; + case EXPR_OR: value = lvalue | rvalue; break; + case EXPR_LSHFT: value = lvalue << rvalue; break; + case EXPR_RSHFT: value = lvalue >> rvalue; break; + case EXPR_LOW: + case EXPR_HIGH: + value = lvalue; + switch (size) { + default: + case 2 : (type == EXPR_LOW) ? (value &= 0xFFFFFFFF) : (value >>= 0x20); break; + case 1 : (type == EXPR_LOW) ? (value &= 0x0000FFFF) : (value >>= 0x10); break; + case 0 : (type == EXPR_LOW) ? (value &= 0x000000FF) : (value >>= 0x08); break; + } + break; + } + return value; +} uint8_t get_directivesize(uint8_t type, uint8_t dbg) { switch (type) { @@ -262,7 +214,7 @@ uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg case DIR_QWORD : member_size = 8; break; case DIR_UNION : case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break; - case DIR_RES : member_size = get_val(t, address, 3, 0xFF, 0, dbg); t = skip_expr(t, 0xFF, 0, dbg); break; + case DIR_RES : t = t->next; member_size = get_val(t->expr, address, 3, 0, dbg); break; } if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) { member->val = offset; @@ -299,13 +251,8 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre for (; t; t = t->next) { tmp = 0; switch (t->id) { - case TOK_HEX: - case TOK_DEC: - case TOK_BIN: - case TOK_CHAR: - case TOK_SYM: - case TOK_LABEL: - val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), 0xFF, 0, dbg); + case TOK_EXPR: + val.u64 = get_val(t->expr, tmpaddr, get_directivesize(type, dbg), 0, dbg); switch (type) { case DIR_QWORD: tmp = 8; break; case DIR_DWORD: tmp = 4; break; @@ -315,9 +262,7 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre write_value(val.u64, tmpaddr, tmp-1); tmpaddr += tmp; bc->datasize += tmp; - if (t->next && t->next->id == TOK_EXPR 
&& isexpr(t->next->type, dbg)) { - t = skip_expr(t, 0xFF, 0, dbg); - } + break; case TOK_STRING: if (type == DIR_BYTE) { @@ -363,6 +308,9 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op uint8_t op_ins_size[2]; union reg ins; union reg op_ins[2]; + ins.u64 = 0; + op_ins[0].u64 = 0; + op_ins[1].u64 = 0; memset(op_ins_size, 0, sizeof(op_ins_size)); memset(op_ins, 0, sizeof(op_ins_size)); if (prefix & 3) { @@ -451,6 +399,35 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op return inst_size; } +int is_value(expr *e, expr **found) { + if (e == NULL) { + return 0; + } + switch (e->type) { + case EXPR_HEX : + case EXPR_DEC : + case EXPR_BIN : + case EXPR_CHAR: + case EXPR_SYM : + if (found) { + *found = e; + } + return 1; + default: + if (e->left) { + return is_value(e->left, found); + } + if (e->right) { + return is_value(e->right, found); + } + break; + } + if (found) { + *found = e; + } + return 0; +} + token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t dbg) { uint8_t op_type; uint8_t op_inst; @@ -478,6 +455,16 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t printf("t: %p, t->id: $%X, t->id: %s, t->subtype: $%X, t->subtype: %s\n", tmp, tmp->id, (tmp->id <= TOK_MEMBER) ? lex_tok[tmp->id] : "TOK_NONE", tmp->subtype, (tmp->subtype == TOK_IND || tmp->subtype == TOK_CSV) ? lex_tok[tmp->subtype] : "TOK_NONE"); }*/ + if (t) { + switch (t->id) { + case TOK_OPCODE: + case TOK_EXTOP : + case TOK_ORTHO : t = (t->next) ? t->next : t; break; + } + } + + int isvalue = 0; + for (; t && i < 2; t = t->next) { reg = (old_i != i) ? 0 : reg; got_value = (old_i != i) ? 0 : got_value; @@ -485,26 +472,27 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t brack_done = (t->id == TOK_REG) ? 
2 : 1; } switch (t->id) { - case TOK_HEX : - case TOK_DEC : - case TOK_BIN : - case TOK_SYM : - case TOK_CHAR : - case TOK_LABEL: + case TOK_EXPR: + isvalue = is_value(t->expr, NULL); if (!got_value) { - expr_type = (expr_type == 0xFF && t->next && t->next->id == TOK_EXPR) ? t->next->type : expr_type; - switch (expr_type) { - default : stop_comma = 1; break; - case EXPR_MUL : stop_comma = 0; break; + expr *e = t->expr; + if (isvalue) { + if (expr_type == 0xFF && e) { + expr_type = (e->right) ? e->right->type : e->type; + } + stop_comma = (expr_type != EXPR_MUL); + value = get_val(e, address, (rs != 0xFF) ? rs : 0, 0, dbg); + } else { + break; } is_sib = (!stop_comma && op[i].type && op[i].id == MEM_IND); - value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg); - op[i].value = (!is_sib) ? value : op[i].value; got_value = 1; } else { - op[i].value = (!is_sib) ? value : op[i].value; - got_value = 0; + if (!isvalue) { + break; + } } + op[i].value = (!is_sib) ? value : op[i].value; if ((op[i].type == 1 && op[i].id == MEM_RIND) || (!op[i].type)) { op[i].is_ind = (op[i].type == 1 && op[i].id == MEM_RIND); op[i].type = 1; @@ -528,11 +516,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t op[i].id = MEM_ZMR; } } - if (got_value && !is_comma) { - if (t && t->subtype != TOK_CSV) { - t = skip_expr(t, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg); - } - } i += is_comma; if (old_i != i) { got_value = 0; @@ -561,21 +544,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t is_comma = (is_comma >= 2) ? 0 : is_comma; i += is_comma; break; - case TOK_EXPR: - expr_type = t->type; - switch (expr_type) { - default : stop_comma = 1; break; - case EXPR_MUL : stop_comma = 0; break; - } - if (!got_value) { - if (t->next && t->next->id != TOK_REG) { - value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? 
expr_type : 0xFF, stop_comma, dbg); - got_value = 1; - } - } else { - got_value = 0; - } - break; case TOK_CC: op[0].cc = t->byte; i = 3; @@ -1017,8 +985,8 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t switch (t->type) { case DIR_STRUCT: case DIR_UNION : handle_struct(l, address, 0, dbg); break; - case DIR_RES: t = t->next; address += get_val(t, address, 3, 0xFF, 0, dbg); break; - case DIR_ORG: t = t->next; address = get_val(t, address, 3, 0xFF, 0, dbg); break; + case DIR_RES: t = t->next; address += get_val(t->expr, address, 3, 0, dbg); break; + case DIR_ORG: t = t->next; address = get_val(t->expr, address, 3, 0, dbg); break; case DIR_BYTE: case DIR_WORD: case DIR_DWORD: @@ -1034,7 +1002,7 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t return address; } -token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) { +token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s, expr *e) { token *new_tok = malloc(sizeof(token)); (last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok); @@ -1052,6 +1020,8 @@ token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t if (s) { new_tok->sym = s; + } else if (e) { + new_tok->expr = e; } else if (str[0]) { new_tok->str = str; } else { @@ -1286,17 +1256,6 @@ static void free_fixups(fixup *f) { } } -static inline void free_tmp_symtab(tmp_symtab *st) { - tmp_symtab *tmp; - if (st != NULL) { - tmp = st; - st = st->next; - free(tmp); - tmp = NULL; - free_tmp_symtab(st); - } -} - uint64_t get_tokmem(token *t) { uint64_t i = 0; for (; t; t = t->next, i++); @@ -1327,10 +1286,6 @@ void cleanup() { free_fixups(fixups); fixups = NULL; } - if (tmp_sym_table) { - free_tmp_symtab(tmp_sym_table); - tmp_sym_table = NULL; - } while (i < stridx || i < comidx) { if (i < stridx && string[i]) { free(string[i]); -- cgit v1.2.3-13-gbd6f