summaryrefslogtreecommitdiff
path: root/assemble.c
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2021-02-08 23:03:31 -0500
committermrb0nk500 <b0nk@b0nk.xyz>2021-02-08 23:03:31 -0500
commit04b29166fd226e2464bcfacf6839e3274ff68cc6 (patch)
tree848685973ecbee3b74868820636ac7eb7af4583b /assemble.c
parentcd6982e5da1f5facdc1e0154b3a27c01e8b076c9 (diff)
Completely rewrote the expression handler.
It's now separate from the lexer, and was designed to make it both easier to understand, and easier to parse.
Diffstat (limited to 'assemble.c')
-rw-r--r--assemble.c257
1 files changed, 106 insertions, 151 deletions
diff --git a/assemble.c b/assemble.c
index f4de278..ce35e78 100644
--- a/assemble.c
+++ b/assemble.c
@@ -113,98 +113,50 @@ static void write_value(uint64_t value, uint64_t address, uint8_t size) {
}
}
-uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) {
+uint64_t get_val(expr *tree, uint64_t addr, uint8_t size, int depth, uint8_t dbg) {
uint64_t value = 0;
- uint64_t tmp_val = 0;
- uint8_t type = EXPR_NONE;
- uint8_t isstart = 1;
- int done = 0;
- do {
- if (t->id == TOK_EXPR) {
- type = t->type;
- t = t->next;
- }
- if (stop_comma && t->subtype == TOK_CSV) {
- done = 1;
- }
- if (stop_comma && t->id == TOK_REG) {
- break;
- }
- switch (t->id) {
- case TOK_HEX:
- case TOK_DEC:
- case TOK_BIN:
- case TOK_CHAR: tmp_val = t->qword; t = t->next; break;
- case TOK_SYM:
- case TOK_LABEL:
- for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next);
- tmp_val = (t->sym) ? t->sym->val : addr;
- t = t->next;
- break;
- default: tmp_val = 0;
- }
- if (end_expr != 0xFF && type == end_expr) {
- break;
- }
- switch (type) {
- case EXPR_PLUS : (isstart) ? (value = tmp_val) : (value += tmp_val); break;
- case EXPR_MINUS: (isstart) ? (value = -tmp_val) : (value -= tmp_val); break;
- case EXPR_OR : value |= tmp_val; break;
- case EXPR_LSHFT: value <<= tmp_val; break;
- case EXPR_RSHFT: value >>= tmp_val; break;
- case EXPR_LOW :
- value = tmp_val;
- switch (size) {
- default:
- case 2 : value &= 0xFFFFFFFF; break;
- case 1 : value &= 0x0000FFFF; break;
- case 0 : value &= 0x000000FF; break;
- }
- break;
- case EXPR_HIGH :
- value = tmp_val;
- switch (size) {
- default:
- case 2 : value >>= 0x20; break;
- case 1 : value >>= 0x10; break;
- case 0 : value >>= 0x08; break;
- }
- break;
- case EXPR_NONE : value = tmp_val; break;
- }
- isstart = 0;
- if (dbg) {
- printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val);
- }
- } while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg));
- return value;
-}
+ uint64_t lvalue = 0;
+ uint64_t rvalue = 0;
+ uint64_t expr_val = 0;
+ int type;
-token *skip_expr(token *t, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) {
- int done = 0;
- int is_reg = 0;
- do {
- is_reg = (t && t->next && t->next->id == TOK_REG);
- t = (t->id == TOK_EXPR && !is_reg) ? t->next : t;
- if (is_reg || (stop_comma && (t->subtype == TOK_CSV))) {
- done = 1;
- }
- switch (t->id) {
- case TOK_HEX :
- case TOK_DEC :
- case TOK_BIN :
- case TOK_CHAR :
- case TOK_SYM :
- case TOK_LABEL: t = t->next; break;
- }
- if (end_expr != 0xFF && t->id == TOK_EXPR && t->type == end_expr) {
- break;
- }
- } while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg));
- return t;
-}
+ int is_start = (!depth && tree->left && tree->right == NULL);
+
+ if (tree->left) {
+ lvalue = get_val(tree->left, addr, size, depth+1, dbg);
+ }
+
+ if (tree->right) {
+ rvalue = get_val(tree->right, addr, size, depth+1, dbg);
+ }
+ type = tree->type;
+ expr_val = tree->value.val;
+ switch (type) {
+ case EXPR_HEX:
+ case EXPR_DEC:
+ case EXPR_BIN:
+ case EXPR_CHAR: value = expr_val; break;
+ case EXPR_SYM: value = (tree->value.sym) ? tree->value.sym->val : addr; break;
+ case EXPR_PLUS: value = lvalue + rvalue; break;
+ case EXPR_MINUS: (is_start) ? (value = -lvalue) : (value = lvalue - rvalue); break;
+ case EXPR_OR: value = lvalue | rvalue; break;
+ case EXPR_LSHFT: value = lvalue << rvalue; break;
+ case EXPR_RSHFT: value = lvalue >> rvalue; break;
+ case EXPR_LOW:
+ case EXPR_HIGH:
+ value = lvalue;
+ switch (size) {
+ default:
+ case 2 : (type == EXPR_LOW) ? (value &= 0xFFFFFFFF) : (value >>= 0x20); break;
+ case 1 : (type == EXPR_LOW) ? (value &= 0x0000FFFF) : (value >>= 0x10); break;
+ case 0 : (type == EXPR_LOW) ? (value &= 0x000000FF) : (value >>= 0x08); break;
+ }
+ break;
+ }
+ return value;
+}
uint8_t get_directivesize(uint8_t type, uint8_t dbg) {
switch (type) {
@@ -262,7 +214,7 @@ uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg
case DIR_QWORD : member_size = 8; break;
case DIR_UNION :
case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break;
- case DIR_RES : member_size = get_val(t, address, 3, 0xFF, 0, dbg); t = skip_expr(t, 0xFF, 0, dbg); break;
+ case DIR_RES : t = t->next; member_size = get_val(t->expr, address, 3, 0, dbg); break;
}
if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) {
member->val = offset;
@@ -299,13 +251,8 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre
for (; t; t = t->next) {
tmp = 0;
switch (t->id) {
- case TOK_HEX:
- case TOK_DEC:
- case TOK_BIN:
- case TOK_CHAR:
- case TOK_SYM:
- case TOK_LABEL:
- val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), 0xFF, 0, dbg);
+ case TOK_EXPR:
+ val.u64 = get_val(t->expr, tmpaddr, get_directivesize(type, dbg), 0, dbg);
switch (type) {
case DIR_QWORD: tmp = 8; break;
case DIR_DWORD: tmp = 4; break;
@@ -315,9 +262,7 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre
write_value(val.u64, tmpaddr, tmp-1);
tmpaddr += tmp;
bc->datasize += tmp;
- if (t->next && t->next->id == TOK_EXPR && isexpr(t->next->type, dbg)) {
- t = skip_expr(t, 0xFF, 0, dbg);
- }
+
break;
case TOK_STRING:
if (type == DIR_BYTE) {
@@ -363,6 +308,9 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op
uint8_t op_ins_size[2];
union reg ins;
union reg op_ins[2];
+ ins.u64 = 0;
+ op_ins[0].u64 = 0;
+ op_ins[1].u64 = 0;
memset(op_ins_size, 0, sizeof(op_ins_size));
memset(op_ins, 0, sizeof(op_ins_size));
if (prefix & 3) {
@@ -451,6 +399,35 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op
return inst_size;
}
+int is_value(expr *e, expr **found) {
+ if (e == NULL) {
+ return 0;
+ }
+ switch (e->type) {
+ case EXPR_HEX :
+ case EXPR_DEC :
+ case EXPR_BIN :
+ case EXPR_CHAR:
+ case EXPR_SYM :
+ if (found) {
+ *found = e;
+ }
+ return 1;
+ default:
+ if (e->left) {
+ return is_value(e->left, found);
+ }
+ if (e->right) {
+ return is_value(e->right, found);
+ }
+ break;
+ }
+ if (found) {
+ *found = e;
+ }
+ return 0;
+}
+
token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t dbg) {
uint8_t op_type;
uint8_t op_inst;
@@ -478,6 +455,16 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
printf("t: %p, t->id: $%X, t->id: %s, t->subtype: $%X, t->subtype: %s\n", tmp, tmp->id, (tmp->id <= TOK_MEMBER) ? lex_tok[tmp->id] : "TOK_NONE", tmp->subtype, (tmp->subtype == TOK_IND || tmp->subtype == TOK_CSV) ? lex_tok[tmp->subtype] : "TOK_NONE");
}*/
+ if (t) {
+ switch (t->id) {
+ case TOK_OPCODE:
+ case TOK_EXTOP :
+ case TOK_ORTHO : t = (t->next) ? t->next : t; break;
+ }
+ }
+
+ int isvalue = 0;
+
for (; t && i < 2; t = t->next) {
reg = (old_i != i) ? 0 : reg;
got_value = (old_i != i) ? 0 : got_value;
@@ -485,26 +472,27 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
brack_done = (t->id == TOK_REG) ? 2 : 1;
}
switch (t->id) {
- case TOK_HEX :
- case TOK_DEC :
- case TOK_BIN :
- case TOK_SYM :
- case TOK_CHAR :
- case TOK_LABEL:
+ case TOK_EXPR:
+ isvalue = is_value(t->expr, NULL);
if (!got_value) {
- expr_type = (expr_type == 0xFF && t->next && t->next->id == TOK_EXPR) ? t->next->type : expr_type;
- switch (expr_type) {
- default : stop_comma = 1; break;
- case EXPR_MUL : stop_comma = 0; break;
+ expr *e = t->expr;
+ if (isvalue) {
+ if (expr_type == 0xFF && e) {
+ expr_type = (e->right) ? e->right->type : e->type;
+ }
+ stop_comma = (expr_type != EXPR_MUL);
+ value = get_val(e, address, (rs != 0xFF) ? rs : 0, 0, dbg);
+ } else {
+ break;
}
is_sib = (!stop_comma && op[i].type && op[i].id == MEM_IND);
- value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
- op[i].value = (!is_sib) ? value : op[i].value;
got_value = 1;
} else {
- op[i].value = (!is_sib) ? value : op[i].value;
- got_value = 0;
+ if (!isvalue) {
+ break;
+ }
}
+ op[i].value = (!is_sib) ? value : op[i].value;
if ((op[i].type == 1 && op[i].id == MEM_RIND) || (!op[i].type)) {
op[i].is_ind = (op[i].type == 1 && op[i].id == MEM_RIND);
op[i].type = 1;
@@ -528,11 +516,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
op[i].id = MEM_ZMR;
}
}
- if (got_value && !is_comma) {
- if (t && t->subtype != TOK_CSV) {
- t = skip_expr(t, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
- }
- }
i += is_comma;
if (old_i != i) {
got_value = 0;
@@ -561,21 +544,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
is_comma = (is_comma >= 2) ? 0 : is_comma;
i += is_comma;
break;
- case TOK_EXPR:
- expr_type = t->type;
- switch (expr_type) {
- default : stop_comma = 1; break;
- case EXPR_MUL : stop_comma = 0; break;
- }
- if (!got_value) {
- if (t->next && t->next->id != TOK_REG) {
- value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
- got_value = 1;
- }
- } else {
- got_value = 0;
- }
- break;
case TOK_CC:
op[0].cc = t->byte;
i = 3;
@@ -1017,8 +985,8 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t
switch (t->type) {
case DIR_STRUCT:
case DIR_UNION : handle_struct(l, address, 0, dbg); break;
- case DIR_RES: t = t->next; address += get_val(t, address, 3, 0xFF, 0, dbg); break;
- case DIR_ORG: t = t->next; address = get_val(t, address, 3, 0xFF, 0, dbg); break;
+ case DIR_RES: t = t->next; address += get_val(t->expr, address, 3, 0, dbg); break;
+ case DIR_ORG: t = t->next; address = get_val(t->expr, address, 3, 0, dbg); break;
case DIR_BYTE:
case DIR_WORD:
case DIR_DWORD:
@@ -1034,7 +1002,7 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t
return address;
}
-token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) {
+token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s, expr *e) {
token *new_tok = malloc(sizeof(token));
(last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok);
@@ -1052,6 +1020,8 @@ token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t
if (s) {
new_tok->sym = s;
+ } else if (e) {
+ new_tok->expr = e;
} else if (str[0]) {
new_tok->str = str;
} else {
@@ -1286,17 +1256,6 @@ static void free_fixups(fixup *f) {
}
}
-static inline void free_tmp_symtab(tmp_symtab *st) {
- tmp_symtab *tmp;
- if (st != NULL) {
- tmp = st;
- st = st->next;
- free(tmp);
- tmp = NULL;
- free_tmp_symtab(st);
- }
-}
-
uint64_t get_tokmem(token *t) {
uint64_t i = 0;
for (; t; t = t->next, i++);
@@ -1327,10 +1286,6 @@ void cleanup() {
free_fixups(fixups);
fixups = NULL;
}
- if (tmp_sym_table) {
- free_tmp_symtab(tmp_sym_table);
- tmp_sym_table = NULL;
- }
while (i < stridx || i < comidx) {
if (i < stridx && string[i]) {
free(string[i]);