Completely rewrote the expression handler.

It's now separate from the lexer, and was designed to be both easier to understand and easier to parse.
author: mrb0nk500 <b0nk@b0nk.xyz> 2021-02-08 23:03:31 -0500
committer: mrb0nk500 <b0nk@b0nk.xyz> 2021-02-08 23:03:31 -0500
commit: 04b29166fd226e2464bcfacf6839e3274ff68cc6 (patch)
tree: 848685973ecbee3b74868820636ac7eb7af4583b /assemble.c
parent: cd6982e5da1f5facdc1e0154b3a27c01e8b076c9 (diff)
1 files changed, 106 insertions, 151 deletions
diff --git a/assemble.c b/assemble.c
index f4de278..ce35e78 100644
--- a/assemble.c
+++ b/assemble.c
@@ -113,98 +113,50 @@ static void write_value(uint64_t value, uint64_t address, uint8_t size) {
 	}
 }
 
-uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) {
+uint64_t get_val(expr *tree, uint64_t addr, uint8_t size, int depth, uint8_t dbg) {
 	uint64_t value = 0;
-	uint64_t tmp_val = 0;
-	uint8_t type = EXPR_NONE;
-	uint8_t isstart = 1;
-	int done = 0;
-	do {
-		if (t->id == TOK_EXPR) {
-			type = t->type;
-			t = t->next;
-		}
-		if (stop_comma && t->subtype == TOK_CSV) {
-			done = 1;
-		}
-		if (stop_comma && t->id == TOK_REG) {
-			break;
-		}
-		switch (t->id) {
-			case TOK_HEX:
-			case TOK_DEC:
-			case TOK_BIN:
-			case TOK_CHAR: tmp_val = t->qword; t = t->next; break;
-			case TOK_SYM:
-			case TOK_LABEL:
-				for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next);
-				tmp_val = (t->sym) ? t->sym->val : addr;
-				t = t->next;
-				break;
-			default: tmp_val = 0;
-		}
-		if (end_expr != 0xFF && type == end_expr) {
-			break;
-		}
-		switch (type) {
-			case EXPR_PLUS : (isstart) ? (value =  tmp_val) : (value += tmp_val); break;
-			case EXPR_MINUS: (isstart) ? (value = -tmp_val) : (value -= tmp_val); break;
-			case EXPR_OR   : value  |= tmp_val; break;
-			case EXPR_LSHFT: value <<= tmp_val; break;
-			case EXPR_RSHFT: value >>= tmp_val; break;
-			case EXPR_LOW  :
-				value = tmp_val;
-				switch (size) {
-					default:
-					case 2 : value &= 0xFFFFFFFF; break;
-					case 1 : value &= 0x0000FFFF; break;
-					case 0 : value &= 0x000000FF; break;
-				}
-				break;
-			case EXPR_HIGH :
-				value = tmp_val;
-				switch (size) {
-					default:
-					case 2 : value >>= 0x20; break;
-					case 1 : value >>= 0x10; break;
-					case 0 : value >>= 0x08; break;
-				}
-				break;
-			case EXPR_NONE : value  = tmp_val; break;
-		}
-		isstart = 0;
-		if (dbg) {
-			printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val);
-		}
-	} while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg));
-	return value;
-}
+	uint64_t lvalue = 0;
+	uint64_t rvalue = 0;
+	uint64_t expr_val = 0;
+	int type;
 
-token *skip_expr(token *t, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) {
-	int done = 0;
-	int is_reg = 0;
-	do {
-		is_reg = (t && t->next && t->next->id == TOK_REG);
-		t = (t->id == TOK_EXPR && !is_reg) ? t->next : t;
-		if (is_reg || (stop_comma && (t->subtype == TOK_CSV))) {
-			done = 1;
-		}
-		switch (t->id) {
-			case TOK_HEX  :
-			case TOK_DEC  :
-			case TOK_BIN  :
-			case TOK_CHAR :
-			case TOK_SYM  :
-			case TOK_LABEL: t = t->next; break;
-		}
-		if (end_expr != 0xFF && t->id == TOK_EXPR && t->type == end_expr) {
-			break;
-		}
-	} while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg));
-	return t;
-}
+	int is_start = (!depth && tree->left && tree->right == NULL);
+
+	if (tree->left) {
+		lvalue = get_val(tree->left, addr, size, depth+1, dbg);
+	}
+
+	if (tree->right) {
+		rvalue = get_val(tree->right, addr, size, depth+1, dbg);
+	}
 
+	type = tree->type;
+	expr_val = tree->value.val;
 
+	switch (type) {
+		case EXPR_HEX:
+		case EXPR_DEC:
+		case EXPR_BIN:
+		case EXPR_CHAR: value = expr_val; break;
+		case EXPR_SYM: value = (tree->value.sym) ? tree->value.sym->val : addr; break;
+		case EXPR_PLUS: value = lvalue + rvalue; break;
+		case EXPR_MINUS: (is_start) ? (value = -lvalue) : (value = lvalue - rvalue); break;
+		case EXPR_OR: value = lvalue | rvalue; break;
+		case EXPR_LSHFT: value = lvalue << rvalue; break;
+		case EXPR_RSHFT: value = lvalue >> rvalue; break;
+		case EXPR_LOW:
+		case EXPR_HIGH:
+			value = lvalue;
+			switch (size) {
+				default:
+				case 2 : (type == EXPR_LOW) ? (value &= 0xFFFFFFFF) : (value >>= 0x20); break;
+				case 1 : (type == EXPR_LOW) ? (value &= 0x0000FFFF) : (value >>= 0x10); break;
+				case 0 : (type == EXPR_LOW) ? (value &= 0x000000FF) : (value >>= 0x08); break;
+			}
+			break;
+	}
+	return value;
+}
 
 uint8_t get_directivesize(uint8_t type, uint8_t dbg) {
 	switch (type) {
@@ -262,7 +214,7 @@ uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg
 							case DIR_QWORD : member_size = 8; break;
 							case DIR_UNION :
 							case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break;
-							case DIR_RES   : member_size = get_val(t, address, 3, 0xFF, 0, dbg); t = skip_expr(t, 0xFF, 0, dbg); break;
+							case DIR_RES   : t = t->next; member_size = get_val(t->expr, address, 3, 0, dbg); break;
 						}
 						if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) {
 							member->val = offset;
@@ -299,13 +251,8 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre
 	for (; t; t = t->next) {
 		tmp = 0;
 		switch (t->id) {
-			case TOK_HEX:
-			case TOK_DEC:
-			case TOK_BIN:
-			case TOK_CHAR:
-			case TOK_SYM:
-			case TOK_LABEL:
-				val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), 0xFF, 0, dbg);
+			case TOK_EXPR:
+				val.u64 = get_val(t->expr, tmpaddr, get_directivesize(type, dbg), 0, dbg);
 				switch (type) {
 					case DIR_QWORD: tmp = 8; break;
 					case DIR_DWORD: tmp = 4; break;
@@ -315,9 +262,7 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre
 				write_value(val.u64, tmpaddr, tmp-1);
 				tmpaddr += tmp;
 				bc->datasize += tmp;
-				if (t->next && t->next->id == TOK_EXPR && isexpr(t->next->type, dbg)) {
-					t = skip_expr(t, 0xFF, 0, dbg);
-				}
+
 				break;
 			case TOK_STRING:
 				if (type == DIR_BYTE) {
@@ -363,6 +308,9 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op
 	uint8_t op_ins_size[2];
 	union reg ins;
 	union reg op_ins[2];
+	ins.u64 = 0;
+	op_ins[0].u64 = 0;
+	op_ins[1].u64 = 0;
 	memset(op_ins_size, 0, sizeof(op_ins_size));
 	memset(op_ins, 0, sizeof(op_ins_size));
 	if (prefix & 3) {
@@ -451,6 +399,35 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op
 	return inst_size;
 }
 
+int is_value(expr *e, expr **found) {
+	if (e == NULL) {
+		return 0;
+	}
+	switch (e->type) {
+		case EXPR_HEX :
+		case EXPR_DEC :
+		case EXPR_BIN :
+		case EXPR_CHAR:
+		case EXPR_SYM :
+			if (found) {
+				*found = e;
+			}
+			return 1;
+		default:
+			if (e->left) {
+				return is_value(e->left, found);
+			}
+			if (e->right) {
+				return is_value(e->right, found);
+			}
+			break;
+	}
+	if (found) {
+		*found = e;
+	}
+	return 0;
+}
+
 token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t dbg) {
 	uint8_t op_type;
 	uint8_t op_inst;
@@ -478,6 +455,16 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
 		printf("t: %p, t->id: $%X, t->id: %s, t->subtype: $%X, t->subtype: %s\n", tmp, tmp->id, (tmp->id <= TOK_MEMBER) ? lex_tok[tmp->id] : "TOK_NONE", tmp->subtype, (tmp->subtype == TOK_IND || tmp->subtype == TOK_CSV) ? lex_tok[tmp->subtype] : "TOK_NONE");
 	}*/
 
+	if (t) {
+		switch (t->id) {
+			case TOK_OPCODE:
+			case TOK_EXTOP :
+			case TOK_ORTHO : t = (t->next) ? t->next : t; break;
+		}
+	}
+
+	int isvalue = 0;
+
 	for (; t && i < 2; t = t->next) {
 		reg = (old_i != i) ? 0 : reg;
 		got_value = (old_i != i) ? 0 : got_value;
@@ -485,26 +472,27 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
 			brack_done = (t->id == TOK_REG) ? 2 : 1;
 		}
 		switch (t->id) {
-			case TOK_HEX  :
-			case TOK_DEC  :
-			case TOK_BIN  :
-			case TOK_SYM  :
-			case TOK_CHAR :
-			case TOK_LABEL:
+			case TOK_EXPR:
+				isvalue = is_value(t->expr, NULL);
 				if (!got_value) {
-					expr_type = (expr_type == 0xFF && t->next && t->next->id == TOK_EXPR) ? t->next->type : expr_type;
-					switch (expr_type) {
-						default		: stop_comma = 1; break;
-						case EXPR_MUL	: stop_comma = 0; break;
+					expr *e = t->expr;
+					if (isvalue) {
+						if (expr_type == 0xFF && e) {
+							expr_type = (e->right) ? e->right->type : e->type;
+						}
+						stop_comma = (expr_type != EXPR_MUL);
+						value = get_val(e, address, (rs != 0xFF) ? rs : 0, 0, dbg);
+					} else {
+						break;
 					}
 					is_sib = (!stop_comma && op[i].type && op[i].id == MEM_IND);
-					value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
-					op[i].value = (!is_sib) ? value : op[i].value;
 					got_value = 1;
 				} else {
-					op[i].value = (!is_sib) ? value : op[i].value;
-					got_value = 0;
+					if (!isvalue) {
+						break;
+					}
 				}
+				op[i].value = (!is_sib) ? value : op[i].value;
 				if ((op[i].type == 1 && op[i].id == MEM_RIND) || (!op[i].type)) {
 					op[i].is_ind = (op[i].type == 1 && op[i].id == MEM_RIND);
 					op[i].type = 1;
@@ -528,11 +516,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
 						op[i].id = MEM_ZMR;
 					}
 				}
-				if (got_value && !is_comma) {
-					if (t && t->subtype != TOK_CSV) {
-						t = skip_expr(t, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
-					}
-				}
 				i += is_comma;
 				if (old_i != i) {
 					got_value = 0;
@@ -561,21 +544,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
 				is_comma = (is_comma >= 2) ? 0 : is_comma;
 				i += is_comma;
 				break;
-			case TOK_EXPR:
-				expr_type = t->type;
-				switch (expr_type) {
-					default		: stop_comma = 1; break;
-					case EXPR_MUL	: stop_comma = 0; break;
-				}
-				if (!got_value) {
-					if (t->next && t->next->id != TOK_REG) {
-						value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
-						got_value = 1;
-					}
-				} else {
-					got_value = 0;
-				}
-				break;
 			case TOK_CC:
 				op[0].cc = t->byte;
 				i = 3;
@@ -1017,8 +985,8 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t
 				switch (t->type) {
 					case DIR_STRUCT:
 					case DIR_UNION : handle_struct(l, address, 0, dbg); break;
-					case DIR_RES: t = t->next; address += get_val(t, address, 3, 0xFF, 0, dbg); break;
-					case DIR_ORG: t = t->next; address  = get_val(t, address, 3, 0xFF, 0, dbg); break;
+					case DIR_RES: t = t->next; address += get_val(t->expr, address, 3, 0, dbg); break;
+					case DIR_ORG: t = t->next; address  = get_val(t->expr, address, 3, 0, dbg); break;
 					case DIR_BYTE:
 					case DIR_WORD:
 					case DIR_DWORD:
@@ -1034,7 +1002,7 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t
 	return address;
 }
 
-token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) {
+token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s, expr *e) {
 	token *new_tok = malloc(sizeof(token));
 	(last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok);
 
@@ -1052,6 +1020,8 @@ token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t
 
 	if (s) {
 		new_tok->sym = s;
+	} else if (e) {
+		new_tok->expr = e;
 	} else if (str[0]) {
 		new_tok->str = str;
 	} else {
@@ -1286,17 +1256,6 @@ static void free_fixups(fixup *f) {
 	}
 }
 
-static inline void free_tmp_symtab(tmp_symtab *st) {
-	tmp_symtab *tmp;
-	if (st != NULL) {
-		tmp = st;
-		st = st->next;
-		free(tmp);
-		tmp = NULL;
-		free_tmp_symtab(st);
-	}
-}
-
 uint64_t get_tokmem(token *t) {
 	uint64_t i = 0;
 	for (; t; t = t->next, i++);
@@ -1327,10 +1286,6 @@ void cleanup() {
 		free_fixups(fixups);
 		fixups = NULL;
 	}
-	if (tmp_sym_table) {
-		free_tmp_symtab(tmp_sym_table);
-		tmp_sym_table = NULL;
-	}
 	while (i < stridx || i < comidx) {
 		if (i < stridx && string[i]) {
 			free(string[i]);
author	mrb0nk500 <b0nk@b0nk.xyz>	2021-02-08 23:03:31 -0500
committer	mrb0nk500 <b0nk@b0nk.xyz>	2021-02-08 23:03:31 -0500
commit	04b29166fd226e2464bcfacf6839e3274ff68cc6 (patch)
tree	848685973ecbee3b74868820636ac7eb7af4583b /assemble.c
parent	cd6982e5da1f5facdc1e0154b3a27c01e8b076c9 (diff)