summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2021-02-08 23:03:31 -0500
committermrb0nk500 <b0nk@b0nk.xyz>2021-02-08 23:03:31 -0500
commit04b29166fd226e2464bcfacf6839e3274ff68cc6 (patch)
tree848685973ecbee3b74868820636ac7eb7af4583b
parentcd6982e5da1f5facdc1e0154b3a27c01e8b076c9 (diff)
Completely rewrote the expression handler.
It's now separate from the lexer, and was designed to make it both easier to understand, and easier to parse.
-rw-r--r--asmmon.h48
-rw-r--r--assemble.c257
-rw-r--r--enums.h1
-rw-r--r--lexer.c735
4 files changed, 695 insertions, 346 deletions
diff --git a/asmmon.h b/asmmon.h
index de1f732..e98afff 100644
--- a/asmmon.h
+++ b/asmmon.h
@@ -9,6 +9,7 @@ typedef struct ln line;
typedef struct sym symbol;
typedef struct fix fixup;
typedef struct inst instruction;
+typedef struct expr expr;
struct tok {
@@ -27,6 +28,7 @@ struct tok {
/* Token value(s). */
union {
+ expr *expr;
symbol *sym;
char *str;
uint8_t byte ;
@@ -74,6 +76,19 @@ struct inst {
uint8_t op; /* Base value used to get the actual opcode. */
};
+struct expr { /* One node of the binary expression tree produced by make_expr(). */
+ int type; /* Expression type: one of the EXPR_* constants. */
+
+ expr *left; /* Left side of expression; also the sole operand of a unary operator. */
+ expr *right; /* Right side of expression; make_expr() leaves it NULL until a parser sets it. */
+
+ /* Expression value; get_val() only uses it for literal and symbol nodes. */
+ union {
+ uint64_t val; /* Literal value (EXPR_HEX, EXPR_DEC, EXPR_BIN, EXPR_CHAR). */
+ symbol *sym; /* Referenced symbol (EXPR_SYM); get_val() falls back to the address when NULL. */
+ } value;
+};
+
extern char lexeme[];
extern char *string[];
@@ -89,7 +104,6 @@ extern symbol *locals;
extern symbol *last_loc;
extern fixup *fixups;
extern fixup *last_fix;
-extern tmp_symtab *tmp_sym_table;
extern uint8_t lex_type;
@@ -173,7 +187,7 @@ enum pre_token {
PTOK_OTHER
};
-enum expr {
+enum expression {
EXPR_PLUS,
EXPR_MINUS,
EXPR_LOW,
@@ -182,6 +196,12 @@ enum expr {
EXPR_LSHFT,
EXPR_RSHFT,
EXPR_MUL,
+ EXPR_HEX,
+ EXPR_DEC,
+ EXPR_BIN,
+ EXPR_CHAR,
+ EXPR_SYM,
+ EXPR_REG,
EXPR_NONE
};
@@ -690,6 +710,25 @@ static const char *set_cc[8] = {
"VC"
};
+/*
+ * Printable register names, indexed by the REG_* constants.
+ *
+ * The array is sized by its initializers rather than a fixed 16, so that
+ * REG_PC (which follows REG_R15 in the register enum, and is returned by
+ * is_reg()) has an entry too, and indexing with it stays in bounds.
+ */
+static const char *reg_name[] = {
+	[REG_A  ] = "A",
+	[REG_B  ] = "B",
+	[REG_X  ] = "X",
+	[REG_Y  ] = "Y",
+	[REG_E  ] = "E",
+	[REG_C  ] = "C",
+	[REG_D  ] = "D",
+	[REG_S  ] = "S",
+	[REG_F  ] = "F",
+	[REG_SP ] = "SP",
+	[REG_BP ] = "BP",
+	[REG_R11] = "R11",
+	[REG_R12] = "R12",
+	[REG_R13] = "R13",
+	[REG_R14] = "R14",
+	[REG_R15] = "R15",
+	[REG_PC ] = "PC"
+};
+
static const char *instdesc[OPNUM] = {
[ADC] = "ADd accumulator, with operand, Carry if needed.",
[AND] = "Bitwise AND accumulator, with operand.",
@@ -805,11 +844,10 @@ extern uint8_t isfixup;
extern line *find_line(uint32_t ln, uint8_t dbg);
extern uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg);
-
-extern uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg);
+extern uint64_t get_val(expr *tree, uint64_t addr, uint8_t size, int depth, uint8_t dbg);
extern token *skip_expr(token *t, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg);
extern uint64_t parse_tokens(token *tm, line **l, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg);
-extern token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s);
+extern token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s, expr *e);
extern void assemble(line *ln, bytecount *bc, uint8_t dbg);
extern void fix_symtree(line *l);
extern void cleanup();
diff --git a/assemble.c b/assemble.c
index f4de278..ce35e78 100644
--- a/assemble.c
+++ b/assemble.c
@@ -113,98 +113,50 @@ static void write_value(uint64_t value, uint64_t address, uint8_t size) {
}
}
-uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) {
+/*
+ * Recursively evaluate an expression tree.
+ *
+ * tree:  Root of the (sub)tree to evaluate; NULL evaluates to 0.
+ * addr:  Address used as the value of an EXPR_SYM node that has no symbol.
+ * size:  Selects the width used by the low/high operators:
+ *        0 = 8 bits, 1 = 16 bits, anything else = 32 bits.
+ * depth: Recursion depth; 0 for the root, incremented for each child.
+ * dbg:   Debug flag (not used by this function).
+ *
+ * Returns the value of the expression.
+ */
+uint64_t get_val(expr *tree, uint64_t addr, uint8_t size, int depth, uint8_t dbg) {
uint64_t value = 0;
- uint64_t tmp_val = 0;
- uint8_t type = EXPR_NONE;
- uint8_t isstart = 1;
- int done = 0;
- do {
- if (t->id == TOK_EXPR) {
- type = t->type;
- t = t->next;
- }
- if (stop_comma && t->subtype == TOK_CSV) {
- done = 1;
- }
- if (stop_comma && t->id == TOK_REG) {
- break;
- }
- switch (t->id) {
- case TOK_HEX:
- case TOK_DEC:
- case TOK_BIN:
- case TOK_CHAR: tmp_val = t->qword; t = t->next; break;
- case TOK_SYM:
- case TOK_LABEL:
- for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next);
- tmp_val = (t->sym) ? t->sym->val : addr;
- t = t->next;
- break;
- default: tmp_val = 0;
- }
- if (end_expr != 0xFF && type == end_expr) {
- break;
- }
- switch (type) {
- case EXPR_PLUS : (isstart) ? (value = tmp_val) : (value += tmp_val); break;
- case EXPR_MINUS: (isstart) ? (value = -tmp_val) : (value -= tmp_val); break;
- case EXPR_OR : value |= tmp_val; break;
- case EXPR_LSHFT: value <<= tmp_val; break;
- case EXPR_RSHFT: value >>= tmp_val; break;
- case EXPR_LOW :
- value = tmp_val;
- switch (size) {
- default:
- case 2 : value &= 0xFFFFFFFF; break;
- case 1 : value &= 0x0000FFFF; break;
- case 0 : value &= 0x000000FF; break;
- }
- break;
- case EXPR_HIGH :
- value = tmp_val;
- switch (size) {
- default:
- case 2 : value >>= 0x20; break;
- case 1 : value >>= 0x10; break;
- case 0 : value >>= 0x08; break;
- }
- break;
- case EXPR_NONE : value = tmp_val; break;
- }
- isstart = 0;
- if (dbg) {
- printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val);
- }
- } while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg));
- return value;
-}
+	uint64_t lvalue = 0;
+	uint64_t rvalue = 0;
+	int is_unary;
+	(void)dbg;
-token *skip_expr(token *t, uint8_t end_expr, uint8_t stop_comma, uint8_t dbg) {
- int done = 0;
- int is_reg = 0;
- do {
- is_reg = (t && t->next && t->next->id == TOK_REG);
- t = (t->id == TOK_EXPR && !is_reg) ? t->next : t;
- if (is_reg || (stop_comma && (t->subtype == TOK_CSV))) {
- done = 1;
- }
- switch (t->id) {
- case TOK_HEX :
- case TOK_DEC :
- case TOK_BIN :
- case TOK_CHAR :
- case TOK_SYM :
- case TOK_LABEL: t = t->next; break;
- }
- if (end_expr != 0xFF && t->id == TOK_EXPR && t->type == end_expr) {
- break;
- }
- } while (!done && t && t->id == TOK_EXPR && isexpr(t->type, dbg));
- return t;
-}
+
+	/* The expression parser can hand back an empty tree; treat it as 0
+	 * instead of dereferencing a NULL pointer.
+	 */
+	if (tree == NULL) {
+		return value;
+	}
+
+	/* An operator node with only a left operand is a unary operator.
+	 * This must not depend on the depth, otherwise a nested unary minus
+	 * (as in "1 + -2") is evaluated as "operand - 0" and loses its sign.
+	 */
+	is_unary = (tree->left != NULL && tree->right == NULL);
+
+	if (tree->left) {
+		lvalue = get_val(tree->left, addr, size, depth+1, dbg);
+	}
+
+	if (tree->right) {
+		rvalue = get_val(tree->right, addr, size, depth+1, dbg);
+	}
+
+	switch (tree->type) {
+		case EXPR_HEX:
+		case EXPR_DEC:
+		case EXPR_BIN:
+		case EXPR_CHAR: value = tree->value.val; break;
+		case EXPR_SYM: value = (tree->value.sym) ? tree->value.sym->val : addr; break;
+		case EXPR_PLUS: value = lvalue + rvalue; break;
+		case EXPR_MINUS: value = (is_unary) ? (0 - lvalue) : (lvalue - rvalue); break;
+		case EXPR_OR: value = lvalue | rvalue; break;
+		/* Shifting a 64-bit value by 64 or more is undefined in C;
+		 * every bit is shifted out, so the result is defined as 0.
+		 */
+		case EXPR_LSHFT: value = (rvalue < 64) ? (lvalue << rvalue) : 0; break;
+		case EXPR_RSHFT: value = (rvalue < 64) ? (lvalue >> rvalue) : 0; break;
+		case EXPR_LOW:
+			switch (size) {
+				default:
+				case 2 : value = lvalue & 0xFFFFFFFF; break;
+				case 1 : value = lvalue & 0x0000FFFF; break;
+				case 0 : value = lvalue & 0x000000FF; break;
+			}
+			break;
+		case EXPR_HIGH:
+			switch (size) {
+				default:
+				case 2 : value = lvalue >> 0x20; break;
+				case 1 : value = lvalue >> 0x10; break;
+				case 0 : value = lvalue >> 0x08; break;
+			}
+			break;
+		default: break;
+	}
+	return value;
+}
uint8_t get_directivesize(uint8_t type, uint8_t dbg) {
switch (type) {
@@ -262,7 +214,7 @@ uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg
case DIR_QWORD : member_size = 8; break;
case DIR_UNION :
case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break;
- case DIR_RES : member_size = get_val(t, address, 3, 0xFF, 0, dbg); t = skip_expr(t, 0xFF, 0, dbg); break;
+ case DIR_RES : t = t->next; member_size = get_val(t->expr, address, 3, 0, dbg); break;
}
if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) {
member->val = offset;
@@ -299,13 +251,8 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre
for (; t; t = t->next) {
tmp = 0;
switch (t->id) {
- case TOK_HEX:
- case TOK_DEC:
- case TOK_BIN:
- case TOK_CHAR:
- case TOK_SYM:
- case TOK_LABEL:
- val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), 0xFF, 0, dbg);
+ case TOK_EXPR:
+ val.u64 = get_val(t->expr, tmpaddr, get_directivesize(type, dbg), 0, dbg);
switch (type) {
case DIR_QWORD: tmp = 8; break;
case DIR_DWORD: tmp = 4; break;
@@ -315,9 +262,7 @@ uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t addre
write_value(val.u64, tmpaddr, tmp-1);
tmpaddr += tmp;
bc->datasize += tmp;
- if (t->next && t->next->id == TOK_EXPR && isexpr(t->next->type, dbg)) {
- t = skip_expr(t, 0xFF, 0, dbg);
- }
+
break;
case TOK_STRING:
if (type == DIR_BYTE) {
@@ -363,6 +308,9 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op
uint8_t op_ins_size[2];
union reg ins;
union reg op_ins[2];
+ ins.u64 = 0;
+ op_ins[0].u64 = 0;
+ op_ins[1].u64 = 0;
memset(op_ins_size, 0, sizeof(op_ins_size));
memset(op_ins, 0, sizeof(op_ins_size));
if (prefix & 3) {
@@ -451,6 +399,35 @@ static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, op
return inst_size;
}
+/*
+ * Walk down an expression tree and report whether the walk ends on a value
+ * node (a hex, decimal, binary or character literal, or a symbol).
+ *
+ * At every non-value node the walk follows the left child if there is one,
+ * and the right child only when there is no left child.
+ *
+ * e:     Tree to inspect; NULL yields 0 and leaves *found untouched.
+ * found: Optional. Receives the node the walk stopped on: the value node
+ *        on success, or the last node visited on failure.
+ *
+ * Returns 1 if a value node was reached, 0 otherwise.
+ */
+int is_value(expr *e, expr **found) {
+	expr *node = e;
+	int reached_value = 0;
+
+	if (node == NULL) {
+		return 0;
+	}
+
+	for (;;) {
+		expr *child;
+
+		switch (node->type) {
+			case EXPR_HEX :
+			case EXPR_DEC :
+			case EXPR_BIN :
+			case EXPR_CHAR:
+			case EXPR_SYM : reached_value = 1; break;
+			default       : break;
+		}
+		if (reached_value) {
+			break;
+		}
+
+		/* Prefer the left child; fall back to the right one. */
+		child = (node->left) ? node->left : node->right;
+		if (child == NULL) {
+			break;
+		}
+		node = child;
+	}
+
+	if (found) {
+		*found = node;
+	}
+	return reached_value;
+}
+
token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t dbg) {
uint8_t op_type;
uint8_t op_inst;
@@ -478,6 +455,16 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
printf("t: %p, t->id: $%X, t->id: %s, t->subtype: $%X, t->subtype: %s\n", tmp, tmp->id, (tmp->id <= TOK_MEMBER) ? lex_tok[tmp->id] : "TOK_NONE", tmp->subtype, (tmp->subtype == TOK_IND || tmp->subtype == TOK_CSV) ? lex_tok[tmp->subtype] : "TOK_NONE");
}*/
+ if (t) {
+ switch (t->id) {
+ case TOK_OPCODE:
+ case TOK_EXTOP :
+ case TOK_ORTHO : t = (t->next) ? t->next : t; break;
+ }
+ }
+
+ int isvalue = 0;
+
for (; t && i < 2; t = t->next) {
reg = (old_i != i) ? 0 : reg;
got_value = (old_i != i) ? 0 : got_value;
@@ -485,26 +472,27 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
brack_done = (t->id == TOK_REG) ? 2 : 1;
}
switch (t->id) {
- case TOK_HEX :
- case TOK_DEC :
- case TOK_BIN :
- case TOK_SYM :
- case TOK_CHAR :
- case TOK_LABEL:
+ case TOK_EXPR:
+ isvalue = is_value(t->expr, NULL);
if (!got_value) {
- expr_type = (expr_type == 0xFF && t->next && t->next->id == TOK_EXPR) ? t->next->type : expr_type;
- switch (expr_type) {
- default : stop_comma = 1; break;
- case EXPR_MUL : stop_comma = 0; break;
+ expr *e = t->expr;
+ if (isvalue) {
+ if (expr_type == 0xFF && e) {
+ expr_type = (e->right) ? e->right->type : e->type;
+ }
+ stop_comma = (expr_type != EXPR_MUL);
+ value = get_val(e, address, (rs != 0xFF) ? rs : 0, 0, dbg);
+ } else {
+ break;
}
is_sib = (!stop_comma && op[i].type && op[i].id == MEM_IND);
- value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
- op[i].value = (!is_sib) ? value : op[i].value;
got_value = 1;
} else {
- op[i].value = (!is_sib) ? value : op[i].value;
- got_value = 0;
+ if (!isvalue) {
+ break;
+ }
}
+ op[i].value = (!is_sib) ? value : op[i].value;
if ((op[i].type == 1 && op[i].id == MEM_RIND) || (!op[i].type)) {
op[i].is_ind = (op[i].type == 1 && op[i].id == MEM_RIND);
op[i].type = 1;
@@ -528,11 +516,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
op[i].id = MEM_ZMR;
}
}
- if (got_value && !is_comma) {
- if (t && t->subtype != TOK_CSV) {
- t = skip_expr(t, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
- }
- }
i += is_comma;
if (old_i != i) {
got_value = 0;
@@ -561,21 +544,6 @@ token *get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t
is_comma = (is_comma >= 2) ? 0 : is_comma;
i += is_comma;
break;
- case TOK_EXPR:
- expr_type = t->type;
- switch (expr_type) {
- default : stop_comma = 1; break;
- case EXPR_MUL : stop_comma = 0; break;
- }
- if (!got_value) {
- if (t->next && t->next->id != TOK_REG) {
- value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_comma) ? expr_type : 0xFF, stop_comma, dbg);
- got_value = 1;
- }
- } else {
- got_value = 0;
- }
- break;
case TOK_CC:
op[0].cc = t->byte;
i = 3;
@@ -1017,8 +985,8 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t
switch (t->type) {
case DIR_STRUCT:
case DIR_UNION : handle_struct(l, address, 0, dbg); break;
- case DIR_RES: t = t->next; address += get_val(t, address, 3, 0xFF, 0, dbg); break;
- case DIR_ORG: t = t->next; address = get_val(t, address, 3, 0xFF, 0, dbg); break;
+ case DIR_RES: t = t->next; address += get_val(t->expr, address, 3, 0, dbg); break;
+ case DIR_ORG: t = t->next; address = get_val(t->expr, address, 3, 0, dbg); break;
case DIR_BYTE:
case DIR_WORD:
case DIR_DWORD:
@@ -1034,7 +1002,7 @@ uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t
return address;
}
-token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) {
+token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s, expr *e) {
token *new_tok = malloc(sizeof(token));
(last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok);
@@ -1052,6 +1020,8 @@ token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t
if (s) {
new_tok->sym = s;
+ } else if (e) {
+ new_tok->expr = e;
} else if (str[0]) {
new_tok->str = str;
} else {
@@ -1286,17 +1256,6 @@ static void free_fixups(fixup *f) {
}
}
-static inline void free_tmp_symtab(tmp_symtab *st) {
- tmp_symtab *tmp;
- if (st != NULL) {
- tmp = st;
- st = st->next;
- free(tmp);
- tmp = NULL;
- free_tmp_symtab(st);
- }
-}
-
uint64_t get_tokmem(token *t) {
uint64_t i = 0;
for (; t; t = t->next, i++);
@@ -1327,10 +1286,6 @@ void cleanup() {
free_fixups(fixups);
fixups = NULL;
}
- if (tmp_sym_table) {
- free_tmp_symtab(tmp_sym_table);
- tmp_sym_table = NULL;
- }
while (i < stridx || i < comidx) {
if (i < stridx && string[i]) {
free(string[i]);
diff --git a/enums.h b/enums.h
index 834566d..1658c35 100644
--- a/enums.h
+++ b/enums.h
@@ -38,6 +38,7 @@ enum ortho_reg {
REG_R13,
REG_R14,
REG_R15,
+ REG_PC,
};
enum ortho_mem {
diff --git a/lexer.c b/lexer.c
index 57a7e14..bc954fc 100644
--- a/lexer.c
+++ b/lexer.c
@@ -68,11 +68,11 @@ int add_symbol(symbol *sym, const char *name, symbol **root, symbol **lsym, symb
/*return 0;*/
}
}
- for (; s != NULL && s->next != NULL; s = s->next) {
- if (dbg) {
- printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
- }
+ for (; s != NULL && s->next != NULL; s = s->next) {
+ if (dbg) {
+ printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
}
+ }
if ((is_new_scope && *lsym) || *lloc || *csym) {
/*
if (is_new_scope) {
@@ -462,6 +462,29 @@ fixup *find_fixup(fixup *root, const char *name, int depth, uint8_t dbg) {
return NULL;
}
+/*
+ * Visit every node of an expression tree (children first, then the node
+ * itself) and redirect matching symbol references to a new symbol.
+ *
+ * A node matches when it is an EXPR_SYM node with a symbol whose name
+ * equals `name`, whose depth equals `depth`, and whose `def` flag is clear.
+ *
+ * root:  Tree to walk; NULL is ignored.
+ * name:  Symbol name to look for.
+ * sym:   Symbol that matching nodes are pointed at.
+ * depth: Depth a symbol must have to match.
+ * dbg:   Debug flag, passed on to the recursive calls.
+ */
+void find_expr_sym(expr *root, const char *name, symbol *sym, int depth, uint8_t dbg) {
+	const size_t wanted_len = strlen(name);
+	symbol *current;
+
+	if (root == NULL) {
+		return;
+	}
+
+	if (root->left) {
+		find_expr_sym(root->left, name, sym, depth, dbg);
+	}
+	if (root->right) {
+		find_expr_sym(root->right, name, sym, depth, dbg);
+	}
+
+	if (root->type != EXPR_SYM || root->value.sym == NULL) {
+		return;
+	}
+
+	current = root->value.sym;
+	if (strlen(current->name) != wanted_len || strcmp(name, current->name) != 0) {
+		return;
+	}
+
+	if (current->depth == depth && !current->def) {
+		root->value.sym = sym;
+	}
+}
+
void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uint8_t dbg) {
size_t name_len = strlen(name);
for (token *t = l->tok; t; t = t->next) {
@@ -478,6 +501,7 @@ void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uin
}
}
break;
+ case TOK_EXPR: find_expr_sym(t->expr, name, sym, depth, dbg); break;
}
}
if (l->next) {
@@ -512,6 +536,416 @@ void new_symbol(token *t, const char *name, uint64_t value, int depth, uint8_t d
}
}
+/*
+ * Decode a single backslash escape sequence.
+ *
+ * s:    Points at the backslash that starts the sequence. The first
+ *       character is skipped unconditionally; no error is reported if it
+ *       is not a backslash.
+ * code: Receives the decoded character. May be NULL if the caller only
+ *       wants to know where the sequence ends.
+ *
+ * Supported sequences:
+ *   \a \b \f \n \r \t \v \\ \" \' \e   single characters
+ *   \$hh  \xhh  \Xhh                   hexadecimal number
+ *   \%bb                               binary number
+ *   \ooo                               octal number (starts with a digit)
+ *
+ * For an unrecognised sequence, the backslash itself is stored in *code,
+ * and the returned pointer refers to the character after the backslash,
+ * so that character is read again as ordinary text by the caller.
+ *
+ * Returns a pointer to the first character after the escape sequence.
+ */
+char *parse_escape(char *s, char *code) {
+	char dummy;
+	char *end;
+	int base;
+
+	s++;
+
+	if (code == NULL) {
+		code = &dummy;
+	}
+
+	switch (*s) {
+		case 'a' : *code = '\a'; return s+1;
+		case 'b' : *code = '\b'; return s+1;
+		case 'f' : *code = '\f'; return s+1;
+		case 'n' : *code = '\n'; return s+1;
+		case 'r' : *code = '\r'; return s+1;
+		case 't' : *code = '\t'; return s+1;
+		case 'v' : *code = '\v'; return s+1;
+		case '\\': *code = '\\'; return s+1;
+		case '\"': *code = '\"'; return s+1;
+		case '\'': *code = '\''; return s+1;
+		case 'e' : *code = '\x1B'; return s+1;
+		/* The radix prefix has to be stepped over before the digits
+		 * are converted. This used to be skipped for '%', which made
+		 * every binary escape decode to 0 without consuming anything.
+		 */
+		case '$' : case 'x' : case 'X' : base = 16; s++; break;
+		case '%' : base = 2; s++; break;
+		case '0' : case '1' : case '2' : case '3' : case '4' :
+		case '5' : case '6' : case '7' : case '8' : case '9' :
+			base = 8;
+			break;
+		default :
+			/* Never leave *code unset: callers store it right away. */
+			*code = '\\';
+			return s;
+	}
+
+	*code = (char)strtoull(s, &end, base);
+	return end;
+}
+
+/*
+ * Scan a quoted string or character constant.
+ *
+ * s:         In: points at the opening delimiter. Out: points just past
+ *            the closing delimiter, or at the terminating NUL if the
+ *            quote is never closed.
+ * delm:      The delimiter character. Two delimiters in a row inside the
+ *            quote stand for one literal delimiter character.
+ * get_value: If non-zero, the first sizeof(uint64_t) characters are
+ *            collected; character number i is stored in byte i of the
+ *            object representation of the result.
+ * dbg:       Debug flag (not used by this function).
+ *
+ * Returns the collected value, or 0 if get_value is zero.
+ */
+uint64_t parse_quote(char **s, char delm, int get_value, uint8_t dbg) {
+	uint64_t value = 0;
+	uint8_t *bytes = (uint8_t *)&value;
+	char *str = *s+1;
+	(void)dbg;
+
+	for (size_t i = 0; *str; i++) {
+		/* Start from a known value, in case parse_escape() does
+		 * not store anything for an unrecognised sequence.
+		 */
+		char c = '\0';
+		/* Are we at the start of an escape character? */
+		if (*str == '\\') {
+			str = parse_escape(str, &c);
+		} else {
+			c = *str++;
+			if (c == delm) {
+				if (*str == delm) {
+					/* Allow for multiple repeated
+					 * instances of delm to be treated
+					 * as a single instance of delm.
+					 */
+					str++;
+				} else {
+					break;
+				}
+			}
+		}
+		if (get_value && i < sizeof(uint64_t)) {
+			bytes[i] = (uint8_t)c;
+		}
+	}
+
+	*s = str;
+
+	return value;
+}
+
+/*
+ * Allocate a new expression node without children.
+ *
+ * type:  Expression type (an EXPR_* constant).
+ * value: Stored as the node's value when sym is NULL.
+ * sym:   If non-NULL, stored as the node's value instead of `value`.
+ * dbg:   Debug flag (not used by this function).
+ *
+ * Returns the new node, or NULL if the allocation failed.
+ */
+expr *make_expr(int type, uint64_t value, symbol *sym, uint8_t dbg) {
+	expr *node = malloc(sizeof *node);
+	(void)dbg;
+
+	/* Do not write through a failed allocation. */
+	if (node == NULL) {
+		return NULL;
+	}
+
+	node->type = type;
+	node->left = NULL;
+	node->right = NULL;
+
+	if (sym) {
+		node->value.sym = sym;
+	} else {
+		node->value.val = value;
+	}
+
+	return node;
+}
+
+/*
+ * Check whether a string is the name of a register (case-insensitive).
+ *
+ * Recognised names: the one letter registers A, B, X, Y, E, C, D, S and F,
+ * the two letter registers SP, BP and PC, and three character names made
+ * of 'R' followed by a number in the range REG_R11..REG_R15.
+ *
+ * Returns the matching REG_* value, or -1 if str is not a register name.
+ */
+int is_reg(const char *str) {
+	const size_t len = strlen(str);
+	const char first = str[0];
+
+	if (len == 1) {
+		switch (first) {
+			case 'a': case 'A': return REG_A;
+			case 'b': case 'B': return REG_B;
+			case 'x': case 'X': return REG_X;
+			case 'y': case 'Y': return REG_Y;
+			case 'e': case 'E': return REG_E;
+			case 'c': case 'C': return REG_C;
+			case 'd': case 'D': return REG_D;
+			case 's': case 'S': return REG_S;
+			case 'f': case 'F': return REG_F;
+			default : return -1;
+		}
+	}
+
+	if (len == 2) {
+		const char second = str[1];
+
+		if (second == 'p' || second == 'P') {
+			if (first == 's' || first == 'S') {
+				return REG_SP;
+			}
+			if (first == 'b' || first == 'B') {
+				return REG_BP;
+			}
+			return -1;
+		}
+		if (first == 'p' || first == 'P') {
+			return (second == 'c' || second == 'C') ? REG_PC : -1;
+		}
+		return -1;
+	}
+
+	if (len == 3 && (first == 'r' || first == 'R')) {
+		int regnum = strtoul(str+1, NULL, 10);
+		if (regnum >= REG_R11 && regnum <= REG_R15) {
+			return regnum;
+		}
+	}
+
+	return -1;
+}
+
+/* Wrapped in do/while(0) so that the macro is a single statement, and
+ * cannot swallow the statement that follows it if the trailing
+ * semicolon is ever left out.
+ */
+#define SKIP_WHITESPACE(str, dbg) \
+	do { \
+		/* Skip over any whitespace. */ \
+		for (; isdelm(*(str), (dbg)) & 0x10; (str)++) { \
+		} \
+	} while (0)
+
+
+/*
+ * Parse a primary expression: a number ($hex, %binary or decimal), a
+ * character constant, or a symbol name (optionally prefixed with '@'s).
+ *
+ * line:      In: start of the expression. Out: first character after the
+ *            expression and any whitespace following it. Left untouched
+ *            when NULL is returned.
+ * address:   Current address, passed on to the symbol lookup.
+ * found_reg: Set to 1 when a register name is found instead of a value,
+ *            or when parsing stopped on a ';', a ',' or the stop character.
+ * stop:      Character that ends the expression.
+ * dbg:       Debug flag.
+ *
+ * Returns the new expression node, or NULL if the expression starts with
+ * the stop character or a register name, or if memory ran out.
+ */
+expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	char *str = *line;
+	char *tmp;
+	char *scope_name;
+	symbol *s = NULL;
+
+	/* Symbols and unrecognised tokens never assign this. */
+	uint64_t value = 0;
+
+	int i = 0;
+	int base = 10;
+	int type = EXPR_NONE;
+	int depth = 0;
+	int scope_depth = 0;
+
+	uint8_t ptok = get_ptok(*str, dbg);
+	ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
+
+	if (*str == stop) {
+		return NULL;
+	}
+
+	switch (ptok) {
+		case PTOK_DOLLAR:
+		case PTOK_PERCENT:
+		case PTOK_NUMBER:
+			switch (ptok) {
+				case PTOK_DOLLAR : base = 16; type = EXPR_HEX; str++; break;
+				case PTOK_PERCENT: base = 2; type = EXPR_BIN; str++; break;
+				case PTOK_NUMBER : base = 10; type = EXPR_DEC; /****/ break;
+			}
+
+			/* Get the number of digits, and
+			 * find the end of the number.
+			 */
+			for (; isxdigit((unsigned char)str[i]) && !(isdelm(str[i], dbg) & 0x03); i++);
+
+			/* One extra byte for the terminator. */
+			tmp = malloc(i+1);
+			if (tmp == NULL) {
+				return NULL;
+			}
+
+			memcpy(tmp, str, i);
+			tmp[i] = '\0';
+
+			value = strtoull(tmp, NULL, base);
+			/* The copy was only needed for the conversion. */
+			free(tmp);
+			break;
+		case PTOK_SQUOTE:
+			type = EXPR_CHAR;
+			value = parse_quote(&str, *str, 1, dbg);
+			break;
+		case PTOK_AT:
+			/* Increment the depth count, by the
+			 * number of '@' signs before the
+			 * symbol name.
+			 */
+			for (depth = 0; *str == '@'; str++, depth++);
+			/* Falls through. */
+		case PTOK_ALPHA:
+			/* Find the end of the symbol name.
+			 * Also increment the depth count every
+			 * time a '.' is found in the symbol name.
+			 */
+			for (scope_depth = 0; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
+
+			/* One extra byte for the terminator; writing tmp[i]
+			 * into an i byte buffer overflowed the heap.
+			 */
+			tmp = malloc(i+1);
+			if (tmp == NULL) {
+				return NULL;
+			}
+
+			memcpy(tmp, str, i);
+			tmp[i] = '\0';
+
+			if (is_reg(tmp) >= 0) {
+				free(tmp);
+				*found_reg = 1;
+				return NULL;
+			}
+
+			/* NOTE(review): tmp is not freed on this path, since
+			 * mk_scope_name()/get_sym() may keep a reference to
+			 * it; confirm who owns the name.
+			 */
+			scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp;
+			s = get_sym(scope_name, address, NULL, (scope_depth) ? scope_depth : depth, 1, dbg);
+			isfixup += (s == NULL);
+			type = EXPR_SYM;
+			break;
+		default:
+			break;
+	}
+
+	str += i;
+
+	SKIP_WHITESPACE(str, dbg);
+
+	if (ptok == PTOK_SCOLON || ptok == PTOK_COMMA || *str == stop) {
+		*found_reg = 1;
+	}
+
+	*line = str;
+
+	return make_expr(type, value, s, dbg);
+}
+
+/*
+ * Parse a unary expression: an optional '+', '-', '<' or '>' prefix
+ * operator followed by a primary expression.
+ *
+ * '+' and '-' produce EXPR_PLUS/EXPR_MINUS nodes, '>' an EXPR_LOW node and
+ * '<' an EXPR_HIGH node; the operand becomes the node's left child.
+ *
+ * line:      In: start of the expression. Out: position after it.
+ * found_reg: If already set, nothing is parsed and NULL is returned.
+ * stop:      Character that ends the expression.
+ *
+ * Returns the parsed node, or NULL if there was nothing to parse.
+ */
+expr *get_unary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	char *cur = *line;
+	expr *node;
+	uint8_t ptok;
+	int type = EXPR_NONE;
+
+	/* Nothing to parse. */
+	if (*cur == stop || *found_reg) {
+		*line = cur;
+		return NULL;
+	}
+
+	/* No prefix operator: this is just a primary expression. */
+	if (!(*cur == '+' || *cur == '-' || *cur == '<' || *cur == '>')) {
+		return get_primary_expr(line, address, found_reg, stop, dbg);
+	}
+
+	ptok = get_ptok(*cur, dbg);
+	cur++;
+	SKIP_WHITESPACE(cur, dbg);
+
+	switch (ptok) {
+		case PTOK_PLUS : type = EXPR_PLUS ; break;
+		case PTOK_MINUS: type = EXPR_MINUS; break;
+		case PTOK_GT   : type = EXPR_LOW  ; break;
+		case PTOK_LT   : type = EXPR_HIGH ; break;
+	}
+
+	node = make_expr(type, 0, NULL, dbg);
+	node->left = get_primary_expr(&cur, address, found_reg, stop, dbg);
+
+	*line = cur;
+	return node;
+}
+
+/*
+ * Parse a chain of unary expressions joined by "<<" or ">>".
+ *
+ * The chain is left-associative: every operator found becomes the parent
+ * of everything parsed so far (left child) and of the unary expression
+ * that follows it (right child).
+ *
+ * line:      In: start of the expression. Out: position after it.
+ * found_reg: Parsing of the chain stops once this is set.
+ * stop:      Character that ends the expression.
+ *
+ * Returns the root of the parsed subtree (possibly NULL).
+ */
+expr *get_shift_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	expr *tree = get_unary_expr(line, address, found_reg, stop, dbg);
+	char *cur = *line;
+
+	SKIP_WHITESPACE(cur, dbg);
+
+	while ((*cur == '<' || *cur == '>') && cur[1] == *cur) {
+		const uint8_t ptok = get_ptok(*cur, dbg);
+		int type = EXPR_NONE;
+		expr *op;
+
+		cur += 2;
+		SKIP_WHITESPACE(cur, dbg);
+
+		switch (ptok) {
+			case PTOK_GT: type = EXPR_RSHFT; break;
+			case PTOK_LT: type = EXPR_LSHFT; break;
+		}
+
+		op = make_expr(type, 0, NULL, dbg);
+
+		SKIP_WHITESPACE(cur, dbg);
+
+		op->left = tree;
+		op->right = get_unary_expr(&cur, address, found_reg, stop, dbg);
+		tree = op;
+
+		if (*cur == stop || *found_reg) {
+			break;
+		}
+	}
+
+	*line = cur;
+	return tree;
+}
+
+/*
+ * Parse a chain of shift expressions joined by a single '|'.
+ *
+ * A "||" is not treated as an operator here. The chain is
+ * left-associative: every '|' becomes an EXPR_OR node whose left child is
+ * everything parsed so far, and whose right child is the shift expression
+ * that follows.
+ *
+ * line:      In: start of the expression. Out: position after it.
+ * found_reg: Parsing of the chain stops once this is set.
+ * stop:      Character that ends the expression.
+ *
+ * Returns the root of the parsed subtree (possibly NULL).
+ */
+expr *get_or_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	expr *tree = get_shift_expr(line, address, found_reg, stop, dbg);
+	char *cur = *line;
+
+	SKIP_WHITESPACE(cur, dbg);
+
+	while (*cur == '|' && cur[1] != '|') {
+		expr *op;
+
+		cur++;
+		SKIP_WHITESPACE(cur, dbg);
+
+		op = make_expr(EXPR_OR, 0, NULL, dbg);
+
+		SKIP_WHITESPACE(cur, dbg);
+
+		op->left = tree;
+		op->right = get_shift_expr(&cur, address, found_reg, stop, dbg);
+		tree = op;
+
+		if (*cur == stop || *found_reg) {
+			break;
+		}
+	}
+
+	*line = cur;
+	return tree;
+}
+
+/*
+ * Parse a chain of or-expressions joined by a single '+' or '-'.
+ *
+ * "++" and "--" are not treated as operators here. The chain is
+ * left-associative: every operator becomes an EXPR_PLUS/EXPR_MINUS node
+ * whose left child is everything parsed so far, and whose right child is
+ * the or-expression that follows.
+ *
+ * line:      In: start of the expression. Out: position after it.
+ * found_reg: Parsing of the chain stops once this is set.
+ * stop:      Character that ends the expression.
+ *
+ * Returns the root of the parsed subtree (possibly NULL).
+ */
+expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	expr *tree = get_or_expr(line, address, found_reg, stop, dbg);
+	char *cur = *line;
+
+	SKIP_WHITESPACE(cur, dbg);
+
+	while ((*cur == '+' && cur[1] != '+') || (*cur == '-' && cur[1] != '-')) {
+		const uint8_t ptok = get_ptok(*cur, dbg);
+		int type = EXPR_NONE;
+		expr *op;
+
+		cur++;
+		SKIP_WHITESPACE(cur, dbg);
+
+		switch (ptok) {
+			case PTOK_PLUS : type = EXPR_PLUS ; break;
+			case PTOK_MINUS: type = EXPR_MINUS; break;
+		}
+
+		op = make_expr(type, 0, NULL, dbg);
+
+		SKIP_WHITESPACE(cur, dbg);
+
+		op->left = tree;
+		op->right = get_or_expr(&cur, address, found_reg, stop, dbg);
+		tree = op;
+
+		if (*cur == stop || *found_reg) {
+			break;
+		}
+	}
+
+	*line = cur;
+	return tree;
+}
+
+#undef SKIP_WHITESPACE
+
+/*
+ * Look at the start of a string, and classify the expression found there,
+ * without building an expression tree.
+ *
+ * str:       Text to classify; leading whitespace is skipped.
+ * found_reg: Set to 1 when no expression type could be determined (stop
+ *            character, register name, or unrecognised character).
+ * stop:      Character that ends the expression.
+ * dbg:       Debug flag.
+ *
+ * Returns the EXPR_* type of the first operator or operand, or EXPR_NONE.
+ */
+int get_expr_type(char *str, int *found_reg, char stop, uint8_t dbg) {
+	/* Skip over any whitespace. */
+	for (; isdelm(*str, dbg) & 0x10; str++);
+
+	if (*str != stop) {
+		uint8_t ptok = get_ptok(*str, dbg);
+
+		int i = 0;
+		int reg;
+		char *tmp;
+
+		ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
+
+		switch (ptok) {
+			case PTOK_PLUS : return EXPR_PLUS ;
+			case PTOK_MINUS: return EXPR_MINUS;
+			case PTOK_PIPE : return EXPR_OR ;
+			case PTOK_GT : return (get_ptok(*(++str), dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ;
+			case PTOK_LT : return (get_ptok(*(++str), dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH);
+			case PTOK_DOLLAR : return EXPR_HEX ;
+			case PTOK_PERCENT: return EXPR_BIN ;
+			case PTOK_NUMBER : return EXPR_DEC ;
+			case PTOK_SQUOTE : return EXPR_CHAR;
+			case PTOK_AT:
+				for (; *str == '@'; str++);
+				/* Falls through. */
+			case PTOK_ALPHA:
+				/* Find the end of the symbol name. */
+				for (; !isdelm2(str[i], dbg) || str[i] == '.'; i++);
+
+				/* One extra byte for the terminator; writing
+				 * tmp[i] into an i byte buffer overflowed
+				 * the heap.
+				 */
+				tmp = malloc(i+1);
+				if (tmp == NULL) {
+					break;
+				}
+
+				memcpy(tmp, str, i);
+				tmp[i] = '\0';
+
+				/* The copy is only needed for this check,
+				 * so release it on every path.
+				 */
+				reg = is_reg(tmp);
+				free(tmp);
+
+				if (reg < 0) {
+					return EXPR_SYM;
+				}
+				break;
+			default:
+				break;
+		}
+	}
+	*found_reg = 1;
+	return EXPR_NONE;
+}
+
+/*
+ * Entry point of the expression parser.
+ *
+ * line:      In: start of the expression. Out: position after it.
+ * address:   Current address, passed on to the sub-parsers.
+ * found_reg: Optional flag for the sub-parsers to report through; when
+ *            NULL, a local flag is used and its result is discarded.
+ * stop:      Character that ends the expression.
+ * dbg:       Debug flag.
+ *
+ * Returns the root of the expression tree (possibly NULL).
+ */
+expr *parse_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	int unused_flag = 0;
+	int *flag = (found_reg != NULL) ? found_reg : &unused_flag;
+	char *cursor = *line;
+	expr *root = get_additive_expr(&cursor, address, flag, stop, dbg);
+
+	*line = cursor;
+	return root;
+}
+
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
char sym[0x100];
uint16_t i = 0;
@@ -550,6 +984,8 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
uint8_t fall = 0;
uint8_t done = 0;
+ char delm = ',';
+
line *l = NULL;
token *st = NULL;
@@ -586,70 +1022,19 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
while (isdelm(str[i], dbg) != 1) {
uint8_t offset = 0;
base = 0;
- space = 0;
- tab = 0;
- while (isdelm(str[i+j], dbg) == 16) {
- tab += str[i+j] == '\t';
- space += str[i+j] == ' ';
- j++;
- }
j = 0;
+ for (tab = 0, space = 0; isdelm(str[i], dbg) == 16; tab += (str[i] == '\t'), space += (str[i] == ' '), i++);
if (dbg) {
printf("lex(): tab: %u, space: %u\n", tab, space);
}
- if (isdelm(str[i], dbg) == 16) {
- for (; isdelm(str[i], dbg) == 16; i++);
- }
uint8_t ptok = get_ptok(str[i], dbg);
if (is_altok(ptok, dbg)) {
- offset++;
- if (((ptok == PTOK_S || ptok == PTOK_B) && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) {
- offset++;
- }
- int is_alpha = 0;
- switch (get_ptok(str[i+offset], dbg)) {
- case PTOK_B :
- case PTOK_E :
- case PTOK_X :
- case PTOK_Y :
- case PTOK_S :
- case PTOK_P :
- case PTOK_A :
- case PTOK_C :
- case PTOK_D :
- case PTOK_F :
- case PTOK_R :
- case PTOK_ALPHA : ptok = PTOK_ALPHA; is_alpha = 1; break;
- case PTOK_NUMBER:
- if (ptok == PTOK_R) {
- char reg_num[3];
- int isnum;
- for (isnum = 0; isdigit(str[i+offset]) && !(isdelm(str[i+offset], dbg) & 0x03) && isnum < 2; offset++, isnum++) {
- reg_num[isnum] = str[i+offset];
- }
- reg_num[isnum] = '\0';
- if (isnum == 2) {
- int regnum = strtoul(reg_num, NULL, 10);
- if (regnum < 11 || regnum > 15) {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- } else {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- } else {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- break;
- }
- if (ptok == PTOK_P && toupper(str[i+1]) != 'C') {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
-
- ptok = (!is_inst && !is_alpha) ? PTOK_ALPHA : ptok;
+ for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
+ memcpy(lexeme, &str[i], j);
+ lexeme[j] = '\0';
+ j = 0;
+ ptok = (!is_inst || is_reg(lexeme) < 0) ? PTOK_ALPHA : ptok;
+ memset(lexeme, 0, strlen(lexeme)+1);
}
switch (ptok) {
@@ -685,7 +1070,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
i -= j;
} else {
l->count++;
- t = make_token(lex_type, k, space, tab, 0, "", NULL);
+ t = make_token(lex_type, k, space, tab, 0, "", NULL, NULL);
}
} else {
for (k = 0; !(isdelm(lexeme[k], dbg) & 17); k++) {
@@ -709,7 +1094,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
break;
}
l->count++;
- t = make_token(lex_type, rs, space, tab, 0, "", NULL);
+ t = make_token(lex_type, rs, space, tab, 0, "", NULL, NULL);
if (t) {
lt = t;
t = t->next;
@@ -719,13 +1104,17 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
break;
case PTOK_DQUOTE:
- i++;
- for (; isdelm(str[i+j], dbg) != 4 || isesc; j++) {
- isesc = (str[i+j] == '\\' && str[i+(j-1)] != '\\');
- }
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
+ do {
+ char *tmp = (str + i);
+ int get_value = (ptok == PTOK_SQUOTE);
+ value = parse_quote(&tmp, str[i], get_value, dbg);
+ tmp--;
+ i++;
+ j = tmp - (str + i);
+ memcpy(lexeme, str+i, j);
+ lexeme[j] = '\0';
+ i += j;
+ } while (0);
strid = get_string(lexeme, dbg);
if (strid == 0xFFFF) {
strid = stridx;
@@ -743,70 +1132,35 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
lex_type = TOK_STRING;
l->count++;
- t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL);
+ t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL, NULL);
break;
+ case PTOK_SQUOTE:
case PTOK_DOLLAR:
case PTOK_PERCENT:
case PTOK_NUMBER:
- value = 0;
- switch (ptok) {
- case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break;
- case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break;
- case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; /**/ break;
- }
- for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++);
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
- value = strtoull(lexeme, NULL, base);
- if (lt->id == TOK_SYM) {
- new_symbol(lt, sym, value, depth, dbg);
- depth = 0;
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
- }
l->count++;
- t = make_token(lex_type, 0, space, tab, value, "", NULL);
- t->digits = (lt->id != TOK_SYM) ? j : 0;
- break;
- case PTOK_SQUOTE:
- i++;
- k = 0;
- j = 0;
- while (isdelm(str[i], dbg) != 8 || isesc) {
- isesc = (str[i] == '\\' && str[i-1] != '\\');
- lexeme[j++] = str[i++];
- }
- isesc = 0;
- lexeme[j] = '\0';
- for (j = 0; lexeme[k] != '\0' && j < 7; k++) {
- switch (lexeme[k]) {
- case '\\':
- switch (lexeme[++k]) {
- case 'n' : ch.u8[j++] = '\n'; break;
- case 'r' : ch.u8[j++] = '\r'; break;
- case 't' : ch.u8[j++] = '\t'; break;
- case 'b' : ch.u8[j++] = '\b'; break;
- case '\'': ch.u8[j++] = '\''; break;
- case '\"': ch.u8[j++] = '\"'; break;
- case '\\': ch.u8[j++] = '\\'; break;
- }
- break;
- default: ch.u8[j++] = lexeme[k];
- }
- }
- lex_type = TOK_CHAR;
- l->count++;
- t = make_token(lex_type, 0, space, tab, ch.u64, "", NULL);
+ do {
+ lex_type = TOK_EXPR;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ j = 0;
+ i = tmp - str;
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } while (0);
break;
case PTOK_LBRACK:
case PTOK_HASH :
lex_type = TOK_MEM;
value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM;
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
+ t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
+ delm = (ptok == PTOK_LBRACK) ? ')' : delm;
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
if (lex_subtype != 0xFF) {
lex_subtype = 0xFF;
@@ -820,32 +1174,37 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
case PTOK_LT:
case PTOK_PIPE:
lex_type = TOK_EXPR;
- switch (ptok) {
- case PTOK_PLUS : value = EXPR_PLUS ; break;
- case PTOK_MINUS: value = EXPR_MINUS; break;
- case PTOK_PIPE : value = EXPR_OR ; break;
- case PTOK_GT : value = (get_ptok(str[i+1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; break;
- case PTOK_LT : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break;
- }
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
memset(lexeme, 0, strlen(lexeme)+1);
- lexeme[j++] = str[i];
- if (value == EXPR_LSHFT || value == EXPR_RSHFT) {
- lexeme[j++] = str[++i];
- }
+ do {
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ j = 0;
+ i = tmp - str;
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } while (0);
break;
case PTOK_EQU:
i++;
lex_type = TOK_SYM;
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j] = str[i];
+ if (lt) {
+ lt->id = lex_type;
+ lt->type = depth;
+ }
+ new_symbol(lt, sym, address, depth, dbg);
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
break;
case PTOK_RBRACK:
i++;
lex_type = TOK_IND;
+ delm = (delm == ')') ? ',' : delm;
lexeme[j] = ')';
lexeme[j+1] = '\0';
lexeme[j+2] = '\0';
@@ -888,27 +1247,12 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0';
lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0';
lexeme[j+3] = '\0';
+
lex_type = TOK_REG;
- switch (ptok) {
- case PTOK_A: value = REG_A; break;
- case PTOK_X: value = REG_X; break;
- case PTOK_Y: value = REG_Y; break;
- case PTOK_E: value = REG_E; break;
- case PTOK_C: value = REG_C; break;
- case PTOK_D: value = REG_D; break;
- case PTOK_S:
- case PTOK_B:
- if (get_ptok(lexeme[j+1], dbg) == PTOK_P) {
- value = (ptok == PTOK_S) ? REG_SP : REG_BP;
- } else {
- value = (ptok == PTOK_S) ? REG_S : REG_B;
- }
- break;
- case PTOK_F: value = REG_F; break;
- case PTOK_R: value = strtoull(lexeme+j+1, NULL, 10); break;
- }
+ value = is_reg(lexeme);
+
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
+ t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
break;
@@ -919,11 +1263,12 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
of = 2;
lex_type = TOK_OF;
l->count++;
- t = make_token(lex_type, of, space, tab, 0, "", NULL);
+ t = make_token(lex_type, of, space, tab, 0, "", NULL, NULL);
break;
case PTOK_AT:
memset(lexeme, 0, strlen(lexeme)+1);
for (char *tmp = str+i; *tmp++ == '@'; depth++);
+ i += depth;
lexeme[j] = '@';
lex_type = TOK_LOCAL;
if (lt || t) {
@@ -977,20 +1322,36 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lex_type = TOK_COMMENT;
l->count++;
if (j) {
- t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL);
+ t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL, NULL);
} else {
- t = make_token(lex_type, 0, space, tab, 0, "" , NULL);
+ t = make_token(lex_type, 0, space, tab, 0, "" , NULL, NULL);
}
break;
case PTOK_ALPHA:
+ /* Get the length of the token. */
for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
isch = 0;
isop = 0;
- if (j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
+ /* We need to figure out if we're allowed to
+ * search for a valid instruction name.
+ *
+ * We're only allowed to do so if:
+ *
+ * 1. The previous token wasn't a directive.
+ * 2. There wasn't an instruction before us.
+ * 3. The length of the token is at, or above
+ * the length of the shortest instruction.
+ * 4. The length of the token is at, or below
+ * the length of the longest instruction.
+ * 5. The character after the token isn't a
+ * label delimiter.
+ * 6. We're not within a struct/union block.
+ */
+ if (!(lt && lt->id == TOK_DIR) && !is_inst && j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
for (k = 0; k < OPNUM; k++) {
int find_ext = (k < EXT_OPNUM);
int find_ortho = (k < ORTHO_OPNUM);
@@ -1010,7 +1371,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
isop = 1;
is_inst = 1;
l->count++;
- t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
+ t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
break;
}
}
@@ -1024,7 +1385,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
if (!strcasecmp(lexeme, set_cc[k])) {
lex_type = TOK_CC;
l->count++;
- t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
+ t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
}
}
}
@@ -1035,28 +1396,31 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
if (ret == PTOK_COLON || ret == PTOK_EQU) {
depth = (lex_type == TOK_LOCAL);
}
- lex_type = TOK_SYM;
+ int is_expr = (!is_struct && str[i+spaces] != ':' && str[i+spaces] != '=');
l->count++;
- t = make_token(lex_type, depth, space, tab, 0, "", NULL);
memcpy(sym, lexeme, j+1);
- if (dbg) {
- printf("lex(): spaces: %u\n", spaces);
+ if (is_expr) {
+ i -= j + (depth);
+ lex_type = TOK_EXPR;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ /*i += j;*/
+ i = tmp - str;
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } else {
+ memcpy(sym, lexeme, j+1);
+ lex_type = TOK_SYM;
+ t = make_token(lex_type, depth, space, tab, 0, "", NULL, NULL);
}
+
if (is_struct) {
create_struct(cur_sym, l, t, lt, sym, dbg);
depth = 0;
- } else if ((str[i+spaces] != ':' && str[i+spaces] != '=')) {
- symbol *s;
- int scope_depth;
- char *tmp = lexeme;
- for (scope_depth = 0; *tmp; scope_depth += (*tmp++ == '.'));
- char *scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, lexeme, dbg) : lexeme;
- t->sym = get_sym(scope_name, address, t, (scope_depth) ? scope_depth : depth, 1, dbg);
- isfixup += (t && t->sym == NULL);
- depth = 0;
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
}
/*if (!is_struct && t && t->sym && t->sym->isstruct) {
tmp_sym = t->sym;
@@ -1074,7 +1438,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
j = 0;
if ((lex_type == TOK_OPCODE || lex_type == TOK_EXTOP) && !isop) {
j = 0;
- } else if (lex_type == TOK_EXPR || (lex_type != TOK_MEMBER && !isdelm2(str[i], dbg))) {
+ } else if (lex_type != TOK_EXPR && lex_type != TOK_LOCAL && lex_type != TOK_MEMBER && !isdelm2(str[i], dbg)) {
i++;
}
switch (lex_type) {
@@ -1093,22 +1457,13 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
}
if (i) {
- l->tok = tokens;
+ l->tok = tokens;
token *tok = tokens;
- if (tok->id == TOK_SYM && tok->next) {
+ if ((tok->id == TOK_SYM || tok->id == TOK_LABEL) && tok->next) {
symbol *s = tok->sym;
for (; tok; tok = tok->next) {
- switch (tok->id) {
- case TOK_HEX :
- case TOK_BIN :
- case TOK_DEC :
- case TOK_CHAR:
- case TOK_EXPR:
- s->val = get_val(tok, address, 3, 0xFF, 0, dbg);
- if (tok->next) {
- tok = skip_expr(tok, 0xFF, 0, dbg);
- }
- break;
+ if (tok->id == TOK_EXPR) {
+ s->val = get_val(tok->expr, address, 3, 0, dbg);
}
}
}