summaryrefslogtreecommitdiff
path: root/lexer.c
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2021-02-08 23:03:31 -0500
committermrb0nk500 <b0nk@b0nk.xyz>2021-02-08 23:03:31 -0500
commit04b29166fd226e2464bcfacf6839e3274ff68cc6 (patch)
tree848685973ecbee3b74868820636ac7eb7af4583b /lexer.c
parentcd6982e5da1f5facdc1e0154b3a27c01e8b076c9 (diff)
Completely rewrote the expression handler.
It's now separate from the lexer, and was designed to make it both easier to understand, and easier to parse.
Diffstat (limited to 'lexer.c')
-rw-r--r--lexer.c735
1 files changed, 545 insertions, 190 deletions
diff --git a/lexer.c b/lexer.c
index 57a7e14..bc954fc 100644
--- a/lexer.c
+++ b/lexer.c
@@ -68,11 +68,11 @@ int add_symbol(symbol *sym, const char *name, symbol **root, symbol **lsym, symb
/*return 0;*/
}
}
- for (; s != NULL && s->next != NULL; s = s->next) {
- if (dbg) {
- printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
- }
+ for (; s != NULL && s->next != NULL; s = s->next) {
+ if (dbg) {
+ printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
}
+ }
if ((is_new_scope && *lsym) || *lloc || *csym) {
/*
if (is_new_scope) {
@@ -462,6 +462,29 @@ fixup *find_fixup(fixup *root, const char *name, int depth, uint8_t dbg) {
return NULL;
}
+/* Walk an expression tree and redirect matching symbol references to `sym`.
+ *
+ * root  - node to inspect; NULL is accepted and ignored.
+ * name  - symbol name to look for.
+ * sym   - symbol that matching nodes are pointed at.
+ * depth - only references whose symbol has this depth are replaced.
+ * dbg   - debug flag; only forwarded to the recursive calls.
+ *
+ * Both subtrees are visited before the node itself. A node is rewritten
+ * when it is an EXPR_SYM node holding a symbol with the same name and
+ * depth whose `def` flag is clear.
+ */
+void find_expr_sym(expr *root, const char *name, symbol *sym, int depth, uint8_t dbg) {
+	const size_t wanted_len = strlen(name);
+
+	if (root == NULL) {
+		return;
+	}
+
+	if (root->left != NULL) {
+		find_expr_sym(root->left, name, sym, depth, dbg);
+	}
+	if (root->right != NULL) {
+		find_expr_sym(root->right, name, sym, depth, dbg);
+	}
+
+	if (root->type != EXPR_SYM || root->value.sym == NULL) {
+		return;
+	}
+
+	symbol *ref = root->value.sym;
+	const size_t ref_len = strlen(ref->name);
+
+	/* Cheap length and first character checks come before the full compare. */
+	const int same_name = (wanted_len == ref_len) && (name[0] == ref->name[0]) && (strcmp(name, ref->name) == 0);
+
+	if (same_name && ref->depth == depth && !ref->def) {
+		root->value.sym = sym;
+	}
+}
+
void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uint8_t dbg) {
size_t name_len = strlen(name);
for (token *t = l->tok; t; t = t->next) {
@@ -478,6 +501,7 @@ void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uin
}
}
break;
+ case TOK_EXPR: find_expr_sym(t->expr, name, sym, depth, dbg); break;
}
}
if (l->next) {
@@ -512,6 +536,416 @@ void new_symbol(token *t, const char *name, uint64_t value, int depth, uint8_t d
}
}
+/* Decode one backslash escape sequence.
+ *
+ * s    - points at the backslash that starts the sequence. The character
+ *        is skipped without being checked.
+ * code - receives the decoded character. May be NULL when the caller only
+ *        needs to know where the sequence ends.
+ *
+ * Returns a pointer to the first character after the sequence.
+ *
+ * Recognised forms:
+ *   \a \b \f \n \r \t \v \e     control characters
+ *   \\ \" \'                    the quoted character itself
+ *   \$hh  \xhh  \Xhh            hexadecimal value
+ *   \%bbbbbbbb                  binary value
+ *   \ooo                        octal value (sequence starts with a digit)
+ *
+ * For any other character, *code is left untouched and the returned
+ * pointer addresses the character right after the backslash.
+ */
+char *parse_escape(char *s, char *code) {
+	char dummy;
+	char *end;
+	int base;
+
+	/* Step over the backslash. No error is reported if it is missing. */
+	s++;
+
+	if (code == NULL) {
+		code = &dummy;
+	}
+
+	switch (*s) {
+		case 'a' : *code = '\a'; return s+1;
+		case 'b' : *code = '\b'; return s+1;
+		case 'f' : *code = '\f'; return s+1;
+		case 'n' : *code = '\n'; return s+1;
+		case 'r' : *code = '\r'; return s+1;
+		case 't' : *code = '\t'; return s+1;
+		case 'v' : *code = '\v'; return s+1;
+		case '\\': *code = '\\'; return s+1;
+		case '\"': *code = '\"'; return s+1;
+		case '\'': *code = '\''; return s+1;
+		case 'e' : *code = '\x1B'; return s+1;
+		/* Numeric escapes: pick the base, and step over the prefix
+		 * character if there is one.
+		 */
+		case '$' : case 'x' : case 'X' :
+			base = 16;
+			s++;
+			break;
+		case '%' :
+			/* The prefix has to be skipped here as well, otherwise
+			 * strtoull() stops on the '%' and yields zero.
+			 */
+			base = 2;
+			s++;
+			break;
+		case '0' : case '1' : case '2' : case '3' : case '4' :
+		case '5' : case '6' : case '7' : case '8' : case '9' :
+			/* No prefix: the digit is part of the octal value. */
+			base = 8;
+			break;
+		default  :
+			/* No valid escape sequence was found. */
+			return s;
+	}
+
+	/* Only the low bits that fit into a char are kept. */
+	*code = (char)strtoull(s, &end, base);
+	return end;
+}
+
+/* Scan a quoted literal, and optionally pack its characters into a value.
+ *
+ * s         - in: points at the opening delimiter.
+ *             out: points just past the closing delimiter, or at the
+ *             terminating NUL when the literal is never closed.
+ * delm      - the delimiter character. Two delimiters in a row inside the
+ *             literal stand for one literal delimiter.
+ * get_value - when non-zero, the first sizeof(uint64_t) characters are
+ *             stored into the returned value, one per byte.
+ * dbg       - debug flag; unused here.
+ *
+ * Returns the packed value, or 0 when get_value is zero.
+ *
+ * NOTE(review): the bytes are written through a byte pointer, so their
+ * position inside the value follows the host byte order - confirm that is
+ * what the callers expect.
+ */
+uint64_t parse_quote(char **s, char delm, int get_value, uint8_t dbg) {
+	uint64_t packed = 0;
+	uint8_t *bytes = (uint8_t *)&packed;
+	char *cur = *s + 1; /* Step over the opening delimiter. */
+	int idx = 0;
+
+	while (*cur != '\0') {
+		char ch;
+
+		if (*cur == '\\') {
+			/* Escape sequence: let parse_escape() decode and skip it. */
+			cur = parse_escape(cur, &ch);
+		} else {
+			ch = *cur++;
+			if (ch == delm) {
+				if (*cur != delm) {
+					/* A lone delimiter closes the literal. */
+					break;
+				}
+				/* A doubled delimiter counts as a single one. */
+				cur++;
+			}
+		}
+
+		if (get_value && idx < sizeof(uint64_t)) {
+			bytes[idx] = ch;
+		}
+		idx++;
+	}
+
+	*s = cur;
+
+	return packed;
+}
+
+/* Allocate a new expression node with no children.
+ *
+ * type  - expression type stored in the node.
+ * value - numeric value; used only when sym is NULL.
+ * sym   - symbol to reference; takes precedence over value when non-NULL.
+ * dbg   - debug flag; unused here.
+ *
+ * Returns the new node, or NULL when the allocation fails. The node is
+ * zero filled before the fields are set, so no member is left
+ * indeterminate.
+ */
+expr *make_expr(int type, uint64_t value, symbol *sym, uint8_t dbg) {
+	expr *node = calloc(1, sizeof *node);
+
+	if (node == NULL) {
+		return NULL;
+	}
+
+	node->type = type;
+	node->left = NULL;
+	node->right = NULL;
+
+	if (sym != NULL) {
+		node->value.sym = sym;
+	} else {
+		node->value.val = value;
+	}
+
+	return node;
+}
+
+/* Check whether a string is a register name.
+ *
+ * str - NUL terminated candidate name; letters are matched in either case.
+ *
+ * Returns the matching REG_* value, or -1 when the string is not a
+ * register name.
+ *
+ * Accepted names:
+ *   one character    - a, b, x, y, e, c, d, s, f
+ *   two characters   - sp, bp, pc
+ *   three characters - 'r' followed by a decimal number that lies between
+ *                      REG_R11 and REG_R15; that number is returned as is.
+ */
+int is_reg(const char *str) {
+	const size_t len = strlen(str);
+	const char first = str[0];
+
+	if (len == 1) {
+		switch (first) {
+			case 'a': case 'A': return REG_A;
+			case 'b': case 'B': return REG_B;
+			case 'x': case 'X': return REG_X;
+			case 'y': case 'Y': return REG_Y;
+			case 'e': case 'E': return REG_E;
+			case 'c': case 'C': return REG_C;
+			case 'd': case 'D': return REG_D;
+			case 's': case 'S': return REG_S;
+			case 'f': case 'F': return REG_F;
+		}
+		return -1;
+	}
+
+	if (len == 2) {
+		const char second = str[1];
+
+		if (second == 'p' || second == 'P') {
+			if (first == 's' || first == 'S') {
+				return REG_SP;
+			}
+			if (first == 'b' || first == 'B') {
+				return REG_BP;
+			}
+			return -1;
+		}
+		if (first == 'p' || first == 'P') {
+			return (second == 'c' || second == 'C') ? REG_PC : -1;
+		}
+		return -1;
+	}
+
+	if (len == 3 && (first == 'r' || first == 'R')) {
+		int regnum = strtoul(str+1, NULL, 10);
+		if (regnum >= REG_R11 && regnum <= REG_R15) {
+			return regnum;
+		}
+	}
+
+	return -1;
+}
+
+#define SKIP_WHITESPACE(str, dbg) \
+ /* Skip over any whitespace. */ \
+ for (; isdelm(*str, dbg) & 0x10; str++)
+
+
+/* Parse a primary expression: a number, a character literal, or a symbol.
+ *
+ * line      - in: start of the text to parse. out: first character after
+ *             the parsed token and any whitespace that follows it. Left
+ *             untouched when NULL is returned.
+ * address   - address handed on to get_sym() for symbol lookups.
+ * found_reg - set to 1 when the token is a register name, or when parsing
+ *             ran into the stop character. Must not be NULL.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * Returns a new leaf node, or NULL when the text starts with the stop
+ * character, names a register, or memory runs out.
+ *
+ * Token forms:
+ *   $hex, %binary, decimal - EXPR_HEX, EXPR_BIN, EXPR_DEC
+ *   'c'                    - EXPR_CHAR
+ *   name, @name, a.b       - EXPR_SYM; every leading '@' raises the depth,
+ *                            every '.' inside the name raises the scope
+ *                            depth. An unknown symbol bumps isfixup.
+ * Anything else yields an EXPR_NONE node with a value of zero.
+ */
+expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	char *str = *line;
+	char *tmp;
+	char *scope_name;
+	symbol *s = NULL;
+
+	/* Zero unless a number or a character literal is parsed, so that
+	 * make_expr() never gets handed an indeterminate value.
+	 */
+	uint64_t value = 0;
+
+	int i = 0;
+	int base = 10;
+	int type = EXPR_NONE;
+	int depth = 0;
+	int scope_depth = 0;
+
+	uint8_t ptok = get_ptok(*str, dbg);
+	ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
+
+	if (*str == stop) {
+		return NULL;
+	}
+
+	switch (ptok) {
+		case PTOK_DOLLAR:
+		case PTOK_PERCENT:
+		case PTOK_NUMBER:
+			switch (ptok) {
+				case PTOK_DOLLAR : base = 16; type = EXPR_HEX; str++; break;
+				case PTOK_PERCENT: base = 2; type = EXPR_BIN; str++; break;
+				case PTOK_NUMBER : base = 10; type = EXPR_DEC; /****/ break;
+			}
+
+			/* Get the number of digits, and
+			 * find the end of the number.
+			 */
+			for (; isxdigit((unsigned char)str[i]) && !(isdelm(str[i], dbg) & 0x03); i++);
+
+			/* One extra byte for the terminator. */
+			tmp = malloc(i+1);
+			if (tmp == NULL) {
+				return NULL;
+			}
+
+			memcpy(tmp, str, i);
+			tmp[i] = '\0';
+
+			value = strtoull(tmp, NULL, base);
+			free(tmp);
+			break;
+		case PTOK_SQUOTE:
+			type = EXPR_CHAR;
+			/* parse_quote() moves str past the closing quote itself. */
+			value = parse_quote(&str, *str, 1, dbg);
+			break;
+		case PTOK_AT:
+			/* Increment the depth count, by the
+			 * number of '@' signs before the
+			 * symbol name.
+			 */
+			for (depth = 0; *str == '@'; str++, depth++);
+			/* Falls through. */
+		case PTOK_ALPHA:
+			/* Find the end of the symbol name.
+			 * Also increment the depth count every
+			 * time a '.' is found in the symbol name.
+			 */
+			for (scope_depth = 0; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
+
+			/* One extra byte for the terminator. */
+			tmp = malloc(i+1);
+			if (tmp == NULL) {
+				return NULL;
+			}
+
+			memcpy(tmp, str, i);
+			tmp[i] = '\0';
+
+			if (is_reg(tmp) >= 0) {
+				free(tmp);
+				*found_reg = 1;
+				return NULL;
+			}
+
+			scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp;
+			s = get_sym(scope_name, address, NULL, (scope_depth) ? scope_depth : depth, 1, dbg);
+			isfixup += (s == NULL);
+			type = EXPR_SYM;
+			/* NOTE(review): tmp is not freed here, since it is not
+			 * visible from this function whether get_sym() or
+			 * mk_scope_name() keep the pointer - confirm, and
+			 * free it if they do not.
+			 */
+			break;
+	}
+
+	str += i;
+
+	SKIP_WHITESPACE(str, dbg);
+
+	if (ptok == PTOK_SCOLON || ptok == PTOK_COMMA || *str == stop) {
+		*found_reg = 1;
+	}
+
+	*line = str;
+
+	return make_expr(type, value, s, dbg);
+}
+
+/* Parse an optional unary operator followed by a primary expression.
+ *
+ * line      - in: start of the text. out: first unparsed character.
+ * address   - forwarded to get_primary_expr().
+ * found_reg - parsing is skipped when this is already set; forwarded
+ *             otherwise.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * '+' and '-' give EXPR_PLUS and EXPR_MINUS nodes, '>' gives EXPR_LOW and
+ * '<' gives EXPR_HIGH. The operand becomes the left child of the operator
+ * node. Without an operator the primary expression is returned directly.
+ * Returns NULL when the text starts with the stop character, or when
+ * *found_reg is already set.
+ */
+expr *get_unary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	char *cur = *line;
+
+	if (*cur == stop || *found_reg) {
+		*line = cur;
+		return NULL;
+	}
+
+	const int has_operator = (*cur == '+' || *cur == '-' || *cur == '<' || *cur == '>');
+	if (!has_operator) {
+		return get_primary_expr(line, address, found_reg, stop, dbg);
+	}
+
+	uint8_t op = get_ptok(*cur++, dbg);
+	SKIP_WHITESPACE(cur, dbg);
+
+	int type;
+	switch (op) {
+		case PTOK_PLUS : type = EXPR_PLUS ; break;
+		case PTOK_MINUS: type = EXPR_MINUS; break;
+		case PTOK_GT   : type = EXPR_LOW  ; break;
+		case PTOK_LT   : type = EXPR_HIGH ; break;
+	}
+
+	expr *node = make_expr(type, 0, NULL, dbg);
+	node->left = get_primary_expr(&cur, address, found_reg, stop, dbg);
+
+	*line = cur;
+	return node;
+}
+
+/* Parse a chain of unary expressions joined by "<<" or ">>".
+ *
+ * line      - in: start of the text. out: first unparsed character.
+ * address   - forwarded to get_unary_expr().
+ * found_reg - forwarded; the chain ends once it is set.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * Every operator produces an EXPR_LSHFT or EXPR_RSHFT node whose left
+ * child is everything parsed so far and whose right child is the next
+ * unary expression, so the chain groups from the left. Returns the root of
+ * the chain, which is the first operand itself when no operator follows.
+ */
+expr *get_shift_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	expr *tree = get_unary_expr(line, address, found_reg, stop, dbg);
+	char *cur = *line;
+
+	SKIP_WHITESPACE(cur, dbg);
+
+	/* A shift operator is the same angle bracket twice in a row. */
+	while ((cur[0] == '<' || cur[0] == '>') && (cur[1] == cur[0])) {
+		uint8_t op = get_ptok(*cur, dbg);
+
+		cur += 2;
+		SKIP_WHITESPACE(cur, dbg);
+
+		int type;
+		switch (op) {
+			case PTOK_GT: type = EXPR_RSHFT; break;
+			case PTOK_LT: type = EXPR_LSHFT; break;
+		}
+
+		expr *node = make_expr(type, 0, NULL, dbg);
+
+		/* Repeated scan kept from the original, in case isdelm() logs. */
+		SKIP_WHITESPACE(cur, dbg);
+
+		node->left = tree;
+		node->right = get_unary_expr(&cur, address, found_reg, stop, dbg);
+		tree = node;
+
+		if (*cur == stop || *found_reg) {
+			break;
+		}
+	}
+
+	*line = cur;
+	return tree;
+}
+
+/* Parse a chain of shift expressions joined by a single '|'.
+ *
+ * line      - in: start of the text. out: first unparsed character.
+ * address   - forwarded to get_shift_expr().
+ * found_reg - forwarded; the chain ends once it is set.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * A '|' that is directly followed by another '|' is not consumed. Every
+ * operator produces an EXPR_OR node whose left child is everything parsed
+ * so far and whose right child is the next shift expression. Returns the
+ * root of the chain.
+ */
+expr *get_or_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	expr *tree = get_shift_expr(line, address, found_reg, stop, dbg);
+	char *cur = *line;
+
+	SKIP_WHITESPACE(cur, dbg);
+
+	while (cur[0] == '|' && cur[1] != '|') {
+		cur++;
+		SKIP_WHITESPACE(cur, dbg);
+
+		expr *node = make_expr(EXPR_OR, 0, NULL, dbg);
+
+		/* Repeated scan kept from the original, in case isdelm() logs. */
+		SKIP_WHITESPACE(cur, dbg);
+
+		node->left = tree;
+		node->right = get_shift_expr(&cur, address, found_reg, stop, dbg);
+		tree = node;
+
+		if (*cur == stop || *found_reg) {
+			break;
+		}
+	}
+
+	*line = cur;
+	return tree;
+}
+
+/* Parse a chain of OR expressions joined by a single '+' or '-'.
+ *
+ * line      - in: start of the text. out: first unparsed character.
+ * address   - forwarded to get_or_expr().
+ * found_reg - forwarded; the chain ends once it is set.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * A doubled operator ("++" or "--") is not consumed. Every operator
+ * produces an EXPR_PLUS or EXPR_MINUS node whose left child is everything
+ * parsed so far and whose right child is the next OR expression. Returns
+ * the root of the chain.
+ */
+expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	expr *tree = get_or_expr(line, address, found_reg, stop, dbg);
+	char *cur = *line;
+
+	SKIP_WHITESPACE(cur, dbg);
+
+	while ((cur[0] == '+' && cur[1] != '+') || (cur[0] == '-' && cur[1] != '-')) {
+		uint8_t op = get_ptok(*cur++, dbg);
+
+		SKIP_WHITESPACE(cur, dbg);
+
+		int type;
+		switch (op) {
+			case PTOK_PLUS : type = EXPR_PLUS ; break;
+			case PTOK_MINUS: type = EXPR_MINUS; break;
+		}
+
+		expr *node = make_expr(type, 0, NULL, dbg);
+
+		/* Repeated scan kept from the original, in case isdelm() logs. */
+		SKIP_WHITESPACE(cur, dbg);
+
+		node->left = tree;
+		node->right = get_or_expr(&cur, address, found_reg, stop, dbg);
+		tree = node;
+
+		if (*cur == stop || *found_reg) {
+			break;
+		}
+	}
+
+	*line = cur;
+	return tree;
+}
+
+#undef SKIP_WHITESPACE
+
+/* Peek at the start of a string and report what kind of expression
+ * begins there, without consuming anything.
+ *
+ * str       - text to inspect; leading whitespace is ignored.
+ * found_reg - set to 1 whenever EXPR_NONE is returned. May be NULL.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * Returns the EXPR_* type of the first token. EXPR_NONE is returned when
+ * the text starts with the stop character, names a register, starts with
+ * a token that cannot begin an expression, or when memory runs out.
+ */
+int get_expr_type(char *str, int *found_reg, char stop, uint8_t dbg) {
+	/* Skip over any whitespace. */
+	for (; isdelm(*str, dbg) & 0x10; str++);
+
+	if (*str != stop) {
+		uint8_t ptok = get_ptok(*str, dbg);
+
+		ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
+
+		switch (ptok) {
+			case PTOK_PLUS : return EXPR_PLUS ;
+			case PTOK_MINUS: return EXPR_MINUS;
+			case PTOK_PIPE : return EXPR_OR   ;
+			/* A doubled angle bracket is a shift operator. */
+			case PTOK_GT : return (get_ptok(*(++str), dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ;
+			case PTOK_LT : return (get_ptok(*(++str), dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH);
+			case PTOK_DOLLAR : return EXPR_HEX ;
+			case PTOK_PERCENT: return EXPR_BIN ;
+			case PTOK_NUMBER : return EXPR_DEC ;
+			case PTOK_SQUOTE : return EXPR_CHAR;
+			case PTOK_AT:
+				for (; *str == '@'; str++);
+				/* Falls through. */
+			case PTOK_ALPHA: {
+				int i = 0;
+				int reg;
+				char *tmp;
+
+				/* Find the end of the symbol name. */
+				for (; !isdelm2(str[i], dbg) || str[i] == '.'; i++);
+
+				/* One extra byte for the terminator. */
+				tmp = malloc(i+1);
+				if (tmp == NULL) {
+					break;
+				}
+
+				memcpy(tmp, str, i);
+				tmp[i] = '\0';
+
+				/* The copy is only needed for the register check. */
+				reg = is_reg(tmp);
+				free(tmp);
+
+				if (reg < 0) {
+					return EXPR_SYM;
+				}
+				break;
+			}
+		}
+	}
+
+	if (found_reg != NULL) {
+		*found_reg = 1;
+	}
+	return EXPR_NONE;
+}
+
+/* Entry point of the expression parser.
+ *
+ * line      - in: start of the expression. out: first unparsed character.
+ * address   - forwarded to the parsing helpers.
+ * found_reg - receives the flag the helpers set; may be NULL when the
+ *             caller is not interested in it.
+ * stop      - character that ends the expression.
+ * dbg       - debug flag, forwarded to the helpers.
+ *
+ * Returns the tree built by get_additive_expr().
+ */
+expr *parse_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+	int unused_flag = 0;
+	int *reg_flag = (found_reg != NULL) ? found_reg : &unused_flag;
+	char *cur = *line;
+
+	expr *tree = get_additive_expr(&cur, address, reg_flag, stop, dbg);
+
+	*line = cur;
+	return tree;
+}
+
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
char sym[0x100];
uint16_t i = 0;
@@ -550,6 +984,8 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
uint8_t fall = 0;
uint8_t done = 0;
+ char delm = ',';
+
line *l = NULL;
token *st = NULL;
@@ -586,70 +1022,19 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
while (isdelm(str[i], dbg) != 1) {
uint8_t offset = 0;
base = 0;
- space = 0;
- tab = 0;
- while (isdelm(str[i+j], dbg) == 16) {
- tab += str[i+j] == '\t';
- space += str[i+j] == ' ';
- j++;
- }
j = 0;
+ for (tab = 0, space = 0; isdelm(str[i], dbg) == 16; tab += (str[i] == '\t'), space += (str[i] == ' '), i++);
if (dbg) {
printf("lex(): tab: %u, space: %u\n", tab, space);
}
- if (isdelm(str[i], dbg) == 16) {
- for (; isdelm(str[i], dbg) == 16; i++);
- }
uint8_t ptok = get_ptok(str[i], dbg);
if (is_altok(ptok, dbg)) {
- offset++;
- if (((ptok == PTOK_S || ptok == PTOK_B) && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) {
- offset++;
- }
- int is_alpha = 0;
- switch (get_ptok(str[i+offset], dbg)) {
- case PTOK_B :
- case PTOK_E :
- case PTOK_X :
- case PTOK_Y :
- case PTOK_S :
- case PTOK_P :
- case PTOK_A :
- case PTOK_C :
- case PTOK_D :
- case PTOK_F :
- case PTOK_R :
- case PTOK_ALPHA : ptok = PTOK_ALPHA; is_alpha = 1; break;
- case PTOK_NUMBER:
- if (ptok == PTOK_R) {
- char reg_num[3];
- int isnum;
- for (isnum = 0; isdigit(str[i+offset]) && !(isdelm(str[i+offset], dbg) & 0x03) && isnum < 2; offset++, isnum++) {
- reg_num[isnum] = str[i+offset];
- }
- reg_num[isnum] = '\0';
- if (isnum == 2) {
- int regnum = strtoul(reg_num, NULL, 10);
- if (regnum < 11 || regnum > 15) {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- } else {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- } else {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- break;
- }
- if (ptok == PTOK_P && toupper(str[i+1]) != 'C') {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
-
- ptok = (!is_inst && !is_alpha) ? PTOK_ALPHA : ptok;
+ for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
+ memcpy(lexeme, &str[i], j);
+ lexeme[j] = '\0';
+ j = 0;
+ ptok = (!is_inst || is_reg(lexeme) < 0) ? PTOK_ALPHA : ptok;
+ memset(lexeme, 0, strlen(lexeme)+1);
}
switch (ptok) {
@@ -685,7 +1070,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
i -= j;
} else {
l->count++;
- t = make_token(lex_type, k, space, tab, 0, "", NULL);
+ t = make_token(lex_type, k, space, tab, 0, "", NULL, NULL);
}
} else {
for (k = 0; !(isdelm(lexeme[k], dbg) & 17); k++) {
@@ -709,7 +1094,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
break;
}
l->count++;
- t = make_token(lex_type, rs, space, tab, 0, "", NULL);
+ t = make_token(lex_type, rs, space, tab, 0, "", NULL, NULL);
if (t) {
lt = t;
t = t->next;
@@ -719,13 +1104,17 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
break;
case PTOK_DQUOTE:
- i++;
- for (; isdelm(str[i+j], dbg) != 4 || isesc; j++) {
- isesc = (str[i+j] == '\\' && str[i+(j-1)] != '\\');
- }
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
+ do {
+ char *tmp = (str + i);
+ int get_value = (ptok == PTOK_SQUOTE);
+ value = parse_quote(&tmp, str[i], get_value, dbg);
+ tmp--;
+ i++;
+ j = tmp - (str + i);
+ memcpy(lexeme, str+i, j);
+ lexeme[j] = '\0';
+ i += j;
+ } while (0);
strid = get_string(lexeme, dbg);
if (strid == 0xFFFF) {
strid = stridx;
@@ -743,70 +1132,35 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
lex_type = TOK_STRING;
l->count++;
- t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL);
+ t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL, NULL);
break;
+ case PTOK_SQUOTE:
case PTOK_DOLLAR:
case PTOK_PERCENT:
case PTOK_NUMBER:
- value = 0;
- switch (ptok) {
- case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break;
- case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break;
- case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; /**/ break;
- }
- for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++);
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
- value = strtoull(lexeme, NULL, base);
- if (lt->id == TOK_SYM) {
- new_symbol(lt, sym, value, depth, dbg);
- depth = 0;
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
- }
l->count++;
- t = make_token(lex_type, 0, space, tab, value, "", NULL);
- t->digits = (lt->id != TOK_SYM) ? j : 0;
- break;
- case PTOK_SQUOTE:
- i++;
- k = 0;
- j = 0;
- while (isdelm(str[i], dbg) != 8 || isesc) {
- isesc = (str[i] == '\\' && str[i-1] != '\\');
- lexeme[j++] = str[i++];
- }
- isesc = 0;
- lexeme[j] = '\0';
- for (j = 0; lexeme[k] != '\0' && j < 7; k++) {
- switch (lexeme[k]) {
- case '\\':
- switch (lexeme[++k]) {
- case 'n' : ch.u8[j++] = '\n'; break;
- case 'r' : ch.u8[j++] = '\r'; break;
- case 't' : ch.u8[j++] = '\t'; break;
- case 'b' : ch.u8[j++] = '\b'; break;
- case '\'': ch.u8[j++] = '\''; break;
- case '\"': ch.u8[j++] = '\"'; break;
- case '\\': ch.u8[j++] = '\\'; break;
- }
- break;
- default: ch.u8[j++] = lexeme[k];
- }
- }
- lex_type = TOK_CHAR;
- l->count++;
- t = make_token(lex_type, 0, space, tab, ch.u64, "", NULL);
+ do {
+ lex_type = TOK_EXPR;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ j = 0;
+ i = tmp - str;
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } while (0);
break;
case PTOK_LBRACK:
case PTOK_HASH :
lex_type = TOK_MEM;
value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM;
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
+ t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
+ delm = (ptok == PTOK_LBRACK) ? ')' : delm;
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
if (lex_subtype != 0xFF) {
lex_subtype = 0xFF;
@@ -820,32 +1174,37 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
case PTOK_LT:
case PTOK_PIPE:
lex_type = TOK_EXPR;
- switch (ptok) {
- case PTOK_PLUS : value = EXPR_PLUS ; break;
- case PTOK_MINUS: value = EXPR_MINUS; break;
- case PTOK_PIPE : value = EXPR_OR ; break;
- case PTOK_GT : value = (get_ptok(str[i+1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; break;
- case PTOK_LT : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break;
- }
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
memset(lexeme, 0, strlen(lexeme)+1);
- lexeme[j++] = str[i];
- if (value == EXPR_LSHFT || value == EXPR_RSHFT) {
- lexeme[j++] = str[++i];
- }
+ do {
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ j = 0;
+ i = tmp - str;
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } while (0);
break;
case PTOK_EQU:
i++;
lex_type = TOK_SYM;
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j] = str[i];
+ if (lt) {
+ lt->id = lex_type;
+ lt->type = depth;
+ }
+ new_symbol(lt, sym, address, depth, dbg);
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
break;
case PTOK_RBRACK:
i++;
lex_type = TOK_IND;
+ delm = (delm == ')') ? ',' : delm;
lexeme[j] = ')';
lexeme[j+1] = '\0';
lexeme[j+2] = '\0';
@@ -888,27 +1247,12 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0';
lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0';
lexeme[j+3] = '\0';
+
lex_type = TOK_REG;
- switch (ptok) {
- case PTOK_A: value = REG_A; break;
- case PTOK_X: value = REG_X; break;
- case PTOK_Y: value = REG_Y; break;
- case PTOK_E: value = REG_E; break;
- case PTOK_C: value = REG_C; break;
- case PTOK_D: value = REG_D; break;
- case PTOK_S:
- case PTOK_B:
- if (get_ptok(lexeme[j+1], dbg) == PTOK_P) {
- value = (ptok == PTOK_S) ? REG_SP : REG_BP;
- } else {
- value = (ptok == PTOK_S) ? REG_S : REG_B;
- }
- break;
- case PTOK_F: value = REG_F; break;
- case PTOK_R: value = strtoull(lexeme+j+1, NULL, 10); break;
- }
+ value = is_reg(lexeme);
+
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
+ t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
break;
@@ -919,11 +1263,12 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
of = 2;
lex_type = TOK_OF;
l->count++;
- t = make_token(lex_type, of, space, tab, 0, "", NULL);
+ t = make_token(lex_type, of, space, tab, 0, "", NULL, NULL);
break;
case PTOK_AT:
memset(lexeme, 0, strlen(lexeme)+1);
for (char *tmp = str+i; *tmp++ == '@'; depth++);
+ i += depth;
lexeme[j] = '@';
lex_type = TOK_LOCAL;
if (lt || t) {
@@ -977,20 +1322,36 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lex_type = TOK_COMMENT;
l->count++;
if (j) {
- t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL);
+ t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL, NULL);
} else {
- t = make_token(lex_type, 0, space, tab, 0, "" , NULL);
+ t = make_token(lex_type, 0, space, tab, 0, "" , NULL, NULL);
}
break;
case PTOK_ALPHA:
+ /* Get the length of the token. */
for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
isch = 0;
isop = 0;
- if (j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
+ /* We need to figure out if we're allowed to
+ * search for a valid instruction name.
+ *
+ * We're only allowed to so, if:
+ *
+ * 1. The previous token wasn't a directive.
+ * 2. There wasn't an instruction before us.
+ * 3. The length of the token is at, or above
+ * the length of the shortest instruction.
+ * 4. The length of the token is at, or below
+ * the length of the longest instruction.
+ * 5. The character after the token isn't a
+ * label delimiter.
+ * 6. We're not within a struct/union block.
+ */
+ if (!(lt && lt->id == TOK_DIR) && !is_inst && j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
for (k = 0; k < OPNUM; k++) {
int find_ext = (k < EXT_OPNUM);
int find_ortho = (k < ORTHO_OPNUM);
@@ -1010,7 +1371,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
isop = 1;
is_inst = 1;
l->count++;
- t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
+ t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
break;
}
}
@@ -1024,7 +1385,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
if (!strcasecmp(lexeme, set_cc[k])) {
lex_type = TOK_CC;
l->count++;
- t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
+ t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
}
}
}
@@ -1035,28 +1396,31 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
if (ret == PTOK_COLON || ret == PTOK_EQU) {
depth = (lex_type == TOK_LOCAL);
}
- lex_type = TOK_SYM;
+ int is_expr = (!is_struct && str[i+spaces] != ':' && str[i+spaces] != '=');
l->count++;
- t = make_token(lex_type, depth, space, tab, 0, "", NULL);
memcpy(sym, lexeme, j+1);
- if (dbg) {
- printf("lex(): spaces: %u\n", spaces);
+ if (is_expr) {
+ i -= j + (depth);
+ lex_type = TOK_EXPR;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ /*i += j;*/
+ i = tmp - str;
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } else {
+ memcpy(sym, lexeme, j+1);
+ lex_type = TOK_SYM;
+ t = make_token(lex_type, depth, space, tab, 0, "", NULL, NULL);
}
+
if (is_struct) {
create_struct(cur_sym, l, t, lt, sym, dbg);
depth = 0;
- } else if ((str[i+spaces] != ':' && str[i+spaces] != '=')) {
- symbol *s;
- int scope_depth;
- char *tmp = lexeme;
- for (scope_depth = 0; *tmp; scope_depth += (*tmp++ == '.'));
- char *scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, lexeme, dbg) : lexeme;
- t->sym = get_sym(scope_name, address, t, (scope_depth) ? scope_depth : depth, 1, dbg);
- isfixup += (t && t->sym == NULL);
- depth = 0;
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
}
/*if (!is_struct && t && t->sym && t->sym->isstruct) {
tmp_sym = t->sym;
@@ -1074,7 +1438,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
j = 0;
if ((lex_type == TOK_OPCODE || lex_type == TOK_EXTOP) && !isop) {
j = 0;
- } else if (lex_type == TOK_EXPR || (lex_type != TOK_MEMBER && !isdelm2(str[i], dbg))) {
+ } else if (lex_type != TOK_EXPR && lex_type != TOK_LOCAL && lex_type != TOK_MEMBER && !isdelm2(str[i], dbg)) {
i++;
}
switch (lex_type) {
@@ -1093,22 +1457,13 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
}
if (i) {
- l->tok = tokens;
+ l->tok = tokens;
token *tok = tokens;
- if (tok->id == TOK_SYM && tok->next) {
+ if ((tok->id == TOK_SYM || tok->id == TOK_LABEL) && tok->next) {
symbol *s = tok->sym;
for (; tok; tok = tok->next) {
- switch (tok->id) {
- case TOK_HEX :
- case TOK_BIN :
- case TOK_DEC :
- case TOK_CHAR:
- case TOK_EXPR:
- s->val = get_val(tok, address, 3, 0xFF, 0, dbg);
- if (tok->next) {
- tok = skip_expr(tok, 0xFF, 0, dbg);
- }
- break;
+ if (tok->id == TOK_EXPR) {
+ s->val = get_val(tok->expr, address, 3, 0, dbg);
}
}
}