summary refs log tree commit diff
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r-- lexer.c 735
1 files changed, 545 insertions, 190 deletions
diff --git a/lexer.c b/lexer.c
index 57a7e14..bc954fc 100644
--- a/lexer.c
+++ b/lexer.c
@@ -68,11 +68,11 @@ int add_symbol(symbol *sym, const char *name, symbol **root, symbol **lsym, symb
/*return 0;*/
}
}
- for (; s != NULL && s->next != NULL; s = s->next) {
- if (dbg) {
- printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
- }
+ for (; s != NULL && s->next != NULL; s = s->next) {
+ if (dbg) {
+ printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
}
+ }
if ((is_new_scope && *lsym) || *lloc || *csym) {
/*
if (is_new_scope) {
@@ -462,6 +462,29 @@ fixup *find_fixup(fixup *root, const char *name, int depth, uint8_t dbg) {
return NULL;
}
+/* Walk the expression tree rooted at `root` (both children first, then the
+ * node itself) and rebind matching symbol references to `sym`.
+ *
+ * A node is rebound when it is an EXPR_SYM node whose current symbol has
+ * exactly the name `name`, has scope depth `depth`, and is not yet marked
+ * as defined (`def` is zero).
+ *
+ * `dbg` is only forwarded to the recursive calls.
+ */
+void find_expr_sym(expr *root, const char *name, symbol *sym, int depth, uint8_t dbg) {
+    size_t name_len = strlen(name);
+
+    if (root == NULL) {
+        return;
+    }
+
+    if (root->left != NULL) {
+        find_expr_sym(root->left, name, sym, depth, dbg);
+    }
+    if (root->right != NULL) {
+        find_expr_sym(root->right, name, sym, depth, dbg);
+    }
+
+    /* Only symbol nodes that actually carry a symbol are candidates. */
+    if (root->type != EXPR_SYM || root->value.sym == NULL) {
+        return;
+    }
+
+    symbol *cand = root->value.sym;
+    size_t cand_len = strlen(cand->name);
+    /* Cheap length and first-character checks before the full compare. */
+    int same_name = (cand_len == name_len) && (cand->name[0] == name[0]) && (strcmp(name, cand->name) == 0);
+
+    if (same_name && cand->depth == depth && !cand->def) {
+        root->value.sym = sym;
+    }
+}
+
void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uint8_t dbg) {
size_t name_len = strlen(name);
for (token *t = l->tok; t; t = t->next) {
@@ -478,6 +501,7 @@ void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uin
}
}
break;
+ case TOK_EXPR: find_expr_sym(t->expr, name, sym, depth, dbg); break;
}
}
if (l->next) {
@@ -512,6 +536,416 @@ void new_symbol(token *t, const char *name, uint64_t value, int depth, uint8_t d
}
}
+/* Decode a single backslash escape sequence.
+ *
+ * s    - points at the backslash that introduces the sequence.
+ * code - receives the decoded character; may be NULL to discard it.
+ *
+ * Returns a pointer to the first character after the sequence.
+ *
+ * Recognised forms:
+ *   \a \b \f \n \r \t \v \\ \" \' \e   single-character escapes
+ *   \$hh  \xhh  \Xhh                    hexadecimal value
+ *   \%bbb                               binary value
+ *   \ooo                                octal value (starts with a digit)
+ *
+ * For any other character only the backslash is consumed and *code is set
+ * to a literal backslash, so the caller never reads an unset value and the
+ * following character is processed as ordinary text.
+ */
+char *parse_escape(char *s, char *code) {
+    char dummy;
+    char *end;
+    int base;
+
+    /* The caller hands us the backslash itself; step over it. */
+    s++;
+
+    if (code == NULL) {
+        code = &dummy;
+    }
+
+    switch (*s) {
+        case 'a' : *code = '\a'; return s+1;
+        case 'b' : *code = '\b'; return s+1;
+        case 'f' : *code = '\f'; return s+1;
+        case 'n' : *code = '\n'; return s+1;
+        case 'r' : *code = '\r'; return s+1;
+        case 't' : *code = '\t'; return s+1;
+        case 'v' : *code = '\v'; return s+1;
+        case '\\': *code = '\\'; return s+1;
+        case '\"': *code = '\"'; return s+1;
+        case '\'': *code = '\''; return s+1;
+        case 'e' : *code = '\x1B'; return s+1;
+        case '$' : case 'x' : case 'X' :
+            /* Skip the hexadecimal prefix character. */
+            base = 16;
+            s++;
+            break;
+        case '%' :
+            /* The binary prefix has to be skipped as well, otherwise
+             * strtoull() stops right at the '%' and yields zero.
+             */
+            base = 2;
+            s++;
+            break;
+        case '0' : case '1' : case '2' : case '3' : case '4' :
+        case '5' : case '6' : case '7' : case '8' : case '9' :
+            /* Octal has no prefix character; the digits start here. */
+            base = 8;
+            break;
+        default :
+            /* No valid escape sequence was found. */
+            *code = '\\';
+            return s;
+    }
+
+    /* Only the low bits that fit into a char are kept. */
+    *code = (char)strtoull(s, &end, base);
+    return end;
+}
+
+/* Scan a quoted literal and optionally pack its characters into an integer.
+ *
+ * s         - in/out cursor. On entry *s points at the opening delimiter;
+ *             on return it points just past the closing delimiter, or at
+ *             the terminating NUL if the literal is never closed.
+ * delm      - the delimiter character. Two consecutive delimiters inside
+ *             the literal stand for one literal delimiter character.
+ * get_value - when non-zero, the first sizeof(uint64_t) characters are
+ *             stored, one per byte and in host memory order, into the
+ *             returned value.
+ * dbg       - unused.
+ *
+ * Returns the packed value, or 0 when get_value is zero.
+ */
+uint64_t parse_quote(char **s, char delm, int get_value, uint8_t dbg) {
+    uint64_t value = 0;
+    uint8_t *bytes = (uint8_t *)&value;
+    char *str = *s + 1;
+
+    (void)dbg;
+
+    for (size_t i = 0; *str; i++) {
+        /* Pre-set, so an escape that parse_escape() does not decode can
+         * never leave an indeterminate character to be stored below.
+         */
+        char c = '\0';
+
+        if (*str == '\\') {
+            /* Start of an escape sequence. */
+            str = parse_escape(str, &c);
+        } else {
+            c = *str++;
+            if (c == delm) {
+                if (*str != delm) {
+                    /* Closing delimiter: it is consumed, but not stored. */
+                    break;
+                }
+                /* A doubled delimiter counts as a single literal one. */
+                str++;
+            }
+        }
+
+        if (get_value && i < sizeof(value)) {
+            bytes[i] = (uint8_t)c;
+        }
+    }
+
+    *s = str;
+
+    return value;
+}
+
+/* Allocate and initialise a new expression node with no children.
+ *
+ * type  - stored in the node's `type` field.
+ * value - stored in `value.val`, but only when `sym` is NULL.
+ * sym   - when non-NULL, stored in `value.sym` instead of `value`.
+ * dbg   - unused.
+ *
+ * Returns the heap-allocated node, or NULL if the allocation fails.
+ * Nothing in this function releases the node.
+ */
+expr *make_expr(int type, uint64_t value, symbol *sym, uint8_t dbg) {
+    expr *node = malloc(sizeof *node);
+
+    (void)dbg;
+
+    /* Do not write through a failed allocation. */
+    if (node == NULL) {
+        return NULL;
+    }
+
+    node->type = type;
+    node->left = NULL;
+    node->right = NULL;
+
+    if (sym) {
+        node->value.sym = sym;
+    } else {
+        node->value.val = value;
+    }
+
+    return node;
+}
+
+/* Check whether `str` is, in its entirety, the name of a register.
+ *
+ * Matching ignores letter case. One-letter names map to REG_A, REG_B,
+ * REG_X, REG_Y, REG_E, REG_C, REG_D, REG_S and REG_F; the two-letter names
+ * "sp", "bp" and "pc" map to REG_SP, REG_BP and REG_PC. A three-character
+ * name starting with 'r' is accepted when the number after the 'r' lies
+ * between REG_R11 and REG_R15, and that number is what gets returned.
+ *
+ * Returns the register id, or -1 if `str` is not a register name.
+ */
+int is_reg(const char *str) {
+    const size_t n = strlen(str);
+
+    if (n == 1) {
+        switch (str[0]) {
+            case 'a': case 'A': return REG_A;
+            case 'b': case 'B': return REG_B;
+            case 'c': case 'C': return REG_C;
+            case 'd': case 'D': return REG_D;
+            case 'e': case 'E': return REG_E;
+            case 'f': case 'F': return REG_F;
+            case 's': case 'S': return REG_S;
+            case 'x': case 'X': return REG_X;
+            case 'y': case 'Y': return REG_Y;
+        }
+        return -1;
+    }
+
+    if (n == 2) {
+        /* Dispatch on the second letter first: "?p" or "?c". */
+        switch (str[1]) {
+            case 'p': case 'P':
+                switch (str[0]) {
+                    case 's': case 'S': return REG_SP;
+                    case 'b': case 'B': return REG_BP;
+                }
+                return -1;
+            case 'c': case 'C':
+                return (str[0] == 'p' || str[0] == 'P') ? REG_PC : -1;
+        }
+        return -1;
+    }
+
+    if (n == 3 && (str[0] == 'r' || str[0] == 'R')) {
+        int regnum = strtoul(str+1, NULL, 10);
+        if (regnum >= REG_R11 && regnum <= REG_R15) {
+            return regnum;
+        }
+    }
+
+    return -1;
+}
+
+/* Advance `str` past every character for which isdelm() reports bit 0x10.
+ *
+ * The arguments are parenthesised and the loop is wrapped in
+ * do { ... } while (0), so the macro behaves as one ordinary statement and
+ * cannot swallow the statement that follows it.
+ */
+#define SKIP_WHITESPACE(str, dbg) \
+    do { \
+        /* Skip over any whitespace. */ \
+        for (; isdelm(*(str), (dbg)) & 0x10; (str)++); \
+    } while (0)
+
+
+/* Parse one primary operand: a number, a quoted character literal, or a
+ * symbol name.
+ *
+ * line      - in/out cursor. When a node is returned, *line is advanced
+ *             past the operand and any whitespace that follows it. It is
+ *             left untouched when NULL is returned.
+ * address   - forwarded to get_sym() for symbol lookups.
+ * found_reg - set to 1 when the name turns out to be a register (NULL is
+ *             returned in that case), when the token is PTOK_SCOLON or
+ *             PTOK_COMMA, or when the operand is directly followed by
+ *             `stop`.
+ * stop      - character that ends the expression; NULL is returned when
+ *             *line already points at it.
+ * dbg       - forwarded to the helper functions.
+ *
+ * Numbers are PTOK_DOLLAR (base 16), PTOK_PERCENT (base 2) or PTOK_NUMBER
+ * (base 10). A symbol that get_sym() cannot find bumps the global
+ * `isfixup` counter and yields an EXPR_SYM node without a symbol.
+ *
+ * Returns the node built by make_expr(), or NULL (see above, and also when
+ * a temporary buffer cannot be allocated).
+ */
+expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+    char *str = *line;
+    char *tmp;
+    char *scope_name;
+    symbol *s = NULL;
+
+    /* Zero by default, so a node that has no numeric payload never
+     * carries an indeterminate value.
+     */
+    uint64_t value = 0;
+
+    int i = 0;
+    int base = 10;
+    int type = EXPR_NONE;
+    int depth = 0;
+    int scope_depth = 0;
+
+    uint8_t ptok = get_ptok(*str, dbg);
+    ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
+
+    if (*str == stop) {
+        return NULL;
+    }
+
+    switch (ptok) {
+        case PTOK_DOLLAR:
+        case PTOK_PERCENT:
+        case PTOK_NUMBER:
+            switch (ptok) {
+                case PTOK_DOLLAR : base = 16; type = EXPR_HEX; str++; break;
+                case PTOK_PERCENT: base = 2; type = EXPR_BIN; str++; break;
+                case PTOK_NUMBER : base = 10; type = EXPR_DEC; /****/ break;
+            }
+
+            /* Get the number of digits, and
+             * find the end of the number.
+             */
+            for (; isxdigit((unsigned char)str[i]) && !(isdelm(str[i], dbg) & 0x03); i++);
+
+            tmp = malloc(i+1);
+            if (tmp == NULL) {
+                return NULL;
+            }
+
+            memcpy(tmp, str, i);
+            tmp[i] = '\0';
+
+            value = strtoull(tmp, NULL, base);
+            /* The copy was only needed for the conversion. */
+            free(tmp);
+            break;
+        case PTOK_SQUOTE:
+            type = EXPR_CHAR;
+            /* parse_quote() advances str itself, so i stays 0. */
+            value = parse_quote(&str, *str, 1, dbg);
+            break;
+        case PTOK_AT:
+            /* Increment the depth count, by the
+             * number of '@' signs before the
+             * symbol name.
+             */
+            for (depth = 0; *str == '@'; str++, depth++);
+            /* Falls through. */
+        case PTOK_ALPHA:
+            /* Find the end of the symbol name.
+             * Also increment the depth count every
+             * time a '.' is found in the symbol name.
+             */
+            for (scope_depth = 0; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
+
+            /* One extra byte for the terminating NUL written below. */
+            tmp = malloc(i+1);
+            if (tmp == NULL) {
+                return NULL;
+            }
+
+            memcpy(tmp, str, i);
+            tmp[i] = '\0';
+
+            if (is_reg(tmp) >= 0) {
+                free(tmp);
+                *found_reg = 1;
+                return NULL;
+            }
+
+            /* NOTE(review): tmp is not freed on this path because
+             * mk_scope_name()/get_sym() may keep a reference to the
+             * name. Confirm ownership before releasing it here.
+             */
+            scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp;
+            s = get_sym(scope_name, address, NULL, (scope_depth) ? scope_depth : depth, 1, dbg);
+            isfixup += (s == NULL);
+            type = EXPR_SYM;
+            break;
+    }
+
+    str += i;
+
+    SKIP_WHITESPACE(str, dbg);
+
+    if (ptok == PTOK_SCOLON || ptok == PTOK_COMMA || *str == stop) {
+        *found_reg = 1;
+    }
+
+    *line = str;
+
+    return make_expr(type, value, s, dbg);
+}
+
+/* Parse an optional prefix operator followed by a primary operand.
+ *
+ * The prefix operators are '+', '-', '<' and '>'. When one is present, a
+ * node of type EXPR_PLUS, EXPR_MINUS, EXPR_HIGH (for PTOK_LT) or EXPR_LOW
+ * (for PTOK_GT) is created and the operand becomes its left child.
+ * Without a prefix operator the call is handed to get_primary_expr().
+ *
+ * Returns NULL, leaving *line as it was, when *line is already at `stop`
+ * or *found_reg is already set.
+ */
+expr *get_unary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+    char *cur = *line;
+    expr *node = NULL;
+
+    /* Nothing left to parse. */
+    if (*cur == stop || *found_reg) {
+        *line = cur;
+        return node;
+    }
+
+    switch (*cur) {
+        case '+': case '-': case '<': case '>':
+            break;
+        default:
+            /* No prefix operator: the primary parser moves *line itself. */
+            return get_primary_expr(line, address, found_reg, stop, dbg);
+    }
+
+    uint8_t op = get_ptok(*cur++, dbg);
+    SKIP_WHITESPACE(cur, dbg);
+
+    int type;
+    if (op == PTOK_PLUS) {
+        type = EXPR_PLUS;
+    } else if (op == PTOK_MINUS) {
+        type = EXPR_MINUS;
+    } else if (op == PTOK_GT) {
+        type = EXPR_LOW;
+    } else if (op == PTOK_LT) {
+        type = EXPR_HIGH;
+    }
+
+    node = make_expr(type, 0, NULL, dbg);
+    node->left = get_primary_expr(&cur, address, found_reg, stop, dbg);
+
+    *line = cur;
+    return node;
+}
+
+/* Parse a left-associative chain of shift operations ("<<" and ">>")
+ * whose operands are unary expressions.
+ *
+ * Each operator produces an EXPR_LSHFT or EXPR_RSHFT node with the tree
+ * built so far as its left child and the next operand as its right child.
+ * The chain ends early when `stop` is reached or *found_reg becomes set.
+ *
+ * On return *line points at the first character that was not consumed.
+ */
+expr *get_shift_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+    expr *lhs = get_unary_expr(line, address, found_reg, stop, dbg);
+    char *cur = *line;
+
+    SKIP_WHITESPACE(cur, dbg);
+
+    /* A shift operator is the same angle bracket written twice. */
+    while ((*cur == '<' || *cur == '>') && cur[1] == cur[0]) {
+        uint8_t op = get_ptok(*cur, dbg);
+        int type;
+
+        cur += 2;
+        SKIP_WHITESPACE(cur, dbg);
+
+        if (op == PTOK_GT) {
+            type = EXPR_RSHFT;
+        } else if (op == PTOK_LT) {
+            type = EXPR_LSHFT;
+        }
+
+        expr *node = make_expr(type, 0, NULL, dbg);
+
+        /* Kept from the original call sequence. */
+        SKIP_WHITESPACE(cur, dbg);
+
+        node->left = lhs;
+        node->right = get_unary_expr(&cur, address, found_reg, stop, dbg);
+        lhs = node;
+
+        if (*cur == stop || *found_reg) {
+            break;
+        }
+    }
+
+    *line = cur;
+    return lhs;
+}
+
+/* Parse a left-associative chain of bitwise-or operations ('|') whose
+ * operands are shift expressions. A doubled "||" is not treated as an
+ * operator here.
+ *
+ * Each operator produces an EXPR_OR node with the tree built so far as its
+ * left child and the next operand as its right child. The chain ends early
+ * when `stop` is reached or *found_reg becomes set.
+ *
+ * On return *line points at the first character that was not consumed.
+ */
+expr *get_or_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+    expr *lhs = get_shift_expr(line, address, found_reg, stop, dbg);
+    char *cur = *line;
+
+    SKIP_WHITESPACE(cur, dbg);
+
+    while (cur[0] == '|' && cur[1] != '|') {
+        cur++;
+        SKIP_WHITESPACE(cur, dbg);
+
+        expr *node = make_expr(EXPR_OR, 0, NULL, dbg);
+
+        /* Kept from the original call sequence. */
+        SKIP_WHITESPACE(cur, dbg);
+
+        node->left = lhs;
+        node->right = get_shift_expr(&cur, address, found_reg, stop, dbg);
+        lhs = node;
+
+        if (*cur == stop || *found_reg) {
+            break;
+        }
+    }
+
+    *line = cur;
+    return lhs;
+}
+
+/* Parse a left-associative chain of additions and subtractions whose
+ * operands are bitwise-or expressions. A doubled "++" or "--" is not
+ * treated as an operator here.
+ *
+ * Each operator produces an EXPR_PLUS or EXPR_MINUS node with the tree
+ * built so far as its left child and the next operand as its right child.
+ * The chain ends early when `stop` is reached or *found_reg becomes set.
+ *
+ * On return *line points at the first character that was not consumed.
+ */
+expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+    expr *lhs = get_or_expr(line, address, found_reg, stop, dbg);
+    char *cur = *line;
+
+    SKIP_WHITESPACE(cur, dbg);
+
+    /* A sign character that is not immediately repeated. */
+    while ((*cur == '+' || *cur == '-') && cur[1] != cur[0]) {
+        uint8_t op = get_ptok(*cur++, dbg);
+        int type;
+
+        SKIP_WHITESPACE(cur, dbg);
+
+        if (op == PTOK_PLUS) {
+            type = EXPR_PLUS;
+        } else if (op == PTOK_MINUS) {
+            type = EXPR_MINUS;
+        }
+
+        expr *node = make_expr(type, 0, NULL, dbg);
+
+        /* Kept from the original call sequence. */
+        SKIP_WHITESPACE(cur, dbg);
+
+        node->left = lhs;
+        node->right = get_or_expr(&cur, address, found_reg, stop, dbg);
+        lhs = node;
+
+        if (*cur == stop || *found_reg) {
+            break;
+        }
+    }
+
+    *line = cur;
+    return lhs;
+}
+
+#undef SKIP_WHITESPACE
+
+/* Classify the expression that starts at `str` without consuming it
+ * (`str` is passed by value, so the caller's cursor is not moved).
+ *
+ * str       - text to inspect; leading whitespace is skipped.
+ * found_reg - set to 1 whenever EXPR_NONE is returned.
+ * stop      - character that ends the expression.
+ * dbg       - forwarded to the helper functions.
+ *
+ * Returns the EXPR_* type matching the first token. EXPR_NONE is returned
+ * when the text starts with `stop`, with a register name, or with a token
+ * that is not handled below (or when the temporary name buffer cannot be
+ * allocated).
+ */
+int get_expr_type(char *str, int *found_reg, char stop, uint8_t dbg) {
+    /* Skip over any whitespace. */
+    for (; isdelm(*str, dbg) & 0x10; str++);
+
+    if (*str != stop) {
+        uint8_t ptok = get_ptok(*str, dbg);
+
+        int i = 0;
+        int reg;
+        char *tmp;
+
+        ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
+
+        switch (ptok) {
+            case PTOK_PLUS : return EXPR_PLUS ;
+            case PTOK_MINUS: return EXPR_MINUS;
+            case PTOK_PIPE : return EXPR_OR ;
+            case PTOK_GT : return (get_ptok(*(++str), dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ;
+            case PTOK_LT : return (get_ptok(*(++str), dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH);
+            case PTOK_DOLLAR : return EXPR_HEX ;
+            case PTOK_PERCENT: return EXPR_BIN ;
+            case PTOK_NUMBER : return EXPR_DEC ;
+            case PTOK_SQUOTE : return EXPR_CHAR;
+            case PTOK_AT:
+                for (; *str == '@'; str++);
+                /* Falls through. */
+            case PTOK_ALPHA:
+                /* Find the end of the symbol name. */
+                for (; !isdelm2(str[i], dbg) || str[i] == '.'; i++);
+
+                /* One extra byte for the terminating NUL written below. */
+                tmp = malloc(i+1);
+                if (tmp == NULL) {
+                    break;
+                }
+
+                memcpy(tmp, str, i);
+                tmp[i] = '\0';
+
+                /* The copy is only needed for the register check. */
+                reg = is_reg(tmp);
+                free(tmp);
+
+                if (reg < 0) {
+                    return EXPR_SYM;
+                }
+                break;
+        }
+    }
+    *found_reg = 1;
+    return EXPR_NONE;
+}
+
+/* Entry point of the expression parser.
+ *
+ * Parses the expression at *line, starting at the additive level, and
+ * advances *line to the first character that was not consumed.
+ *
+ * found_reg may be NULL when the caller has no use for the flag; a local
+ * placeholder is passed down in that case.
+ *
+ * Returns the root of the expression tree as produced by
+ * get_additive_expr().
+ */
+expr *parse_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
+    int unused_flag = 0;
+    int *flag = (found_reg != NULL) ? found_reg : &unused_flag;
+    char *cursor = *line;
+
+    expr *root = get_additive_expr(&cursor, address, flag, stop, dbg);
+
+    *line = cursor;
+    return root;
+}
+
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
char sym[0x100];
uint16_t i = 0;
@@ -550,6 +984,8 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
uint8_t fall = 0;
uint8_t done = 0;
+ char delm = ',';
+
line *l = NULL;
token *st = NULL;
@@ -586,70 +1022,19 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
while (isdelm(str[i], dbg) != 1) {
uint8_t offset = 0;
base = 0;
- space = 0;
- tab = 0;
- while (isdelm(str[i+j], dbg) == 16) {
- tab += str[i+j] == '\t';
- space += str[i+j] == ' ';
- j++;
- }
j = 0;
+ for (tab = 0, space = 0; isdelm(str[i], dbg) == 16; tab += (str[i] == '\t'), space += (str[i] == ' '), i++);
if (dbg) {
printf("lex(): tab: %u, space: %u\n", tab, space);
}
- if (isdelm(str[i], dbg) == 16) {
- for (; isdelm(str[i], dbg) == 16; i++);
- }
uint8_t ptok = get_ptok(str[i], dbg);
if (is_altok(ptok, dbg)) {
- offset++;
- if (((ptok == PTOK_S || ptok == PTOK_B) && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) {
- offset++;
- }
- int is_alpha = 0;
- switch (get_ptok(str[i+offset], dbg)) {
- case PTOK_B :
- case PTOK_E :
- case PTOK_X :
- case PTOK_Y :
- case PTOK_S :
- case PTOK_P :
- case PTOK_A :
- case PTOK_C :
- case PTOK_D :
- case PTOK_F :
- case PTOK_R :
- case PTOK_ALPHA : ptok = PTOK_ALPHA; is_alpha = 1; break;
- case PTOK_NUMBER:
- if (ptok == PTOK_R) {
- char reg_num[3];
- int isnum;
- for (isnum = 0; isdigit(str[i+offset]) && !(isdelm(str[i+offset], dbg) & 0x03) && isnum < 2; offset++, isnum++) {
- reg_num[isnum] = str[i+offset];
- }
- reg_num[isnum] = '\0';
- if (isnum == 2) {
- int regnum = strtoul(reg_num, NULL, 10);
- if (regnum < 11 || regnum > 15) {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- } else {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- } else {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
- break;
- }
- if (ptok == PTOK_P && toupper(str[i+1]) != 'C') {
- ptok = PTOK_ALPHA;
- is_alpha = 1;
- }
-
- ptok = (!is_inst && !is_alpha) ? PTOK_ALPHA : ptok;
+ for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
+ memcpy(lexeme, &str[i], j);
+ lexeme[j] = '\0';
+ j = 0;
+ ptok = (!is_inst || is_reg(lexeme) < 0) ? PTOK_ALPHA : ptok;
+ memset(lexeme, 0, strlen(lexeme)+1);
}
switch (ptok) {
@@ -685,7 +1070,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
i -= j;
} else {
l->count++;
- t = make_token(lex_type, k, space, tab, 0, "", NULL);
+ t = make_token(lex_type, k, space, tab, 0, "", NULL, NULL);
}
} else {
for (k = 0; !(isdelm(lexeme[k], dbg) & 17); k++) {
@@ -709,7 +1094,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
break;
}
l->count++;
- t = make_token(lex_type, rs, space, tab, 0, "", NULL);
+ t = make_token(lex_type, rs, space, tab, 0, "", NULL, NULL);
if (t) {
lt = t;
t = t->next;
@@ -719,13 +1104,17 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
break;
case PTOK_DQUOTE:
- i++;
- for (; isdelm(str[i+j], dbg) != 4 || isesc; j++) {
- isesc = (str[i+j] == '\\' && str[i+(j-1)] != '\\');
- }
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
+ do {
+ char *tmp = (str + i);
+ int get_value = (ptok == PTOK_SQUOTE);
+ value = parse_quote(&tmp, str[i], get_value, dbg);
+ tmp--;
+ i++;
+ j = tmp - (str + i);
+ memcpy(lexeme, str+i, j);
+ lexeme[j] = '\0';
+ i += j;
+ } while (0);
strid = get_string(lexeme, dbg);
if (strid == 0xFFFF) {
strid = stridx;
@@ -743,70 +1132,35 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
lex_type = TOK_STRING;
l->count++;
- t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL);
+ t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL, NULL);
break;
+ case PTOK_SQUOTE:
case PTOK_DOLLAR:
case PTOK_PERCENT:
case PTOK_NUMBER:
- value = 0;
- switch (ptok) {
- case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break;
- case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break;
- case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; /**/ break;
- }
- for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++);
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
- value = strtoull(lexeme, NULL, base);
- if (lt->id == TOK_SYM) {
- new_symbol(lt, sym, value, depth, dbg);
- depth = 0;
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
- }
l->count++;
- t = make_token(lex_type, 0, space, tab, value, "", NULL);
- t->digits = (lt->id != TOK_SYM) ? j : 0;
- break;
- case PTOK_SQUOTE:
- i++;
- k = 0;
- j = 0;
- while (isdelm(str[i], dbg) != 8 || isesc) {
- isesc = (str[i] == '\\' && str[i-1] != '\\');
- lexeme[j++] = str[i++];
- }
- isesc = 0;
- lexeme[j] = '\0';
- for (j = 0; lexeme[k] != '\0' && j < 7; k++) {
- switch (lexeme[k]) {
- case '\\':
- switch (lexeme[++k]) {
- case 'n' : ch.u8[j++] = '\n'; break;
- case 'r' : ch.u8[j++] = '\r'; break;
- case 't' : ch.u8[j++] = '\t'; break;
- case 'b' : ch.u8[j++] = '\b'; break;
- case '\'': ch.u8[j++] = '\''; break;
- case '\"': ch.u8[j++] = '\"'; break;
- case '\\': ch.u8[j++] = '\\'; break;
- }
- break;
- default: ch.u8[j++] = lexeme[k];
- }
- }
- lex_type = TOK_CHAR;
- l->count++;
- t = make_token(lex_type, 0, space, tab, ch.u64, "", NULL);
+ do {
+ lex_type = TOK_EXPR;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ j = 0;
+ i = tmp - str;
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } while (0);
break;
case PTOK_LBRACK:
case PTOK_HASH :
lex_type = TOK_MEM;
value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM;
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
+ t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
+ delm = (ptok == PTOK_LBRACK) ? ')' : delm;
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
if (lex_subtype != 0xFF) {
lex_subtype = 0xFF;
@@ -820,32 +1174,37 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
case PTOK_LT:
case PTOK_PIPE:
lex_type = TOK_EXPR;
- switch (ptok) {
- case PTOK_PLUS : value = EXPR_PLUS ; break;
- case PTOK_MINUS: value = EXPR_MINUS; break;
- case PTOK_PIPE : value = EXPR_OR ; break;
- case PTOK_GT : value = (get_ptok(str[i+1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; break;
- case PTOK_LT : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break;
- }
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
memset(lexeme, 0, strlen(lexeme)+1);
- lexeme[j++] = str[i];
- if (value == EXPR_LSHFT || value == EXPR_RSHFT) {
- lexeme[j++] = str[++i];
- }
+ do {
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ j = 0;
+ i = tmp - str;
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } while (0);
break;
case PTOK_EQU:
i++;
lex_type = TOK_SYM;
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j] = str[i];
+ if (lt) {
+ lt->id = lex_type;
+ lt->type = depth;
+ }
+ new_symbol(lt, sym, address, depth, dbg);
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
break;
case PTOK_RBRACK:
i++;
lex_type = TOK_IND;
+ delm = (delm == ')') ? ',' : delm;
lexeme[j] = ')';
lexeme[j+1] = '\0';
lexeme[j+2] = '\0';
@@ -888,27 +1247,12 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0';
lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0';
lexeme[j+3] = '\0';
+
lex_type = TOK_REG;
- switch (ptok) {
- case PTOK_A: value = REG_A; break;
- case PTOK_X: value = REG_X; break;
- case PTOK_Y: value = REG_Y; break;
- case PTOK_E: value = REG_E; break;
- case PTOK_C: value = REG_C; break;
- case PTOK_D: value = REG_D; break;
- case PTOK_S:
- case PTOK_B:
- if (get_ptok(lexeme[j+1], dbg) == PTOK_P) {
- value = (ptok == PTOK_S) ? REG_SP : REG_BP;
- } else {
- value = (ptok == PTOK_S) ? REG_S : REG_B;
- }
- break;
- case PTOK_F: value = REG_F; break;
- case PTOK_R: value = strtoull(lexeme+j+1, NULL, 10); break;
- }
+ value = is_reg(lexeme);
+
l->count++;
- t = make_token(lex_type, value, space, tab, 0, "", NULL);
+ t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
break;
@@ -919,11 +1263,12 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
of = 2;
lex_type = TOK_OF;
l->count++;
- t = make_token(lex_type, of, space, tab, 0, "", NULL);
+ t = make_token(lex_type, of, space, tab, 0, "", NULL, NULL);
break;
case PTOK_AT:
memset(lexeme, 0, strlen(lexeme)+1);
for (char *tmp = str+i; *tmp++ == '@'; depth++);
+ i += depth;
lexeme[j] = '@';
lex_type = TOK_LOCAL;
if (lt || t) {
@@ -977,20 +1322,36 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lex_type = TOK_COMMENT;
l->count++;
if (j) {
- t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL);
+ t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL, NULL);
} else {
- t = make_token(lex_type, 0, space, tab, 0, "" , NULL);
+ t = make_token(lex_type, 0, space, tab, 0, "" , NULL, NULL);
}
break;
case PTOK_ALPHA:
+ /* Get the length of the token. */
for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
isch = 0;
isop = 0;
- if (j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
+ /* We need to figure out if we're allowed to
+ * search for a valid instruction name.
+ *
* We're only allowed to do so, if:
+ *
+ * 1. The previous token wasn't a directive.
+ * 2. There wasn't an instruction before us.
+ * 3. The length of the token is at, or above
+ * the length of the shortest instruction.
+ * 4. The length of the token is at, or below
+ * the length of the longest instruction.
+ * 5. The character after the token isn't a
+ * label delimiter.
+ * 6. We're not within a struct/union block.
+ */
+ if (!(lt && lt->id == TOK_DIR) && !is_inst && j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
for (k = 0; k < OPNUM; k++) {
int find_ext = (k < EXT_OPNUM);
int find_ortho = (k < ORTHO_OPNUM);
@@ -1010,7 +1371,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
isop = 1;
is_inst = 1;
l->count++;
- t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
+ t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
break;
}
}
@@ -1024,7 +1385,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
if (!strcasecmp(lexeme, set_cc[k])) {
lex_type = TOK_CC;
l->count++;
- t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
+ t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
}
}
}
@@ -1035,28 +1396,31 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
if (ret == PTOK_COLON || ret == PTOK_EQU) {
depth = (lex_type == TOK_LOCAL);
}
- lex_type = TOK_SYM;
+ int is_expr = (!is_struct && str[i+spaces] != ':' && str[i+spaces] != '=');
l->count++;
- t = make_token(lex_type, depth, space, tab, 0, "", NULL);
memcpy(sym, lexeme, j+1);
- if (dbg) {
- printf("lex(): spaces: %u\n", spaces);
+ if (is_expr) {
+ i -= j + (depth);
+ lex_type = TOK_EXPR;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ char *tmp = &str[i];
+ expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ j = tmp - &str[i];
+ memcpy(lexeme, &str[i], j);
+ /*i += j;*/
+ i = tmp - str;
+ t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
+ t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
+ lex_subtype = 0xFF;
+ } else {
+ memcpy(sym, lexeme, j+1);
+ lex_type = TOK_SYM;
+ t = make_token(lex_type, depth, space, tab, 0, "", NULL, NULL);
}
+
if (is_struct) {
create_struct(cur_sym, l, t, lt, sym, dbg);
depth = 0;
- } else if ((str[i+spaces] != ':' && str[i+spaces] != '=')) {
- symbol *s;
- int scope_depth;
- char *tmp = lexeme;
- for (scope_depth = 0; *tmp; scope_depth += (*tmp++ == '.'));
- char *scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, lexeme, dbg) : lexeme;
- t->sym = get_sym(scope_name, address, t, (scope_depth) ? scope_depth : depth, 1, dbg);
- isfixup += (t && t->sym == NULL);
- depth = 0;
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
}
/*if (!is_struct && t && t->sym && t->sym->isstruct) {
tmp_sym = t->sym;
@@ -1074,7 +1438,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
j = 0;
if ((lex_type == TOK_OPCODE || lex_type == TOK_EXTOP) && !isop) {
j = 0;
- } else if (lex_type == TOK_EXPR || (lex_type != TOK_MEMBER && !isdelm2(str[i], dbg))) {
+ } else if (lex_type != TOK_EXPR && lex_type != TOK_LOCAL && lex_type != TOK_MEMBER && !isdelm2(str[i], dbg)) {
i++;
}
switch (lex_type) {
@@ -1093,22 +1457,13 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
}
}
if (i) {
- l->tok = tokens;
+ l->tok = tokens;
token *tok = tokens;
- if (tok->id == TOK_SYM && tok->next) {
+ if ((tok->id == TOK_SYM || tok->id == TOK_LABEL) && tok->next) {
symbol *s = tok->sym;
for (; tok; tok = tok->next) {
- switch (tok->id) {
- case TOK_HEX :
- case TOK_BIN :
- case TOK_DEC :
- case TOK_CHAR:
- case TOK_EXPR:
- s->val = get_val(tok, address, 3, 0xFF, 0, dbg);
- if (tok->next) {
- tok = skip_expr(tok, 0xFF, 0, dbg);
- }
- break;
+ if (tok->id == TOK_EXPR) {
+ s->val = get_val(tok->expr, address, 3, 0, dbg);
}
}
}