summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2021-02-11 21:38:56 -0500
committermrb0nk500 <b0nk@b0nk.xyz>2021-02-11 21:38:56 -0500
commitd27b3a2845ff28610edaa48eaefdde14025fe216 (patch)
tree108fa7f849481482249ef1eb5c817708064ab226
parent04b29166fd226e2464bcfacf6839e3274ff68cc6 (diff)
Simplified the expression parser.
I was finally able to figure out how to implement the expression parser as two functions. It could be done in a single function, but I split it into two to make it easier to read.
-rw-r--r--lexer.c276
1 files changed, 84 insertions, 192 deletions
diff --git a/lexer.c b/lexer.c
index bc954fc..4c83517 100644
--- a/lexer.c
+++ b/lexer.c
@@ -678,25 +678,33 @@ int is_reg(const char *str) {
for (; isdelm(*str, dbg) & 0x10; str++)
-expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
- char *str = *line;
- char *tmp;
- char *scope_name;
- symbol *s = NULL;
-
- uint64_t value;
-
- int i = 0;
- int base;
+int get_expr_type(char **p, uint64_t address, void *val, int *found_reg, char stop, uint8_t dbg) {
+ char *str = *p;
int type = EXPR_NONE;
- int depth = 0;
- int scope_depth = 0;
+
+ SKIP_WHITESPACE(str, dbg);
uint8_t ptok = get_ptok(*str, dbg);
ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
- if (*str != stop) {
+ if (ptok != PTOK_SCOLON && ptok != PTOK_COMMA && *str != stop) {
+ char *tmp = NULL;
+ int i = 0;
+
+ int base = 0;
+ uint64_t value = 0;
+
+ symbol *s = NULL;
+ int depth = 0;
+ int scope_depth = 0;
+ char *scope_name = NULL;
+
switch (ptok) {
+ case PTOK_PLUS : type = EXPR_PLUS ; str++; break;
+ case PTOK_MINUS: type = EXPR_MINUS; str++; break;
+ case PTOK_PIPE : type = EXPR_OR ; str++; break;
+ case PTOK_GT : type = (get_ptok(str[1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; str += 2; break;
+ case PTOK_LT : type = (get_ptok(str[1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); str += 2; break;
case PTOK_DOLLAR:
case PTOK_PERCENT:
case PTOK_NUMBER:
@@ -717,24 +725,26 @@ expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop,
tmp[i] = '\0';
value = strtoull(tmp, NULL, base);
+ *(uint64_t *)val = value;
break;
case PTOK_SQUOTE:
type = EXPR_CHAR;
value = parse_quote(&str, *str, 1, dbg);
+ *(uint64_t *)val = value;
break;
case PTOK_AT:
/* Increment the depth count, by the
* number of '@' signs before the
* symbol name.
*/
- for (depth = 0; *str == '@'; str++, depth++);
+ for (; *str == '@'; str++, depth++);
/* Falls through. */
case PTOK_ALPHA:
/* Find the end of the symbol name.
* Also increment the depth count every
* time a '.' is found in the symbol name.
*/
- for (scope_depth = 0; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
+ for (; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
tmp = malloc(i);
memcpy(tmp, str, i);
@@ -742,148 +752,85 @@ expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop,
if (is_reg(tmp) >= 0) {
*found_reg = 1;
- return NULL;
} else {
scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp;
s = get_sym(scope_name, address, NULL, (scope_depth) ? scope_depth : depth, 1, dbg);
isfixup += (s == NULL);
type = EXPR_SYM;
+ *(symbol **)val = s;
}
break;
}
-
str += i;
-
- SKIP_WHITESPACE(str, dbg);
-
- if (ptok == PTOK_SCOLON || ptok == PTOK_COMMA || *str == stop) {
- *found_reg = 1;
- }
-
- *line = str;
-
- return make_expr(type, value, s, dbg);
- }
- return NULL;
-}
-
-expr *get_unary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
- expr *new = NULL;
- char *str = *line;
-
- if (*str != stop && !(*found_reg)) {
- if (*str == '+' || *str == '-' || *str == '<' || *str == '>') {
- uint8_t ptok = get_ptok(*str++, dbg);
- SKIP_WHITESPACE(str, dbg);
- int type;
- switch (ptok) {
- case PTOK_PLUS : type = EXPR_PLUS ; break;
- case PTOK_MINUS: type = EXPR_MINUS; break;
- case PTOK_GT : type = EXPR_LOW ; break;
- case PTOK_LT : type = EXPR_HIGH ; break;
- }
- new = make_expr(type, 0, NULL, dbg);
- new->left = get_primary_expr(&str, address, found_reg, stop, dbg);
- } else {
- return get_primary_expr(line, address, found_reg, stop, dbg);
- }
- }
-
- *line = str;
- return new;
-}
-
-expr *get_shift_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
- expr *left = get_unary_expr(line, address, found_reg, stop, dbg);
- expr *new = NULL;
- char *str = *line;
-
- SKIP_WHITESPACE(str, dbg);
-
- for (; (*str == '<' || *str == '>') && (str[1] == *str);) {
- uint8_t ptok = get_ptok(*str, dbg);
-
- str += 2;
- SKIP_WHITESPACE(str, dbg);
-
- int type;
- switch (ptok) {
- case PTOK_GT: type = EXPR_RSHFT; break;
- case PTOK_LT: type = EXPR_LSHFT; break;
- }
-
- new = make_expr(type, 0, NULL, dbg);
-
- SKIP_WHITESPACE(str, dbg);
-
- new->left = left;
- new->right = get_unary_expr(&str, address, found_reg, stop, dbg);
- left = new;
-
- if (*str == stop || *found_reg) {
- break;
- }
+ } else {
+ *found_reg = 1;
}
- *line = str;
- return left;
+ *p = str;
+ return type;
}
+#undef SKIP_WHITESPACE
-expr *get_or_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
- expr *left = get_shift_expr(line, address, found_reg, stop, dbg);
- expr *new = NULL;
+expr *parse_expr(char **line, uint64_t address, int *found_reg, int is_left, char stop, uint8_t dbg) {
char *str = *line;
+ int dummy = 0;
- SKIP_WHITESPACE(str, dbg);
-
- for (; *str == '|' && str[1] != '|';) {
- str++;
- SKIP_WHITESPACE(str, dbg);
-
- new = make_expr(EXPR_OR, 0, NULL, dbg);
-
- SKIP_WHITESPACE(str, dbg);
-
- new->left = left;
- new->right = get_shift_expr(&str, address, found_reg, stop, dbg);
- left = new;
-
- if (*str == stop || *found_reg) {
- break;
- }
- }
-
- *line = str;
- return left;
-}
+ found_reg = (found_reg == NULL) ? &dummy : found_reg;
-expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
- expr *left = get_or_expr(line, address, found_reg, stop, dbg);
+ expr *left = (is_left) ? parse_expr(line, address, found_reg, 0, stop, dbg) : NULL;
expr *new = NULL;
- char *str = *line;
- SKIP_WHITESPACE(str, dbg);
-
- for (; (*str == '+' && str[1] != '+') || (*str == '-' && str[1] != '-');) {
- uint8_t ptok = get_ptok(*str++, dbg);
- SKIP_WHITESPACE(str, dbg);
+ int type = EXPR_NONE;
- int type;
- switch (ptok) {
- case PTOK_PLUS : type = EXPR_PLUS ; break;
- case PTOK_MINUS: type = EXPR_MINUS; break;
+ for (; !(isdelm(*str, dbg) & 3) && !(*found_reg);) {
+
+ int old_type = type;
+ uint64_t value = 0;
+ symbol *s = NULL;
+
+ uintptr_t tmp = 0;
+
+ type = get_expr_type(&str, address, &tmp, found_reg, stop, dbg);
+
+ int expr_type = 0;
+
+ switch (type) {
+ case EXPR_NONE : expr_type = -1; break;
+ case EXPR_HEX :
+ case EXPR_BIN :
+ case EXPR_DEC :
+ case EXPR_CHAR : value = (uint64_t)tmp; break;
+ case EXPR_SYM : s = (symbol *)tmp; break;
+ case EXPR_PLUS :
+ case EXPR_MINUS :
+ switch (old_type) {
+ case EXPR_HEX :
+ case EXPR_BIN :
+ case EXPR_DEC :
+ case EXPR_CHAR :
+ case EXPR_SYM : expr_type = 2; break;
+ default : expr_type = 1; break;
+ }
+ break;
+ case EXPR_HIGH :
+ case EXPR_LOW : expr_type = 1; break;
+ case EXPR_OR :
+ case EXPR_LSHFT :
+ case EXPR_RSHFT : expr_type = 2; break;
}
- new = make_expr(type, 0, NULL, dbg);
-
- SKIP_WHITESPACE(str, dbg);
+ if (expr_type >= 0) {
+ new = make_expr(type, value, s, dbg);
- new->left = left;
- new->right = get_or_expr(&str, address, found_reg, stop, dbg);
- left = new;
+ switch (expr_type) {
+ case 1: new->left = parse_expr(&str, address, found_reg, 1, stop, dbg); break;
+ case 2:
+ new->left = left;
+ new->right = parse_expr(&str, address, found_reg, 1, stop, dbg);
+ break;
+ }
- if (*str == stop || *found_reg) {
- break;
+ left = new;
}
}
@@ -891,61 +838,6 @@ expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop
return left;
}
-#undef SKIP_WHITESPACE
-
-int get_expr_type(char *str, int *found_reg, char stop, uint8_t dbg) {
- /* Skip over any whitespace. */
- for (; isdelm(*str, dbg) & 0x10; str++);
-
- if (*str != stop) {
- uint8_t ptok = get_ptok(*str, dbg);
-
- int i = 0;
- char *tmp;
-
- ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;
-
- switch (ptok) {
- case PTOK_PLUS : return EXPR_PLUS ;
- case PTOK_MINUS: return EXPR_MINUS;
- case PTOK_PIPE : return EXPR_OR ;
- case PTOK_GT : return (get_ptok(*(++str), dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ;
- case PTOK_LT : return (get_ptok(*(++str), dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH);
- case PTOK_DOLLAR : return EXPR_HEX ;
- case PTOK_PERCENT: return EXPR_BIN ;
- case PTOK_NUMBER : return EXPR_DEC ;
- case PTOK_SQUOTE : return EXPR_CHAR;
- case PTOK_AT:
- for (; *str == '@'; str++);
- /* Falls through. */
- case PTOK_ALPHA:
- /* Find the end of the symbol name. */
- for (; !isdelm2(str[i], dbg) || str[i] == '.'; i++);
- tmp = malloc(i);
-
- memcpy(tmp, str, i);
- tmp[i] = '\0';
-
- if (is_reg(tmp) < 0) {
- return EXPR_SYM;
- }
- break;
- }
- }
- *found_reg = 1;
- return EXPR_NONE;
-}
-
-expr *parse_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) {
- char *str = *line;
- int dummy = 0;
-
- found_reg = (found_reg == NULL) ? &dummy : found_reg;
- expr *tree = get_additive_expr(&str, address, found_reg, stop, dbg);
- *line = str;
- return tree;
-}
-
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
char sym[0x100];
uint16_t i = 0;
@@ -1143,7 +1035,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lex_type = TOK_EXPR;
memset(lexeme, 0, strlen(lexeme)+1);
char *tmp = &str[i];
- expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
j = tmp - &str[i];
memcpy(lexeme, &str[i], j);
@@ -1178,7 +1070,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
memset(lexeme, 0, strlen(lexeme)+1);
do {
char *tmp = &str[i];
- expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
j = tmp - &str[i];
memcpy(lexeme, &str[i], j);
@@ -1404,7 +1296,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
lex_type = TOK_EXPR;
memset(lexeme, 0, strlen(lexeme)+1);
char *tmp = &str[i];
- expr *e = parse_expr(&tmp, address, NULL, delm, dbg);
+ expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
j = tmp - &str[i];
memcpy(lexeme, &str[i], j);
/*i += j;*/