From d27b3a2845ff28610edaa48eaefdde14025fe216 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Thu, 11 Feb 2021 21:38:56 -0500 Subject: Simplified the expression parser. I was finally able to figure out how to implement the expression parser in two functions. It can be done in a single function, but I used two functions to make it easier to read. --- lexer.c | 276 ++++++++++++++++++++-------------------------------------------- 1 file changed, 84 insertions(+), 192 deletions(-) diff --git a/lexer.c b/lexer.c index bc954fc..4c83517 100644 --- a/lexer.c +++ b/lexer.c @@ -678,25 +678,33 @@ int is_reg(const char *str) { for (; isdelm(*str, dbg) & 0x10; str++) -expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) { - char *str = *line; - char *tmp; - char *scope_name; - symbol *s = NULL; - - uint64_t value; - - int i = 0; - int base; +int get_expr_type(char **p, uint64_t address, void *val, int *found_reg, char stop, uint8_t dbg) { + char *str = *p; int type = EXPR_NONE; - int depth = 0; - int scope_depth = 0; + + SKIP_WHITESPACE(str, dbg); uint8_t ptok = get_ptok(*str, dbg); ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok; - if (*str != stop) { + if (ptok != PTOK_SCOLON && ptok != PTOK_COMMA && *str != stop) { + char *tmp = NULL; + int i = 0; + + int base = 0; + uint64_t value = 0; + + symbol *s = NULL; + int depth = 0; + int scope_depth = 0; + char *scope_name = NULL; + switch (ptok) { + case PTOK_PLUS : type = EXPR_PLUS ; str++; break; + case PTOK_MINUS: type = EXPR_MINUS; str++; break; + case PTOK_PIPE : type = EXPR_OR ; str++; break; + case PTOK_GT : type = (get_ptok(str[1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; str += 2; break; + case PTOK_LT : type = (get_ptok(str[1], dbg) == PTOK_LT) ? 
(EXPR_LSHFT) : (EXPR_HIGH); str += 2; break; case PTOK_DOLLAR: case PTOK_PERCENT: case PTOK_NUMBER: @@ -717,24 +725,26 @@ expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, tmp[i] = '\0'; value = strtoull(tmp, NULL, base); + *(uint64_t *)val = value; break; case PTOK_SQUOTE: type = EXPR_CHAR; value = parse_quote(&str, *str, 1, dbg); + *(uint64_t *)val = value; break; case PTOK_AT: /* Increment the depth count, by the * number of '@' signs before the * symbol name. */ - for (depth = 0; *str == '@'; str++, depth++); + for (; *str == '@'; str++, depth++); /* Falls through. */ case PTOK_ALPHA: /* Find the end of the symbol name. * Also increment the depth count every * time a '.' is found in the symbol name. */ - for (scope_depth = 0; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.')); + for (; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.')); tmp = malloc(i); memcpy(tmp, str, i); @@ -742,148 +752,85 @@ expr *get_primary_expr(char **line, uint64_t address, int *found_reg, char stop, if (is_reg(tmp) >= 0) { *found_reg = 1; - return NULL; } else { scope_name = (!scope_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp; s = get_sym(scope_name, address, NULL, (scope_depth) ? 
scope_depth : depth, 1, dbg); isfixup += (s == NULL); type = EXPR_SYM; + *(symbol **)val = s; } break; } - str += i; - - SKIP_WHITESPACE(str, dbg); - - if (ptok == PTOK_SCOLON || ptok == PTOK_COMMA || *str == stop) { - *found_reg = 1; - } - - *line = str; - - return make_expr(type, value, s, dbg); - } - return NULL; -} - -expr *get_unary_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) { - expr *new = NULL; - char *str = *line; - - if (*str != stop && !(*found_reg)) { - if (*str == '+' || *str == '-' || *str == '<' || *str == '>') { - uint8_t ptok = get_ptok(*str++, dbg); - SKIP_WHITESPACE(str, dbg); - int type; - switch (ptok) { - case PTOK_PLUS : type = EXPR_PLUS ; break; - case PTOK_MINUS: type = EXPR_MINUS; break; - case PTOK_GT : type = EXPR_LOW ; break; - case PTOK_LT : type = EXPR_HIGH ; break; - } - new = make_expr(type, 0, NULL, dbg); - new->left = get_primary_expr(&str, address, found_reg, stop, dbg); - } else { - return get_primary_expr(line, address, found_reg, stop, dbg); - } - } - - *line = str; - return new; -} - -expr *get_shift_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) { - expr *left = get_unary_expr(line, address, found_reg, stop, dbg); - expr *new = NULL; - char *str = *line; - - SKIP_WHITESPACE(str, dbg); - - for (; (*str == '<' || *str == '>') && (str[1] == *str);) { - uint8_t ptok = get_ptok(*str, dbg); - - str += 2; - SKIP_WHITESPACE(str, dbg); - - int type; - switch (ptok) { - case PTOK_GT: type = EXPR_RSHFT; break; - case PTOK_LT: type = EXPR_LSHFT; break; - } - - new = make_expr(type, 0, NULL, dbg); - - SKIP_WHITESPACE(str, dbg); - - new->left = left; - new->right = get_unary_expr(&str, address, found_reg, stop, dbg); - left = new; - - if (*str == stop || *found_reg) { - break; - } + } else { + *found_reg = 1; } - *line = str; - return left; + *p = str; + return type; } +#undef SKIP_WHITESPACE -expr *get_or_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t 
dbg) { - expr *left = get_shift_expr(line, address, found_reg, stop, dbg); - expr *new = NULL; +expr *parse_expr(char **line, uint64_t address, int *found_reg, int is_left, char stop, uint8_t dbg) { char *str = *line; + int dummy = 0; - SKIP_WHITESPACE(str, dbg); - - for (; *str == '|' && str[1] != '|';) { - str++; - SKIP_WHITESPACE(str, dbg); - - new = make_expr(EXPR_OR, 0, NULL, dbg); - - SKIP_WHITESPACE(str, dbg); - - new->left = left; - new->right = get_shift_expr(&str, address, found_reg, stop, dbg); - left = new; - - if (*str == stop || *found_reg) { - break; - } - } - - *line = str; - return left; -} + found_reg = (found_reg == NULL) ? &dummy : found_reg; -expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) { - expr *left = get_or_expr(line, address, found_reg, stop, dbg); + expr *left = (is_left) ? parse_expr(line, address, found_reg, 0, stop, dbg) : NULL; expr *new = NULL; - char *str = *line; - SKIP_WHITESPACE(str, dbg); - - for (; (*str == '+' && str[1] != '+') || (*str == '-' && str[1] != '-');) { - uint8_t ptok = get_ptok(*str++, dbg); - SKIP_WHITESPACE(str, dbg); + int type = EXPR_NONE; - int type; - switch (ptok) { - case PTOK_PLUS : type = EXPR_PLUS ; break; - case PTOK_MINUS: type = EXPR_MINUS; break; + for (; !(isdelm(*str, dbg) & 3) && !(*found_reg);) { + + int old_type = type; + uint64_t value = 0; + symbol *s = NULL; + + uintptr_t tmp = 0; + + type = get_expr_type(&str, address, &tmp, found_reg, stop, dbg); + + int expr_type = 0; + + switch (type) { + case EXPR_NONE : expr_type = -1; break; + case EXPR_HEX : + case EXPR_BIN : + case EXPR_DEC : + case EXPR_CHAR : value = (uint64_t)tmp; break; + case EXPR_SYM : s = (symbol *)tmp; break; + case EXPR_PLUS : + case EXPR_MINUS : + switch (old_type) { + case EXPR_HEX : + case EXPR_BIN : + case EXPR_DEC : + case EXPR_CHAR : + case EXPR_SYM : expr_type = 2; break; + default : expr_type = 1; break; + } + break; + case EXPR_HIGH : + case EXPR_LOW : expr_type = 1; 
break; + case EXPR_OR : + case EXPR_LSHFT : + case EXPR_RSHFT : expr_type = 2; break; } - new = make_expr(type, 0, NULL, dbg); - - SKIP_WHITESPACE(str, dbg); + if (expr_type >= 0) { + new = make_expr(type, value, s, dbg); - new->left = left; - new->right = get_or_expr(&str, address, found_reg, stop, dbg); - left = new; + switch (expr_type) { + case 1: new->left = parse_expr(&str, address, found_reg, 1, stop, dbg); break; + case 2: + new->left = left; + new->right = parse_expr(&str, address, found_reg, 1, stop, dbg); + break; + } - if (*str == stop || *found_reg) { - break; + left = new; } } @@ -891,61 +838,6 @@ expr *get_additive_expr(char **line, uint64_t address, int *found_reg, char stop return left; } -#undef SKIP_WHITESPACE - -int get_expr_type(char *str, int *found_reg, char stop, uint8_t dbg) { - /* Skip over any whitespace. */ - for (; isdelm(*str, dbg) & 0x10; str++); - - if (*str != stop) { - uint8_t ptok = get_ptok(*str, dbg); - - int i = 0; - char *tmp; - - ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok; - - switch (ptok) { - case PTOK_PLUS : return EXPR_PLUS ; - case PTOK_MINUS: return EXPR_MINUS; - case PTOK_PIPE : return EXPR_OR ; - case PTOK_GT : return (get_ptok(*(++str), dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; - case PTOK_LT : return (get_ptok(*(++str), dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); - case PTOK_DOLLAR : return EXPR_HEX ; - case PTOK_PERCENT: return EXPR_BIN ; - case PTOK_NUMBER : return EXPR_DEC ; - case PTOK_SQUOTE : return EXPR_CHAR; - case PTOK_AT: - for (; *str == '@'; str++); - /* Falls through. */ - case PTOK_ALPHA: - /* Find the end of the symbol name. 
*/ - for (; !isdelm2(str[i], dbg) || str[i] == '.'; i++); - tmp = malloc(i); - - memcpy(tmp, str, i); - tmp[i] = '\0'; - - if (is_reg(tmp) < 0) { - return EXPR_SYM; - } - break; - } - } - *found_reg = 1; - return EXPR_NONE; -} - -expr *parse_expr(char **line, uint64_t address, int *found_reg, char stop, uint8_t dbg) { - char *str = *line; - int dummy = 0; - - found_reg = (found_reg == NULL) ? &dummy : found_reg; - expr *tree = get_additive_expr(&str, address, found_reg, stop, dbg); - *line = str; - return tree; -} - uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { char sym[0x100]; uint16_t i = 0; @@ -1143,7 +1035,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { lex_type = TOK_EXPR; memset(lexeme, 0, strlen(lexeme)+1); char *tmp = &str[i]; - expr *e = parse_expr(&tmp, address, NULL, delm, dbg); + expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg); t = make_token(lex_type, 0, space, tab, 0, "", NULL, e); j = tmp - &str[i]; memcpy(lexeme, &str[i], j); @@ -1178,7 +1070,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { memset(lexeme, 0, strlen(lexeme)+1); do { char *tmp = &str[i]; - expr *e = parse_expr(&tmp, address, NULL, delm, dbg); + expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg); t = make_token(lex_type, 0, space, tab, 0, "", NULL, e); j = tmp - &str[i]; memcpy(lexeme, &str[i], j); @@ -1404,7 +1296,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { lex_type = TOK_EXPR; memset(lexeme, 0, strlen(lexeme)+1); char *tmp = &str[i]; - expr *e = parse_expr(&tmp, address, NULL, delm, dbg); + expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg); j = tmp - &str[i]; memcpy(lexeme, &str[i], j); /*i += j;*/ -- cgit v1.2.3-13-gbd6f