diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 328 |
1 files changed, 150 insertions, 178 deletions
@@ -1,4 +1,5 @@ #include "asmmon.h" +#include "lexer.h" uint8_t lex_type; uint16_t sym_count = 0; @@ -215,8 +216,8 @@ uint16_t reslv_fixups(uint8_t dbg) { } -line *find_line(uint16_t ln, uint8_t dbg) { - uint16_t i = 0; +line *find_line(uint32_t ln, uint8_t dbg) { + uint32_t i = 0; line *l = lines; for (; l && l->linenum != ln; l = l->next); /*if (dbg) { @@ -276,7 +277,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { token *t = NULL; token *lt = NULL; - while (isdigit(str[i]) && !isspace(str[i])) { + while (isdigit(str[i]) && isdelm(str[i], dbg) != 16) { lnum[j++] = str[i++]; } islinenum = i; @@ -284,10 +285,11 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lnum[j] = '\0'; ln = strtol(lnum, NULL, 10); j = 0; + l = find_line(ln, dbg); } else { ln = linenum; + l = NULL; } - l = find_line(ln, dbg); if (l) { address = l->addr; } else { @@ -299,11 +301,11 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { last_line = l; } - while (str[i] != '\0' && str[i] != '\n') { + while (isdelm(str[i], dbg) != 1) { base = 0; space = 0; tab = 0; - while (isspace(str[i+j])) { + while (isdelm(str[i+j], dbg) == 16) { tab += str[i+j] == '\t'; space += str[i+j] == ' '; j++; @@ -319,18 +321,23 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { printf("lex(): starting tabs: %u, starting spaces: %u\n", l->stab, l->sspace); } } - if (isspace(str[i])) { - while (isspace(str[i])) { - i++; + if (isdelm(str[i], dbg) == 16) { + for (; isdelm(str[i], dbg) == 16; i++); + } + uint8_t ptok = get_ptok(str[i], dbg); + if (ptok == PTOK_X || ptok == PTOK_Y) { + switch (get_ptok(str[i+1], dbg)) { + case PTOK_ALPHA : + case PTOK_NUMBER: ptok = PTOK_ALPHA; break; } } - switch (str[i]) { - case '.': + switch (ptok) { + case PTOK_DOT: i++; - while (!isspace(str[i])) { - lexeme[j++] = str[i++]; - } + for (; isdelm(str[i+j], dbg) != 16; j++); + memcpy(lexeme, str+i, j); lexeme[j] = '\0'; + i += j; if (!isop) { for (k = 0; k < 6; k++) { if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) { @@ -361,11 +368,12 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { isop = 0; } break; - case '\"': + case PTOK_DQUOTE: i++; - while (str[i] != '\"') { - lexeme[j++] = str[i++]; - } + for (; isdelm(str[i+j], dbg) != 4; j++); + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; strid = get_string(lexeme, dbg); if (strid == 0xFFFF) { strid = stridx; @@ -378,22 +386,26 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { printf("lex(): str[0x%04X]: %s\n", strid, string[strid]); } if (lt->id == TOK_DIR && lt->type == DIR_INCLUDE) { - incl[inc_file++] = strid; + incl[inc_count] = strid; + inc_file++; } lex_type = TOK_STRING; l->count++; t = make_token(lex_type, 0, 0, string[strid]); break; - case '$': - case '%': + case PTOK_DOLLAR: + case PTOK_PERCENT: + case PTOK_NUMBER: value = 0; - base = (str[i] == '$') ? 16 : 2; - i++; - while (isxdigit(str[i]) && (str[i] != '\0' && str[i] != '\n' && str[i] != ',')) { - lexeme[j++] = str[i++]; + switch (ptok) { + case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break; + case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break; + case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; /**/ break; } + for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++); + memcpy(lexeme, str+i, j); lexeme[j] = '\0'; - lex_type = (base == 16) ? TOK_HEX : TOK_BIN; + i += j; value = strtoull(lexeme, NULL, base); if (lt->id == TOK_SYM) { mksymbol(sym, value, 1, 0, 0, dbg); @@ -408,10 +420,10 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { l->count++; t = make_token(lex_type, 0, value, ""); break; - case '\'': + case PTOK_SQUOTE: i++; k = j; - while (str[i] != '\'' || isesc) { + while (isdelm(str[i], dbg) != 8 || isesc) { isesc = (str[i] == '\\' && str[i-1] != '\\'); lexeme[j++] = str[i++]; } @@ -434,25 +446,25 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { l->count++; t = make_token(lex_type, 0, ch, ""); break; - case '(': l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; - case '#': l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; - case '+': - case '-': - case '>': - case '<': + case PTOK_LBRACK: l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; + case PTOK_HASH : l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; + case PTOK_PLUS: + case PTOK_MINUS: + case PTOK_GT: + case PTOK_LT: lex_type = TOK_EXPR; - switch (str[i]) { - case '+': value = EXPR_PLUS ; break; - case '-': value = EXPR_MINUS; break; - case '>': value = EXPR_LOW ; break; - case '<': value = EXPR_HIGH ; break; + switch (ptok) { + case PTOK_PLUS : value = EXPR_PLUS ; break; + case PTOK_MINUS: value = EXPR_MINUS; break; + case PTOK_GT : value = EXPR_LOW ; break; + case PTOK_LT : value = EXPR_HIGH ; break; } l->count++; t = make_token(lex_type, value, 0, ""); memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; - case '=': + case PTOK_EQU: i++; lex_type = TOK_SYM; l->count++; @@ -460,49 +472,34 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { memset(lexeme, 0, strlen(lexeme)+1); lexeme[j] = str[i]; break; - case ')': + case PTOK_RBRACK: i++; - if (str[i] == ',') { - i++; - while (isspace(str[i])) { - lexeme[j++] = str[i++]; - } - if (l->tok->type == IND && tolower(str[i]) == 'y') { - lexeme[j++] = 'y'; - l->tok->type = INDY; - } - lexeme[j] = '\0'; - } else { - lexeme[j] = ')'; - lexeme[j+1] = '\0'; - lexeme[j+2] = '\0'; - } + lex_type = TOK_IND; + lexeme[j] = ')'; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; break; - case ',': + case PTOK_COMMA: i++; - while (isspace(str[i])) { - lexeme[j++] = str[i++]; + if (lex_type != TOK_IND) { + lex_type = TOK_CSV; } - if (l->tok->type == IND && tolower(str[i]) == 'x') { - l->tok->type = INDX; - lexeme[j++] = 'x'; - i++; - } else { - switch (tolower(str[i])) { - case 'x': - l->tok->type = ZMX; - lexeme[j++] = 'x'; - break; - case 'y': - l->tok->type = ZMY; - lexeme[j++] = 'y'; - break; - default: lex_type = TOK_COMMA; i--; break; - } + lexeme[j] = ','; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + break; + case PTOK_X: + case PTOK_Y: + switch (ptok) { + case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break; + case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break; } - lexeme[j] = '\0'; + lexeme[j] = str[i]; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + i++; break; - case ':': + case PTOK_COLON: i++; lexeme[j] = ':'; lexeme[j+1] = '\0'; @@ -519,12 +516,12 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { printf("lex(): isfixup: %u\n", isfixup); } break; - case ';': + case PTOK_SCOLON: i++; - while (str[i] != '\0' && str[i] != '\n') { - lexeme[j++] = str[i++]; - } + for (; isdelm(str[i+j], dbg) != 1; j++); + memcpy(lexeme, str+i, j); lexeme[j] = '\0'; + i += j; comid = get_comment(lexeme, dbg); if (comid == 0xFFFF) { /*if (line != lineidx && l[line].com != 0xFFFF) { @@ -545,118 +542,88 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { l->count++; t = make_token(lex_type, 0, 0, comment[comid]); break; - default: - if (isalnum(str[i]) || str[i] == '_') { - while (!isspace(str[i])) { - switch (str[i]) { + case PTOK_ALPHA: + while (isdelm(str[i], dbg) != 16) { + switch (str[i]) { + case ')': + case ',': + case '.': + case '+': + case '<': + case '>': + case '-': + case ':': + case '=': + case ';': + case '\0': + case '\n': + isch = 0; + break; + default: + isch = 1; + lexeme[j++] = str[i++]; + break; + } + if (!isch) { + break; + } + } + lexeme[j] = '\0'; + isch = 0; + isop = 0; + if (j == 3 && str[i] != ':') { + for (k = 0; k < OPNUM; k++) { + if (toupper(lexeme[0]) == mne[k][0]) { + if (!strcasecmp(lexeme, mne[k])) { + lex_type = TOK_OPCODE; + isop = 1; + l->count++; + t = make_token(lex_type, 0xFF, k, ""); + break; + } + } + } + } + if (!isop) { + for (k = 0; lexeme[k] != '\0';) { + switch (lexeme[k]) { case ')': case ',': case '.': case '+': + case '-': case '<': case '>': - case '-': case ':': - case '=': case ';': + case '=': case '\0': case '\n': - isch = 0; + fall = 1; break; default: - isch = 1; - lexeme[j++] = str[i++]; + fall = 0; break; } - if (!isch) { + if (fall) { break; } + k++; } - lexeme[j] = '\0'; - isch = 0; - isop = 0; - if (j == 3 && str[i] != ':') { - for (k = 0; k < OPNUM; k++) { - if (toupper(lexeme[0]) == mne[k][0]) { - if (!strcasecmp(lexeme, mne[k])) { - lex_type = TOK_OPCODE; - isop = 1; - l->count++; - t = make_token(lex_type, 0xFF, k, ""); - break; - } - } - } + lex_type = TOK_SYM; + l->count++; + t = make_token(lex_type, 0, 0, ""); + memcpy(sym, lexeme, j+1); + uint8_t spaces = 0; + for (; isdelm(str[i+spaces], dbg) == 16; spaces++); + if (dbg) { + printf("lex(): spaces: %u\n", spaces); } - if (!isop) { - for (k = 0; lexeme[k] != '\0';) { - switch (lexeme[k]) { - case ')': - case ',': - case '.': - case '+': - case '-': - case '<': - case '>': - case ':': - case ';': - case '=': - case '\0': - case '\n': - fall = 1; - break; - default: - fall = 0; - break; - } - if (fall) { - break; - } - if ((isalnum(lexeme[k]) || lexeme[k] == '_')) { - if (!isch) { - isch = isalpha(lexeme[k]); - } - num = isdigit(lexeme[k]) && !isch; - k++; - } else { - break; - } - } - if (lexeme[k] == '\0') { - if (num) { - value = 0; - value = strtoull(lexeme, NULL, 10); - if (lt->id == TOK_SYM) { - mksymbol(sym, value, 1, 0, 0, dbg); - if (lt) { - lt->word = get_symid(sym, address, lt, dbg); - } - isfixup += (lt->word == 0xFFFF); - if (dbg) { - printf("lex(): isfixup: %u\n", isfixup); - } - } - lex_type = TOK_DEC; - l->count++; - t = make_token(lex_type, 0, value, ""); - } else if (isch && lex_type != TOK_HEX && lex_type != TOK_BIN) { - lex_type = TOK_SYM; - l->count++; - t = make_token(lex_type, 0, 0, ""); - memcpy(sym, lexeme, j+1); - uint8_t spaces = 0; - for (; isspace(str[i+spaces]); spaces++); - if (dbg) { - printf("lex(): spaces: %u\n", spaces); - } - if (str[i+spaces] != ':' && str[i+spaces] != '=') { - t->word = get_symid(lexeme, address, t, dbg); - isfixup += (t->word == 0xFFFF); - if (dbg) { - printf("lex(): isfixup: %u\n", isfixup); - } - } - } + if (str[i+spaces] != ':' && str[i+spaces] != '=') { + t->word = get_symid(lexeme, address, t, dbg); + isfixup += (t->word == 0xFFFF); + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); } } } @@ -707,9 +674,14 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } } } - if (lex_type != TOK_SYM) { - memset(lexeme, 0, strlen(lexeme)+1); - lex_type = 0xFF; + switch (lex_type) { + default: + lex_type = 0xFF; + case TOK_CSV: + case TOK_IND: + memset(lexeme, 0, strlen(lexeme)+1); + case TOK_SYM: + break; } if (t) { lt = t; |