diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 154 |
1 files changed, 80 insertions, 74 deletions
@@ -153,14 +153,9 @@ uint16_t get_comment(const char *com, uint8_t dbg) { break; } } - if (comment[i] == NULL) { + if (comment[i] == NULL || i == comidx) { if (dbg) { printf("get_comment(): oof, the index $%04X is NULL.\n", i); - } - return 0xFFFF; - } - if (i == comidx) { - if (dbg) { printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com); } return 0xFFFF; @@ -175,25 +170,17 @@ uint16_t get_string(const char *str, uint8_t dbg) { uint16_t i = 0; uint8_t isstr = 0; for (; i < stridx; i++) { - if (string[i] != NULL) { + if (isstr || string[i] == NULL) { + break; + } else { if (str[0] == string[i][0]) { isstr = !strcmp(str, string[i]); } - } else { - break; - } - if (isstr) { - break; } } - if (string[i] == NULL) { + if (string[i] == NULL || i == stridx) { if (dbg) { printf("get_string(): oof, the index $%04X is NULL.\n", i); - } - return 0xFFFF; - } - if (i == stridx) { - if (dbg) { printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str); } return 0xFFFF; @@ -405,12 +392,10 @@ uint64_t update_addr(struct line *ln, uint64_t address, uint8_t fixup, uint16_t uint16_t find_line(struct line *l, uint16_t ln, uint8_t dbg) { uint16_t i = 0; for (; i < lineidx && l[i].linenum != ln; i++); - if (l[i].linenum == ln) { - if (dbg) { + if (dbg) { + if (l[i].linenum == ln) { printf("find_line(): Found line number %u, at line index %X.\n", ln, i); } - } - if (dbg) { printf("find_line(): linenum: %u, i: %X\n", l[i].linenum, i); } return i; @@ -425,15 +410,22 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { uint16_t symid = 0; uint16_t line = 0; lex_type = 0xFF; + uint8_t k = 0; + uint8_t ch = 0; uint8_t rs = 0; - uint8_t isop = 0; uint8_t base = 0; + + uint8_t isop = 0; int num = 0; int isch = 0; + uint8_t isesc = 0; + uint8_t islinenum; + int16_t ln = -1; + char lnum[6]; - uint8_t islinenum; + uint8_t space = 0; uint8_t tab = 0; uint8_t isstart = 1; @@ -505,11 +497,9 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { lexeme[j] = '\0'; if (!isop) { for (k = 0; k < 6; k++) { - if (tolower(lexeme[0]) == dir_t[k][0]) { - if (!strcasecmp(lexeme, dir_t[k])) { - lex_type = TOK_DIR; - break; - } + if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) { + lex_type = TOK_DIR; + break; } } l[line].dir = k; @@ -549,28 +539,18 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { string[strid] = malloc(j+1); memcpy(string[strid], lexeme, j+1); l[line].str = strid; - if (dbg) { - printf("lex(): str[0x%04X]: %s\n", strid, string[strid]); - } stridx += (line == lineidx); } else { l[line].str = strid; - if (dbg) { - printf("lex(): str[0x%04X]: %s\n", strid, string[strid]); - } + } + if (dbg) { + printf("lex(): str[0x%04X]: %s\n", strid, string[strid]); } if (l[line].dir == DIR_INCLUDE) { l[line].incl = strid; } lex_type = TOK_STRING; break; - case '#': - lexeme[j] = '#'; - lexeme[j+1] = '\0'; - lexeme[j+2] = '\0'; - l[line].am = IMM; - lex_type = TOK_IMM; - break; if (str[i] == '$') { case '$': base = 16; } else if (str[i] == '%') { @@ -586,10 +566,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { if (l[line].cm != 0xFF) { case TOK_PLUS : case TOK_MINUS: l[line].aop = strtoull(lexeme, NULL, base); - l[line].aopbase = (base & 16) ? TOK_HEX : TOK_BIN; + l[line].aopbase = (base & 16) ? BASE_HEX : BASE_BIN; } else { case TOK_SYM: l[line].op = strtoull(lexeme, NULL, base); - l[line].opbase = (base & 16) ? TOK_HEX : TOK_BIN; + l[line].opbase = (base & 16) ? BASE_HEX : BASE_BIN; } if (lex_type == TOK_SYM) { mksymbol(sym, l[line].op, 1, 0, 0, dbg); @@ -604,23 +584,59 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { lex_type = (base & 16) ? TOK_HEX : TOK_BIN; break; - case '+': - lexeme[j] = '+'; - lexeme[j+1] = '\0'; - l[line].cm = 0; - lex_type = TOK_PLUS; - break; - case '-': - lexeme[j] = '-'; - lexeme[j+1] = '\0'; - l[line].cm = 1; - lex_type = TOK_MINUS; + case '\'': + i++; + k = j; + while (str[i] != '\'' || isesc) { + isesc = (str[i] == '\\' && str[i-1] != '\\'); + lexeme[j++] = str[i++]; + } + isesc = 0; + lexeme[j] = '\0'; + switch (lexeme[k]) { + case '\\': + switch (lexeme[++k]) { + case 'n' : ch = '\n'; break; + case 'r' : ch = '\r'; break; + case 'b' : ch = '\b'; break; + case '\'': ch = '\''; break; + case '\"': ch = '\"'; break; + case '\\': ch = '\\'; break; + } + break; + default: ch = lexeme[k]; + } + switch (lex_type) { + case TOK_PLUS : + case TOK_MINUS: + l[line].aop = ch; + l[line].aopbase = BASE_CHAR; + break; + default: + l[line].op = ch; + l[line].opbase = BASE_CHAR; + break; + } + lex_type = TOK_CHAR; break; case '(': - lexeme[j] = '('; - lexeme[j+1] = '\0'; - lexeme[j+2] = '\0'; - l[line].am = IND; + if (str[i] == '#' || str[i] == '(') { + if (str[i] == '#') { + case '#': lex_type = TOK_IMM; + } + l[line].am = (str[i] == '#') ? IMM : IND; + } else { + case '+': + case '-': l[line].cm = (str[i] == '-'); + lex_type = (str[i] == '-') ? TOK_MINUS : TOK_PLUS; + } + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j] = str[i]; + if (str[i] == '=') { + case '=': i++; + l[line].issym = 1; + lex_type = TOK_SYM; + } break; case ')': i++; @@ -679,13 +695,6 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { printf("lex(): isfixup: %u\n", isfixup); } break; - case '=': - i++; - lexeme[j] = '='; - lexeme[j+1] = 0; - l[line].issym = 1; - lex_type = TOK_SYM; - break; case ';': i++; while (str[i] != '\0' && str[i] != '\n') { @@ -702,15 +711,12 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { comment[comid] = malloc(j+1); memcpy(comment[comid], lexeme, j+1); l[line].com = comid; - if (dbg) { - printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); - } comidx += (line == lineidx); } else { l[line].com = comid; - if (dbg) { - printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); - } + } + if (dbg) { + printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); } lex_type = TOK_COMMENT; break; @@ -794,10 +800,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) { if (l[line].cm != 0xFF) { case TOK_PLUS : case TOK_MINUS: l[line].aop = strtoull(lexeme, NULL, 10); - l[line].aopbase = TOK_DEC; + l[line].aopbase = BASE_DEC; } else { case TOK_SYM: l[line].op = strtoull(lexeme, NULL, 10); - l[line].opbase = TOK_DEC; + l[line].opbase = BASE_DEC; } if (lex_type == TOK_SYM) { mksymbol(sym, l[line].op, 1, 0, 0, dbg); |