summary refs log tree commit diff
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r-- lexer.c 328
1 files changed, 150 insertions, 178 deletions
diff --git a/lexer.c b/lexer.c
index ea83801..9610297 100644
--- a/lexer.c
+++ b/lexer.c
@@ -1,4 +1,5 @@
#include "asmmon.h"
+#include "lexer.h"
uint8_t lex_type;
uint16_t sym_count = 0;
@@ -215,8 +216,8 @@ uint16_t reslv_fixups(uint8_t dbg) {
}
-line *find_line(uint16_t ln, uint8_t dbg) {
- uint16_t i = 0;
+line *find_line(uint32_t ln, uint8_t dbg) {
+ uint32_t i = 0;
line *l = lines;
for (; l && l->linenum != ln; l = l->next);
/*if (dbg) {
@@ -276,7 +277,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
token *t = NULL;
token *lt = NULL;
- while (isdigit(str[i]) && !isspace(str[i])) {
+ while (isdigit(str[i]) && isdelm(str[i], dbg) != 16) {
lnum[j++] = str[i++];
}
islinenum = i;
@@ -284,10 +285,11 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
lnum[j] = '\0';
ln = strtol(lnum, NULL, 10);
j = 0;
+ l = find_line(ln, dbg);
} else {
ln = linenum;
+ l = NULL;
}
- l = find_line(ln, dbg);
if (l) {
address = l->addr;
} else {
@@ -299,11 +301,11 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
last_line = l;
}
- while (str[i] != '\0' && str[i] != '\n') {
+ while (isdelm(str[i], dbg) != 1) {
base = 0;
space = 0;
tab = 0;
- while (isspace(str[i+j])) {
+ while (isdelm(str[i+j], dbg) == 16) {
tab += str[i+j] == '\t';
space += str[i+j] == ' ';
j++;
@@ -319,18 +321,23 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
printf("lex(): starting tabs: %u, starting spaces: %u\n", l->stab, l->sspace);
}
}
- if (isspace(str[i])) {
- while (isspace(str[i])) {
- i++;
+ if (isdelm(str[i], dbg) == 16) {
+ for (; isdelm(str[i], dbg) == 16; i++);
+ }
+ uint8_t ptok = get_ptok(str[i], dbg);
+ if (ptok == PTOK_X || ptok == PTOK_Y) {
+ switch (get_ptok(str[i+1], dbg)) {
+ case PTOK_ALPHA :
+ case PTOK_NUMBER: ptok = PTOK_ALPHA; break;
}
}
- switch (str[i]) {
- case '.':
+ switch (ptok) {
+ case PTOK_DOT:
i++;
- while (!isspace(str[i])) {
- lexeme[j++] = str[i++];
- }
+ for (; isdelm(str[i+j], dbg) != 16; j++);
+ memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
+ i += j;
if (!isop) {
for (k = 0; k < 6; k++) {
if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
@@ -361,11 +368,12 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
isop = 0;
}
break;
- case '\"':
+ case PTOK_DQUOTE:
i++;
- while (str[i] != '\"') {
- lexeme[j++] = str[i++];
- }
+ for (; isdelm(str[i+j], dbg) != 4; j++);
+ memcpy(lexeme, str+i, j);
+ lexeme[j] = '\0';
+ i += j;
strid = get_string(lexeme, dbg);
if (strid == 0xFFFF) {
strid = stridx;
@@ -378,22 +386,26 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
}
if (lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
- incl[inc_file++] = strid;
+ incl[inc_count] = strid;
+ inc_file++;
}
lex_type = TOK_STRING;
l->count++;
t = make_token(lex_type, 0, 0, string[strid]);
break;
- case '$':
- case '%':
+ case PTOK_DOLLAR:
+ case PTOK_PERCENT:
+ case PTOK_NUMBER:
value = 0;
- base = (str[i] == '$') ? 16 : 2;
- i++;
- while (isxdigit(str[i]) && (str[i] != '\0' && str[i] != '\n' && str[i] != ',')) {
- lexeme[j++] = str[i++];
+ switch (ptok) {
+ case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break;
+ case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break;
+ case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; /**/ break;
}
+ for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++);
+ memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
- lex_type = (base == 16) ? TOK_HEX : TOK_BIN;
+ i += j;
value = strtoull(lexeme, NULL, base);
if (lt->id == TOK_SYM) {
mksymbol(sym, value, 1, 0, 0, dbg);
@@ -408,10 +420,10 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
l->count++;
t = make_token(lex_type, 0, value, "");
break;
- case '\'':
+ case PTOK_SQUOTE:
i++;
k = j;
- while (str[i] != '\'' || isesc) {
+ while (isdelm(str[i], dbg) != 8 || isesc) {
isesc = (str[i] == '\\' && str[i-1] != '\\');
lexeme[j++] = str[i++];
}
@@ -434,25 +446,25 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
l->count++;
t = make_token(lex_type, 0, ch, "");
break;
- case '(': l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
- case '#': l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
- case '+':
- case '-':
- case '>':
- case '<':
+ case PTOK_LBRACK: l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
+ case PTOK_HASH : l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
+ case PTOK_PLUS:
+ case PTOK_MINUS:
+ case PTOK_GT:
+ case PTOK_LT:
lex_type = TOK_EXPR;
- switch (str[i]) {
- case '+': value = EXPR_PLUS ; break;
- case '-': value = EXPR_MINUS; break;
- case '>': value = EXPR_LOW ; break;
- case '<': value = EXPR_HIGH ; break;
+ switch (ptok) {
+ case PTOK_PLUS : value = EXPR_PLUS ; break;
+ case PTOK_MINUS: value = EXPR_MINUS; break;
+ case PTOK_GT : value = EXPR_LOW ; break;
+ case PTOK_LT : value = EXPR_HIGH ; break;
}
l->count++;
t = make_token(lex_type, value, 0, "");
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j++] = str[i];
break;
- case '=':
+ case PTOK_EQU:
i++;
lex_type = TOK_SYM;
l->count++;
@@ -460,49 +472,34 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j] = str[i];
break;
- case ')':
+ case PTOK_RBRACK:
i++;
- if (str[i] == ',') {
- i++;
- while (isspace(str[i])) {
- lexeme[j++] = str[i++];
- }
- if (l->tok->type == IND && tolower(str[i]) == 'y') {
- lexeme[j++] = 'y';
- l->tok->type = INDY;
- }
- lexeme[j] = '\0';
- } else {
- lexeme[j] = ')';
- lexeme[j+1] = '\0';
- lexeme[j+2] = '\0';
- }
+ lex_type = TOK_IND;
+ lexeme[j] = ')';
+ lexeme[j+1] = '\0';
+ lexeme[j+2] = '\0';
break;
- case ',':
+ case PTOK_COMMA:
i++;
- while (isspace(str[i])) {
- lexeme[j++] = str[i++];
+ if (lex_type != TOK_IND) {
+ lex_type = TOK_CSV;
}
- if (l->tok->type == IND && tolower(str[i]) == 'x') {
- l->tok->type = INDX;
- lexeme[j++] = 'x';
- i++;
- } else {
- switch (tolower(str[i])) {
- case 'x':
- l->tok->type = ZMX;
- lexeme[j++] = 'x';
- break;
- case 'y':
- l->tok->type = ZMY;
- lexeme[j++] = 'y';
- break;
- default: lex_type = TOK_COMMA; i--; break;
- }
+ lexeme[j] = ',';
+ lexeme[j+1] = '\0';
+ lexeme[j+2] = '\0';
+ break;
+ case PTOK_X:
+ case PTOK_Y:
+ switch (ptok) {
+ case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break;
+ case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break;
}
- lexeme[j] = '\0';
+ lexeme[j] = str[i];
+ lexeme[j+1] = '\0';
+ lexeme[j+2] = '\0';
+ i++;
break;
- case ':':
+ case PTOK_COLON:
i++;
lexeme[j] = ':';
lexeme[j+1] = '\0';
@@ -519,12 +516,12 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
printf("lex(): isfixup: %u\n", isfixup);
}
break;
- case ';':
+ case PTOK_SCOLON:
i++;
- while (str[i] != '\0' && str[i] != '\n') {
- lexeme[j++] = str[i++];
- }
+ for (; isdelm(str[i+j], dbg) != 1; j++);
+ memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
+ i += j;
comid = get_comment(lexeme, dbg);
if (comid == 0xFFFF) {
/*if (line != lineidx && l[line].com != 0xFFFF) {
@@ -545,118 +542,88 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
l->count++;
t = make_token(lex_type, 0, 0, comment[comid]);
break;
- default:
- if (isalnum(str[i]) || str[i] == '_') {
- while (!isspace(str[i])) {
- switch (str[i]) {
+ case PTOK_ALPHA:
+ while (isdelm(str[i], dbg) != 16) {
+ switch (str[i]) {
+ case ')':
+ case ',':
+ case '.':
+ case '+':
+ case '<':
+ case '>':
+ case '-':
+ case ':':
+ case '=':
+ case ';':
+ case '\0':
+ case '\n':
+ isch = 0;
+ break;
+ default:
+ isch = 1;
+ lexeme[j++] = str[i++];
+ break;
+ }
+ if (!isch) {
+ break;
+ }
+ }
+ lexeme[j] = '\0';
+ isch = 0;
+ isop = 0;
+ if (j == 3 && str[i] != ':') {
+ for (k = 0; k < OPNUM; k++) {
+ if (toupper(lexeme[0]) == mne[k][0]) {
+ if (!strcasecmp(lexeme, mne[k])) {
+ lex_type = TOK_OPCODE;
+ isop = 1;
+ l->count++;
+ t = make_token(lex_type, 0xFF, k, "");
+ break;
+ }
+ }
+ }
+ }
+ if (!isop) {
+ for (k = 0; lexeme[k] != '\0';) {
+ switch (lexeme[k]) {
case ')':
case ',':
case '.':
case '+':
+ case '-':
case '<':
case '>':
- case '-':
case ':':
- case '=':
case ';':
+ case '=':
case '\0':
case '\n':
- isch = 0;
+ fall = 1;
break;
default:
- isch = 1;
- lexeme[j++] = str[i++];
+ fall = 0;
break;
}
- if (!isch) {
+ if (fall) {
break;
}
+ k++;
}
- lexeme[j] = '\0';
- isch = 0;
- isop = 0;
- if (j == 3 && str[i] != ':') {
- for (k = 0; k < OPNUM; k++) {
- if (toupper(lexeme[0]) == mne[k][0]) {
- if (!strcasecmp(lexeme, mne[k])) {
- lex_type = TOK_OPCODE;
- isop = 1;
- l->count++;
- t = make_token(lex_type, 0xFF, k, "");
- break;
- }
- }
- }
+ lex_type = TOK_SYM;
+ l->count++;
+ t = make_token(lex_type, 0, 0, "");
+ memcpy(sym, lexeme, j+1);
+ uint8_t spaces = 0;
+ for (; isdelm(str[i+spaces], dbg) == 16; spaces++);
+ if (dbg) {
+ printf("lex(): spaces: %u\n", spaces);
}
- if (!isop) {
- for (k = 0; lexeme[k] != '\0';) {
- switch (lexeme[k]) {
- case ')':
- case ',':
- case '.':
- case '+':
- case '-':
- case '<':
- case '>':
- case ':':
- case ';':
- case '=':
- case '\0':
- case '\n':
- fall = 1;
- break;
- default:
- fall = 0;
- break;
- }
- if (fall) {
- break;
- }
- if ((isalnum(lexeme[k]) || lexeme[k] == '_')) {
- if (!isch) {
- isch = isalpha(lexeme[k]);
- }
- num = isdigit(lexeme[k]) && !isch;
- k++;
- } else {
- break;
- }
- }
- if (lexeme[k] == '\0') {
- if (num) {
- value = 0;
- value = strtoull(lexeme, NULL, 10);
- if (lt->id == TOK_SYM) {
- mksymbol(sym, value, 1, 0, 0, dbg);
- if (lt) {
- lt->word = get_symid(sym, address, lt, dbg);
- }
- isfixup += (lt->word == 0xFFFF);
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
- }
- lex_type = TOK_DEC;
- l->count++;
- t = make_token(lex_type, 0, value, "");
- } else if (isch && lex_type != TOK_HEX && lex_type != TOK_BIN) {
- lex_type = TOK_SYM;
- l->count++;
- t = make_token(lex_type, 0, 0, "");
- memcpy(sym, lexeme, j+1);
- uint8_t spaces = 0;
- for (; isspace(str[i+spaces]); spaces++);
- if (dbg) {
- printf("lex(): spaces: %u\n", spaces);
- }
- if (str[i+spaces] != ':' && str[i+spaces] != '=') {
- t->word = get_symid(lexeme, address, t, dbg);
- isfixup += (t->word == 0xFFFF);
- if (dbg) {
- printf("lex(): isfixup: %u\n", isfixup);
- }
- }
- }
+ if (str[i+spaces] != ':' && str[i+spaces] != '=') {
+ t->word = get_symid(lexeme, address, t, dbg);
+ isfixup += (t->word == 0xFFFF);
+ if (dbg) {
+ printf("lex(): isfixup: %u\n", isfixup);
}
}
}
@@ -707,9 +674,14 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
}
}
}
- if (lex_type != TOK_SYM) {
- memset(lexeme, 0, strlen(lexeme)+1);
- lex_type = 0xFF;
+ switch (lex_type) {
+ default:
+ lex_type = 0xFF;
+ case TOK_CSV:
+ case TOK_IND:
+ memset(lexeme, 0, strlen(lexeme)+1);
+ case TOK_SYM:
+ break;
}
if (t) {
lt = t;