#include "asmmon.h"
#include "lexer.h"
/* Type of the token most recently recognised by lex(); lex() resets it to 0xFF ("no token"). */
uint8_t lex_type;
/* Next symbol ID to hand out; mksymbol() assigns it to every newly created symbol and increments it. */
uint16_t sym_count = 0;
/* Token list for the line being lexed. lex() moves `tokens` into the line record and then
 * resets both pointers to NULL. Presumably make_token() appends to this list - confirm there. */
token *tokens = NULL;
token *last_tok = NULL;
/* Head and tail of the local-symbol list that mksymbol() searches and appends to when islocal is set.
 * lex() clears both whenever a global label is defined. */
symbol *locals = NULL;
symbol *last_loc = NULL;
/* Global symbol that lex() most recently made current (it is set from last_sym); mksymbol() bumps
 * its `count` for every new local symbol. */
symbol *cur_sym = NULL;
/*
 * mksymbol(): Look up a symbol and optionally (re)define it, creating it if it does not exist.
 *
 * name    - NUL-terminated symbol name; used for the lookup unless useid is set, and always
 *           copied into a newly created symbol.
 * val     - Value stored in the symbol when def is set, or when the symbol is created.
 * def     - Non-zero to mark the symbol as defined.
 * islocal - Non-zero to operate on the `locals` list instead of the global `symbols` list.
 * useid   - Non-zero to match on `id` instead of on `name`.
 * id      - Symbol ID to look for when useid is set.
 * dbg     - Non-zero to print debug output.
 *
 * Returns the matching or newly created symbol, or NULL if memory for a new symbol could not
 * be allocated. The global `defined` is set to 1 when an already defined symbol is defined
 * again, and to 0 when a symbol is defined for the first time or newly created.
 */
symbol *mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t islocal, uint8_t useid, uint16_t id, uint8_t dbg) {
    uint16_t i = 0;
    symbol *s = (!islocal) ? symbols : locals;

    for (; s; s = s->next, i++) {
        uint8_t match;

        /* Cheap first-character test before the full string compare. */
        if (!useid && name[0] != s->name[0]) {
            continue;
        }
        match = (useid) ? (id == s->id) : !strcmp(name, s->name);
        if (!match) {
            continue;
        }
        if (def) {
            if (s->def) {
                if (dbg) {
                    printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name);
                }
                defined = 1;
            } else {
                defined = 0;
            }
            /* The new definition is stored even when the symbol was already defined. */
            s->def = def;
            s->val = val;
            s->id = i;
            if (dbg) {
                printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, name);
                printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id);
            }
        }
        return s;
    }

    /* Not found: allocate the node and its name before touching any list, so that a failed
     * allocation cannot leave a half-initialised symbol linked in. */
    size_t str_size = strlen(name)+1;
    char *name_copy = malloc(str_size);
    s = malloc(sizeof(symbol));
    if (s == NULL || name_copy == NULL) {
        free(name_copy);
        free(s);
        return NULL;
    }
    memcpy(name_copy, name, str_size);

    s->name = name_copy;
    s->local = NULL;
    s->def = def;
    s->val = val;
    s->count = 0;
    s->next = NULL;
    s->id = sym_count++;

    if (!islocal) {
        (last_sym) ? (last_sym->next = s) : (symbols = s);
        last_sym = s;
    } else {
        (last_loc) ? (last_loc->next = s) : (locals = s);
        last_loc = s;
        /* A local symbol may be created before any global symbol has become current. */
        if (cur_sym != NULL) {
            cur_sym->count++;
        }
    }

    defined = 0;
    if (dbg) {
        printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, name, s->id);
    }
    return s;
}
/* Number of fixups queued so far; get_sym() increments it each time it records a reference to an undefined symbol. */
uint16_t fixup_cnt = 0;
/*
 * get_sym(): Fetch the symbol called `name`, creating an undefined placeholder if needed.
 *
 * name    - Symbol name to look up.
 * val     - Address recorded in the fixup when the symbol is not defined yet.
 * t       - Token recorded in the fixup; reslv_fixups() later stores the symbol in it.
 * islocal - Non-zero to look in the local symbol list.
 * dbg     - Non-zero to print debug output.
 *
 * Returns the symbol when it is already defined. Otherwise a fixup is appended to the
 * fixup list, fixup_cnt is incremented, and NULL is returned.
 */
symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t islocal, uint8_t dbg) {
    symbol *entry = mksymbol(name, 0, 0, islocal, 0, 0, dbg);

    if (dbg) {
        printf("get_sym(): Symbol ID: $%X.\n", entry->id);
    }
    if (entry->def) {
        return entry;
    }

    if (dbg) {
        printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name);
    }

    /* Queue a fixup at the tail of the list so the reference can be resolved later. */
    fixup *pending = malloc(sizeof(fixup));
    if (last_fix) {
        last_fix->next = pending;
    } else {
        fixups = pending;
    }
    pending->adr = val;
    pending->t = t;
    pending->s = entry;
    pending->next = NULL;
    last_fix = pending;
    fixup_cnt++;
    return NULL;
}
uint16_t reslv_fixups(uint8_t dbg) {
fixup *f = fixups;
symbol *ls;
uint16_t i = 0, j = 0;
for (; f; f = f->next) {
if (f->s->def) {
if (dbg) {
printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", f->s->id, f->s->name, f->s->val);
}
f->t->sym = f->s;
} else {
if (dbg) {
printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", f->s->name, f->adr);
}
i++;
}
}
return i;
}
/*
 * get_comment(): Return the index of `com` in the comment table, adding it if it is missing.
 *
 * com - NUL-terminated comment text.
 * dbg - Non-zero to print debug output.
 *
 * Only entries below comidx are searched, and the search stops early at a NULL entry.
 * When no entry matches, a copy of `com` is stored at comment[comidx] and comidx is
 * incremented; the index of the new entry is returned.
 *
 * The allocation for the copy is not checked.
 */
uint16_t get_comment(const char *com, uint8_t dbg) {
    uint16_t i = 0;

    /* Test the bound first so comment[comidx] is never read during the search. */
    for (; i < comidx && comment[i]; i++) {
        if (com[0] == comment[i][0] && !strcmp(com, comment[i])) {
            if (dbg) {
                printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i]));
                printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i);
            }
            return i;
        }
    }

    /* Reaching this point always means "not found", including when the scan stopped at
     * comidx and that slot happens to hold a stale non-NULL pointer. */
    if (dbg) {
        printf("get_comment(): oof, the index $%04X is NULL.\n", i);
        printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
    }
    size_t size = strlen(com)+1;
    comment[comidx] = malloc(size);
    memcpy(comment[comidx], com, size);
    return comidx++;
}
/*
 * get_string(): Look up `str` in the string table.
 *
 * str - NUL-terminated string to search for.
 * dbg - Non-zero to print debug output.
 *
 * Only entries below stridx are searched, and the search stops early at a NULL entry.
 *
 * Returns the index of the matching entry, or 0xFFFF when the string is not in the table.
 */
uint16_t get_string(const char *str, uint8_t dbg) {
    uint16_t i = 0;

    for (; i < stridx && string[i] != NULL; i++) {
        /* Return at the matching index itself; deferring the exit to the next iteration
         * would report the entry after the match (or "not found" for the last entry). */
        if (str[0] == string[i][0] && !strcmp(str, string[i])) {
            if (dbg) {
                printf("get_string(): Found string \"%s\", in the table, at index $%04X.\n", str, i);
            }
            return i;
        }
    }

    if (dbg) {
        printf("get_string(): oof, the index $%04X is NULL.\n", i);
        printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str);
    }
    return 0xFFFF;
}
/*
 * find_line(): Search the line list for the record whose line number is `ln`.
 *
 * ln  - Line number to look for.
 * dbg - Non-zero to print debug output.
 *
 * Returns the first matching line, or NULL when no line has that number.
 */
line *find_line(uint32_t ln, uint8_t dbg) {
    line *cur;

    for (cur = lines; cur != NULL; cur = cur->next) {
        if (cur->linenum == ln) {
            if (dbg) {
                printf("find_line(): Found line number %u.\n", ln);
            }
            return cur;
        }
    }

    if (dbg) {
        printf("find_line(): oof, could not find line number %u.\n", ln);
    }
    return NULL;
}
/*
 * lex(): Split one line of source text into tokens and pass them to parse_tokens().
 *
 * str     - The source line; scanning stops where isdelm() reports end of line (value 1).
 * address - Address to record for the line. When the line starts with a number that
 *           matches an existing line record, that record's address is used instead.
 * bline   - Stored unchanged in the line record's `bline` field.
 * dbg     - Non-zero to print debug output.
 *
 * A leading run of digits is treated as a line number. If a record with that number
 * exists it is reused, otherwise a new record is appended to the line list. Tokens are
 * built with make_token(); symbols and labels go through mksymbol()/get_sym(), and
 * references to undefined symbols are counted in `isfixup`.
 *
 * Returns the address reported by parse_tokens(), or `address` unchanged when nothing was
 * consumed from the line or the line record could not be allocated.
 *
 * NOTE(review): `ln` is an int16_t, so line numbers above 32767 do not survive the
 * conversion from strtol() - confirm the intended range.
 * NOTE(review): a new line record's `linenum` is only assigned when at least one
 * character was consumed - confirm that empty lines never reach find_line().
 */
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
    char sym[0x100] = {0};  /* Name of the most recent symbol; empty until one is seen. */
    uint16_t i = 0;         /* Read position in str. */
    uint16_t j = 0;         /* Length/offset of the lexeme being collected. */
    uint16_t comid = 0;
    uint16_t strid = 0;
    uint64_t value = 0;
    lex_type = 0xFF;
    uint8_t k = 0;
    uint8_t ch = 0;
    uint8_t rs = 0;
    uint8_t of = 0;
    uint8_t base = 0;
    uint8_t islocal = 0;
    uint8_t isop = 0;
    uint8_t isesc = 0;
    uint8_t islinenum;
    int16_t ln = -1;
    char lnum[6];
    uint8_t space = 0;
    uint8_t tab = 0;
    line *l = NULL;
    token *t = NULL;        /* Token created in the current iteration, if any. */
    token *lt = NULL;       /* Most recently created token; NULL until the first one. */

    /* Collect the optional leading line number. Digits beyond the capacity of lnum are
     * consumed but not stored, so the buffer cannot overflow. */
    while (isdigit(str[i]) && isdelm(str[i], dbg) != 16) {
        if (j < sizeof(lnum)-1) {
            lnum[j++] = str[i];
        }
        i++;
    }
    islinenum = (i != 0);
    if (i) {
        lnum[j] = '\0';
        ln = strtol(lnum, NULL, 10);
        j = 0;
        l = find_line(ln, dbg);
    } else {
        ln = linenum;
        l = NULL;
    }
    if (l) {
        address = l->addr;
    } else {
        l = malloc(sizeof(line));
        if (l == NULL) {
            /* Out of memory: leave the line list untouched. */
            return address;
        }
        (last_line) ? (last_line->next = l) : (lines = l);
        l->tok = NULL;
        l->next = NULL;
        l->count = 0;
        l->bline = bline;
        last_line = l;
    }
    l->addr = address;

    while (isdelm(str[i], dbg) != 1) {
        uint8_t offset = 0;
        base = 0;
        space = 0;
        tab = 0;
        /* Count the tabs and spaces in front of the token, then skip them. */
        while (isdelm(str[i+j], dbg) == 16) {
            tab += str[i+j] == '\t';
            space += str[i+j] == ' ';
            j++;
        }
        j = 0;
        if (dbg) {
            printf("lex(): tab: %u, space: %u\n", tab, space);
        }
        if (isdelm(str[i], dbg) == 16) {
            for (; isdelm(str[i], dbg) == 16; i++);
        }
        uint8_t ptok = get_ptok(str[i], dbg);
        /* A register-like pre-token that is followed by more identifier characters is
         * really the start of an ordinary identifier. */
        if (is_altok(ptok, dbg)) {
            offset++;
            if ((ptok == PTOK_S && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) {
                offset++;
            }
            switch (get_ptok(str[i+offset], dbg)) {
                case PTOK_B     :
                case PTOK_X     :
                case PTOK_Y     :
                case PTOK_S     :
                case PTOK_P     :
                case PTOK_ALPHA :
                case PTOK_NUMBER: ptok = PTOK_ALPHA; break;
                default         : break;
            }
        }
        switch (ptok) {
            case PTOK_DOT:
                i++;
                /* Collect up to the next whitespace, but never past the end of the line. */
                for (;;) {
                    int d = isdelm(str[i+j], dbg);
                    if (d == 16 || d == 1) {
                        break;
                    }
                    j++;
                }
                memcpy(lexeme, str+i, j);
                lexeme[j] = '\0';
                i += j;
                if (!isop) {
                    /* Not directly after an opcode: look the name up in the directive table. */
                    for (k = 0; k < 7; k++) {
                        if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
                            lex_type = TOK_DIR;
                            break;
                        }
                    }
                    l->count++;
                    t = make_token(lex_type, k, space, tab, 0, "", NULL);
                } else {
                    /* Directly after an opcode: the last character is a size suffix. */
                    lex_type = TOK_RS;
                    switch ((j) ? tolower(lexeme[j-1]) : 0) {
                        case '2':
                        case 'w':
                            rs = 1;
                            break;
                        case '4':
                        case 'd':
                            rs = 2;
                            break;
                        case '8':
                        case 'q':
                            rs = 3;
                            break;
                    }
                    l->count++;
                    t = make_token(lex_type, rs, space, tab, 0, "", NULL);
                    isop = 0;
                }
                break;
            case PTOK_DQUOTE:
                i++;
                /* Scan to the closing quote, honouring backslash escapes; an unterminated
                 * string ends at the end of the line. */
                for (;;) {
                    int d = isdelm(str[i+j], dbg);
                    if (d == 1 || (d == 4 && !isesc)) {
                        break;
                    }
                    isesc = (str[i+j] == '\\' && str[i+(j-1)] != '\\');
                    j++;
                }
                isesc = 0;
                memcpy(lexeme, str+i, j);
                lexeme[j] = '\0';
                i += j;
                strid = get_string(lexeme, dbg);
                if (strid == 0xFFFF) {
                    /* Not in the string table yet: append a copy. */
                    strid = stridx;
                    string[strid] = malloc(j+1);
                    memcpy(string[strid], lexeme, j+1);
                    stridx++;
                }
                if (dbg) {
                    printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
                }
                /* A string may be the first token on the line, in which case lt is NULL. */
                if (lt && lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
                    incl[inc_count+inc_file] = strid;
                    inc_file++;
                }
                lex_type = TOK_STRING;
                l->count++;
                t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL);
                break;
            case PTOK_DOLLAR:
            case PTOK_PERCENT:
            case PTOK_NUMBER:
                value = 0;
                switch (ptok) {
                    case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break;
                    case PTOK_PERCENT: base =  2; lex_type = TOK_BIN; i++; break;
                    case PTOK_NUMBER : base = 10; lex_type = TOK_DEC;      break;
                    default          : break;
                }
                for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++);
                memcpy(lexeme, str+i, j);
                lexeme[j] = '\0';
                i += j;
                value = strtoull(lexeme, NULL, base);
                /* A number that follows a symbol token defines that symbol. A number may
                 * also be the first token on the line, in which case lt is NULL. */
                if (lt && lt->id == TOK_SYM) {
                    mksymbol(sym, value, 1, islocal, 0, 0, dbg);
                    lt->sym = get_sym(sym, address, lt, islocal, dbg);
                    if (!islocal) {
                        cur_sym = last_sym;
                    }
                    islocal = 0;
                    isfixup += (lt->sym == NULL);
                    if (dbg) {
                        printf("lex(): isfixup: %u\n", isfixup);
                    }
                }
                l->count++;
                t = make_token(lex_type, 0, space, tab, value, "", NULL);
                t->digits = (lt && lt->id == TOK_SYM) ? 0 : j;
                break;
            case PTOK_SQUOTE:
                i++;
                k = 0;
                j = 0;
                /* Scan to the closing quote; an unterminated literal ends at the end of the line. */
                for (;;) {
                    int d = isdelm(str[i], dbg);
                    if (d == 1 || (d == 8 && !isesc)) {
                        break;
                    }
                    isesc = (str[i] == '\\' && str[i-1] != '\\');
                    lexeme[j++] = str[i++];
                }
                isesc = 0;
                lexeme[j] = '\0';
                switch (lexeme[k]) {
                    case '\\':
                        switch (lexeme[++k]) {
                            case 'n' : ch = '\n'; break;
                            case 'r' : ch = '\r'; break;
                            case 't' : ch = '\t'; break;
                            case 'b' : ch = '\b'; break;
                            case '\'': ch = '\''; break;
                            case '\"': ch = '\"'; break;
                            case '\\': ch = '\\'; break;
                        }
                        break;
                    default: ch = lexeme[k];
                }
                lex_type = TOK_CHAR;
                l->count++;
                t = make_token(lex_type, 0, space, tab, ch, "", NULL);
                break;
            case PTOK_LBRACK:
            case PTOK_HASH  :
                /* The addressing mode is recorded on the first token of the line, if there is one. */
                if (l->tok) {
                    l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM;
                }
                lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
                memset(lexeme, 0, strlen(lexeme)+1);
                lexeme[j++] = str[i];
                if (lt || t) {
                    (t) ? (t->subspace = space) : (lt->subspace = space);
                    (t) ? (t->subtab = tab) : (lt->subtab = tab);
                }
                break;
            case PTOK_PLUS:
            case PTOK_MINUS:
            case PTOK_GT:
            case PTOK_LT:
            case PTOK_PIPE:
                lex_type = TOK_EXPR;
                switch (ptok) {
                    case PTOK_PLUS : value = EXPR_PLUS ; break;
                    case PTOK_MINUS: value = EXPR_MINUS; break;
                    case PTOK_PIPE : value = EXPR_OR   ; break;
                    case PTOK_GT   : value = (get_ptok(str[i+1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; break;
                    case PTOK_LT   : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break;
                    default        : break;
                }
                l->count++;
                t = make_token(lex_type, value, space, tab, 0, "", NULL);
                memset(lexeme, 0, strlen(lexeme)+1);
                lexeme[j++] = str[i];
                /* Shift operators are two characters long. */
                if (value == EXPR_LSHFT || value == EXPR_RSHFT) {
                    lexeme[j++] = str[++i];
                }
                break;
            case PTOK_EQU:
                i++;
                lex_type = TOK_SYM;
                memset(lexeme, 0, strlen(lexeme)+1);
                lexeme[j] = str[i];
                if (lt || t) {
                    (t) ? (t->subspace = space) : (lt->subspace = space);
                    (t) ? (t->subtab = tab) : (lt->subtab = tab);
                }
                break;
            case PTOK_RBRACK:
                i++;
                lex_type = TOK_IND;
                lexeme[j] = ')';
                lexeme[j+1] = '\0';
                lexeme[j+2] = '\0';
                break;
            case PTOK_COMMA:
                i++;
                if (lex_type != TOK_IND && lex_type != TOK_OF) {
                    lex_type = TOK_CSV;
                }
                lexeme[j] = ',';
                lexeme[j+1] = '\0';
                lexeme[j+2] = '\0';
                break;
            case PTOK_B:
                lexeme[j] = str[i++];
                lexeme[j+1] = '\0';
                lexeme[j+2] = '\0';
                lex_type = TOK_BREG;
                if (l->tok) {
                    l->tok->type = BREG;
                }
                if (lt || t) {
                    (t) ? (t->subspace = space) : (lt->subspace = space);
                    (t) ? (t->subtab = tab) : (lt->subtab = tab);
                }
                break;
            case PTOK_X:
            case PTOK_Y:
                lexeme[j] = str[i++];
                lexeme[j+1] = '\0';
                lexeme[j+2] = '\0';
                /* Only an index register after '(' ... ')' or ',' changes the addressing mode. */
                if (l->tok && (lex_type == TOK_IND || lex_type == TOK_CSV)) {
                    switch (ptok) {
                        case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break;
                        case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break;
                    }
                }
                break;
            case PTOK_S:
            case PTOK_P:
                lexeme[j] = str[i++];
                if (str[i] != ',') {
                    lexeme[j+1] = str[i++];
                } else {
                    lexeme[j+1] = '\0';
                }
                lexeme[j+2] = '\0';
                switch (ptok) {
                    case PTOK_S: of = 1; break;
                    case PTOK_P: of = 2; break;
                }
                lex_type = TOK_OF;
                t = make_token(lex_type, of, space, tab, 0, "", NULL);
                break;
            case PTOK_AT:
                memset(lexeme, 0, strlen(lexeme)+1);
                lexeme[j] = '@';
                islocal = 1;
                lex_type = TOK_LOCAL;
                if (lt || t) {
                    (t) ? (t->subspace = space) : (lt->subspace = space);
                    (t) ? (t->subtab = tab) : (lt->subtab = tab);
                }
                break;
            case PTOK_COLON:
                i++;
                lexeme[j] = ':';
                lexeme[j+1] = '\0';
                lex_type = TOK_LABEL;
                /* Define the label at the current address, then retry any pending fixups. */
                mksymbol(sym, address, 1, islocal, 0, 0, dbg);
                if (isfixup) {
                    isfixup = reslv_fixups(dbg);
                }
                if (lt) {
                    /* The preceding symbol token becomes the label token. */
                    lt->id = lex_type;
                    lt->type = islocal;
                    lt->sym = get_sym(sym, address, t, islocal, dbg);
                    isfixup += (lt->sym == NULL);
                }
                if (!islocal) {
                    /* A global label starts a fresh local scope. */
                    cur_sym = last_sym;
                    cur_sym->local = NULL;
                    locals = NULL;
                    last_loc = NULL;
                } else if (cur_sym && cur_sym->local == NULL) {
                    cur_sym->local = locals;
                }
                islocal = 0;
                if (dbg) {
                    printf("lex(): isfixup: %u\n", isfixup);
                }
                break;
            case PTOK_SCOLON:
                i++;
                /* The comment runs to the end of the line. */
                for (; isdelm(str[i+j], dbg) != 1; j++);
                if (!j) {
                    lexeme[j] = ' ';
                    lexeme[j+1] = '\0';
                } else {
                    memcpy(lexeme, str+i, j);
                    lexeme[j] = '\0';
                    i += j;
                    comid = get_comment(lexeme, dbg);
                    if (dbg) {
                        printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
                    }
                }
                lex_type = TOK_COMMENT;
                l->count++;
                if (j) {
                    t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL);
                } else {
                    t = make_token(lex_type, 0, space, tab, 0, ""            , NULL);
                }
                break;
            case PTOK_ALPHA:
                for (; !isdelm2(str[i+j], dbg); j++);
                memcpy(lexeme, str+i, j);
                lexeme[j] = '\0';
                i += j;
                isop = 0;
                /* Three-letter words that are not followed by ':' are checked against the mnemonic table. */
                if (j == 3 && str[i] != ':') {
                    for (k = 0; k < OPNUM; k++) {
                        if (toupper(lexeme[0]) == mne[k][0] && !strcasecmp(lexeme, mne[k])) {
                            lex_type = TOK_OPCODE;
                            isop = 1;
                            l->count++;
                            t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
                            break;
                        }
                    }
                }
                if (!isop) {
                    uint8_t spaces = 0;
                    for (; isdelm(str[i+spaces], dbg) == 16; spaces++);
                    if (get_ptok(str[i+spaces], dbg) == PTOK_COLON) {
                        islocal = (lex_type == TOK_LOCAL);
                    }
                    lex_type = TOK_SYM;
                    l->count++;
                    t = make_token(lex_type, islocal, space, tab, 0, "", NULL);
                    /* Remember the name for a following ':' or '='; names longer than the
                     * buffer are truncated instead of overflowing it. */
                    size_t symlen = (j < sizeof(sym)) ? j : sizeof(sym)-1;
                    memcpy(sym, lexeme, symlen);
                    sym[symlen] = '\0';
                    if (dbg) {
                        printf("lex(): spaces: %u\n", spaces);
                    }
                    /* Anything other than a definition is a reference to the symbol. */
                    if (str[i+spaces] != ':' && str[i+spaces] != '=') {
                        t->sym = get_sym(lexeme, address, t, islocal, dbg);
                        islocal = 0;
                        isfixup += (t->sym == NULL);
                        if (dbg) {
                            printf("lex(): isfixup: %u\n", isfixup);
                        }
                    }
                }
                break;
            default:
                break;
        }
        if (!l->tok && t) {
            l->tok = tokens;
        }
        if (dbg) {
            printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
        }
        j = 0;
        if (lex_type == TOK_OPCODE && !isop) {
            j = 0;
        } else if (lex_type == TOK_EXPR || !isdelm2(str[i], dbg)) {
            i++;
        }
        /* Only these token types carry lex_type over into the next iteration. */
        switch (lex_type) {
            default:
                lex_type = 0xFF;
                /* fallthrough */
            case TOK_CSV:
            case TOK_IND:
            case TOK_LOCAL:
                memset(lexeme, 0, strlen(lexeme)+1);
                /* fallthrough */
            case TOK_SYM:
                break;
        }
        if (t) {
            lt = t;
            t = t->next;
        }
    }

    if (i) {
        /* Hand the finished token list to the line and start a fresh one. */
        l->tok = tokens;
        tokens = NULL;
        last_tok = NULL;
        bytecount dummy;
        address = parse_tokens(l->tok, &dummy, 0, address, dbg);
        if (dbg) {
            printf("lex(): Next address: $%"PRIX64"\n", address);
        }
        if (ln > linenum || islinenum) {
            l->linenum = ln;
            if (ln > linenum) {
                linenum+=(10+(ln & 10));
            }
        } else if (!islinenum) {
            l->linenum = linenum;
            linenum += 10;
        }
    }
    return address;
}