summaryrefslogtreecommitdiff
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r--lexer.c408
1 files changed, 153 insertions, 255 deletions
diff --git a/lexer.c b/lexer.c
index 9610297..47109ca 100644
--- a/lexer.c
+++ b/lexer.c
@@ -5,31 +5,21 @@ uint8_t lex_type;
uint16_t sym_count = 0;
token *tokens = NULL;
token *last_tok = NULL;
+symbol *locals = NULL;
+symbol *cur_sym = NULL;
-void init_symbol() {
- uint16_t i = 0;
- for (; i < 0x1000; i++) {
- symbols[i] = 0;
- fixups[i] = 0;
- }
-}
-
-uint16_t mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, uint16_t id, uint8_t dbg) {
+symbol *mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t islocal, uint8_t useid, uint16_t id, uint8_t dbg) {
uint16_t i = 0;
+ symbol *s = (!islocal || islocal == 2) ? symbols : locals;
uint8_t flag = 0;
- for (; i < sym_count; i++) {
- if (useid) {
- flag = (id == symbols[i]->id);
- } else {
- if (name[0] == symbols[i]->name[0]) {
- flag = !strcmp(name, symbols[i]->name);
- } else {
- continue;
- }
+ for (; s; s = s->next, i++) {
+ if (!useid && name[0] != s->name[0]) {
+ continue;
}
+ flag = (useid) ? (id == s->id) : !strcmp(name, s->name);
if (flag) {
if (def) {
- if (symbols[i]->def) {
+ if (s->def) {
if (dbg) {
printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name);
}
@@ -37,126 +27,92 @@ uint16_t mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, ui
} else {
defined = 0;
}
- symbols[i]->def = def;
- symbols[i]->val = val;
- symbols[i]->id = i;
+ s->def = def;
+ s->val = val;
+ s->id = i;
if (dbg) {
printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, name);
- printf("mksymbol(): i: $%X, id: $%04X\n", i, symbols[i]->id);
+ printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id);
}
}
- return symbols[i]->id;
+ return s;
}
}
- symbols[i] = malloc(sizeof(**symbols) + strlen(name));
- symbols[i]->def = def;
- symbols[i]->val = val;
- strcpy(symbols[i]->name, name);
- symbols[i]->id = sym_count++;
+ size_t str_size = strlen(name)+1;
+ s = malloc(sizeof(*s));
+ s->name = malloc(str_size);
+ s->def = def;
+ s->val = val;
+ s->count = 0;
+ memcpy(s->name, name, str_size);
+ s->next = (!islocal) ? symbols : locals;
+ s->id = sym_count++;
+ if (!islocal) {
+ s->local = NULL;
+ symbols = s;
+ } else {
+ cur_sym->count++;
+ locals = s;
+ }
defined = 0;
if (dbg) {
printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, name, sym_count-1);
}
- return sym_count-1;
+ return s;
}
-uint64_t use_symbol(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) {
- uint16_t i;
- i = mksymbol(name, 0, 0, useid, id, dbg);
- uint8_t is_defined = (i != 0xFFFF);
- val++;
+uint16_t fixup_cnt = 0;
+symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t islocal, uint8_t dbg) {
+ symbol *s = mksymbol(name, 0, 0, islocal, 0, 0, dbg);
if (dbg) {
- puts("use_symbol(): We also got here.");
- printf("use_symbol(): i: $%X\n", i);
+ printf("get_sym(): Symbol ID: $%X.\n", s->id);
}
- if (symbols[i] != NULL) {
- if (symbols[i]->def) {
- return symbols[i]->val;
- } else {
- if (dbg) {
- printf("use_symbol(): ");
- printf("oof, symbol ");
- if (useid) {
- printf("id $%04X, ", id);
- } else {
- printf("%s, ", name);
- }
- puts("does not exist, yet.");
- }
- return val-1;
+ if (s->def) {
+ return s;
+ } else {
+ if (dbg) {
+ printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name);
}
+ fixup *f = malloc(sizeof(*f));
+ f->next = fixups;
+ f->adr = val;
+ f->t = t;
+ f->s = s;
+ fixups = f;
+ fixup_cnt++;
+ return NULL;
}
- return val-1;
}
-uint8_t set_symval(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) {
- uint16_t i = mksymbol(name, 0, 0, useid, id, dbg);
- if (symbols[i] != NULL) {
- if (symbols[i]->def) {
- symbols[i]->val = val;
- return 1;
+uint16_t reslv_fixups(uint8_t dbg) {
+ fixup *f = fixups;
+ symbol *ls;
+ uint16_t i = 0, j = 0;
+ for (; f; f = f->next) {
+ if (f->s->def) {
+ if (dbg) {
+ printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", f->s->id, f->s->name, f->s->val);
+ }
+ f->t->sym = f->s;
} else {
if (dbg) {
- printf("set_symval(): ");
- printf("oof, symbol ");
- if (useid) {
- printf("id $%04X, ", id);
- } else {
- printf("%s, ", name);
- }
- puts("does not exist, yet.");
+ printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", f->s->name, f->adr);
}
- return 0;
- }
- }
- return 0;
-}
-
-char *get_symname(uint16_t id, uint8_t dbg) {
- if (symbols[id]->def) {
- return symbols[id]->name;
- } else {
- if (dbg) {
- printf("get_symname(): oof, symbol id $%04X, has not been defined, yet.\n", id);
+ i++;
}
- return NULL;
}
-}
+ return i;
-uint16_t fixup_cnt = 0;
-uint16_t get_symid(const char *name, uint64_t val, token *t, uint8_t dbg) {
- uint16_t i = mksymbol(name, 0, 0, 0, 0, dbg);
- if (dbg) {
- printf("get_symid(): Symbol ID: $%X, i: $%X.\n", symbols[i]->id, i);
- }
- if (symbols[i]->def) {
- return symbols[i]->id;
- } else {
- if (dbg) {
- printf("get_symid(): oof, symbol %s, does not exist, yet.\n", name);
- }
- fixups[fixup_cnt] = malloc(sizeof(**fixups));
- fixups[fixup_cnt]->adr = val;
- fixups[fixup_cnt]->t = t;
- fixups[fixup_cnt]->s = symbols[i];
- fixup_cnt++;
- return 0xFFFF;
- }
}
uint16_t get_comment(const char *com, uint8_t dbg) {
uint16_t i = 0;
uint8_t iscom = 0;
for (; i < comidx; i++) {
- if (comment[i] != NULL) {
- if (com[0] == comment[i][0]) {
- iscom = !strcmp(com, comment[i]);
- }
- } else {
- break;
- }
- if (iscom) {
+ if (comment[i] == NULL || iscom) {
break;
+ } else if (com[0] == comment[i][0]) {
+ iscom = !strcmp(com, comment[i]);
}
}
if (comment[i] == NULL || i == comidx) {
@@ -178,10 +134,8 @@ uint16_t get_string(const char *str, uint8_t dbg) {
for (; i < stridx; i++) {
if (isstr || string[i] == NULL) {
break;
- } else {
- if (str[0] == string[i][0]) {
- isstr = !strcmp(str, string[i]);
- }
+ } else if (str[0] == string[i][0]) {
+ isstr = !strcmp(str, string[i]);
}
}
if (string[i] == NULL || i == stridx) {
@@ -197,35 +151,10 @@ uint16_t get_string(const char *str, uint8_t dbg) {
return i;
}
-uint16_t reslv_fixups(uint8_t dbg) {
- uint16_t i = 0, j = 0;
- for (; fixups[j]; j++) {
- if (fixups[j]->s->def) {
- if (dbg) {
- printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", fixups[j]->s->id, fixups[j]->s->name, fixups[j]->s->val);
- }
- fixups[j]->t->word = fixups[j]->s->id;
- } else {
- if (dbg) {
- printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", fixups[j]->s->name, fixups[j]->adr);
- }
- i++;
- }
- }
- return i;
-
-}
-
line *find_line(uint32_t ln, uint8_t dbg) {
uint32_t i = 0;
line *l = lines;
for (; l && l->linenum != ln; l = l->next);
- /*if (dbg) {
- if (l->linenum == ln) {
- printf("find_line(): Found line number %u, at line index %X.\n", ln, i);
- }
- printf("find_line(): linenum: %u, i: %X\n", l->linenum, i);
- }*/
if (l != NULL) {
if (l->linenum == ln) {
if (dbg) {
@@ -257,6 +186,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
uint8_t rs = 0;
uint8_t base = 0;
+ uint8_t islocal = 0;
+
uint8_t isop = 0;
int num = 0;
int isch = 0;
@@ -346,7 +277,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
}
}
l->count++;
- t = make_token(lex_type, k, 0, "");
+ t = make_token(lex_type, k, 0, "", NULL);
} else {
lex_type = TOK_RS;
switch (tolower(lexeme[j-1])) {
@@ -364,7 +295,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
break;
}
l->count++;
- t = make_token(lex_type, rs, 0, "");
+ t = make_token(lex_type, rs, 0, "", NULL);
isop = 0;
}
break;
@@ -391,7 +322,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
}
lex_type = TOK_STRING;
l->count++;
- t = make_token(lex_type, 0, 0, string[strid]);
+ t = make_token(lex_type, 0, 0, string[strid], NULL);
break;
case PTOK_DOLLAR:
case PTOK_PERCENT:
@@ -408,17 +339,22 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
i += j;
value = strtoull(lexeme, NULL, base);
if (lt->id == TOK_SYM) {
- mksymbol(sym, value, 1, 0, 0, dbg);
+ mksymbol(sym, value, 1, islocal, 0, 0, dbg);
if (lt) {
- lt->word = get_symid(sym, address, lt, dbg);
+ lt->sym = get_sym(sym, address, lt, islocal, dbg);
+ }
+ if (!islocal) {
+ cur_sym = symbols;
+ locals = cur_sym->local;
}
- isfixup += (lt->word == 0xFFFF);
+ islocal = 0;
+ isfixup += (lt->sym == NULL);
if (dbg) {
printf("lex(): isfixup: %u\n", isfixup);
}
}
l->count++;
- t = make_token(lex_type, 0, value, "");
+ t = make_token(lex_type, 0, value, "", NULL);
break;
case PTOK_SQUOTE:
i++;
@@ -444,10 +380,15 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
}
lex_type = TOK_CHAR;
l->count++;
- t = make_token(lex_type, 0, ch, "");
+ t = make_token(lex_type, 0, ch, "", NULL);
+ break;
+ case PTOK_LBRACK:
+ case PTOK_HASH :
+ l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM;
+ lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
+ memset(lexeme, 0, strlen(lexeme)+1);
+ lexeme[j++] = str[i];
break;
- case PTOK_LBRACK: l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
- case PTOK_HASH : l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
case PTOK_PLUS:
case PTOK_MINUS:
case PTOK_GT:
@@ -460,15 +401,13 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
case PTOK_LT : value = EXPR_HIGH ; break;
}
l->count++;
- t = make_token(lex_type, value, 0, "");
+ t = make_token(lex_type, value, 0, "", NULL);
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j++] = str[i];
break;
case PTOK_EQU:
i++;
lex_type = TOK_SYM;
- l->count++;
- t = make_token(lex_type, 0, 0, "");
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j] = str[i];
break;
@@ -490,28 +429,43 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
break;
case PTOK_X:
case PTOK_Y:
+ lexeme[j] = str[i++];
+ lexeme[j+1] = '\0';
+ lexeme[j+2] = '\0';
+ if (lex_type != TOK_IND && lex_type != TOK_CSV) {
+ break;
+ }
switch (ptok) {
case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break;
case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break;
}
- lexeme[j] = str[i];
- lexeme[j+1] = '\0';
- lexeme[j+2] = '\0';
- i++;
+ break;
+ case PTOK_AT:
+ memset(lexeme, 0, strlen(lexeme)+1);
+ lexeme[j] = '@';
+ islocal = 1;
+ lex_type = TOK_LOCAL;
break;
case PTOK_COLON:
i++;
lexeme[j] = ':';
lexeme[j+1] = '\0';
lex_type = TOK_LABEL;
- l->count++;
- t = make_token(lex_type, 0, 0, "");
- mksymbol(sym, address, 1, 0, 0, dbg);
+ mksymbol(sym, address, 1, islocal, 0, 0, dbg);
if (isfixup) {
isfixup = reslv_fixups(dbg);
}
- t->word = get_symid(sym, address, t, dbg);
- isfixup += (t->word == 0xFFFF);
+ if (lt) {
+ lt->id = lex_type;
+ lt->type = islocal;
+ lt->sym = get_sym(sym, address, t, islocal, dbg);
+ isfixup += (lt->sym == NULL);
+ }
+ if (!islocal) {
+ cur_sym = symbols;
+ locals = cur_sym->local;
+ }
+ islocal = 0;
if (dbg) {
printf("lex(): isfixup: %u\n", isfixup);
}
@@ -519,54 +473,43 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
case PTOK_SCOLON:
i++;
for (; isdelm(str[i+j], dbg) != 1; j++);
- memcpy(lexeme, str+i, j);
- lexeme[j] = '\0';
- i += j;
- comid = get_comment(lexeme, dbg);
- if (comid == 0xFFFF) {
- /*if (line != lineidx && l[line].com != 0xFFFF) {
- comid = l[line].com;
- } else {
- comid = comidx;
- }*/
- comid = comidx;
- comment[comid] = malloc(j+1);
- memcpy(comment[comid], lexeme, j+1);
- comidx++;
+ if (!j) {
+ lexeme[j] = ' ';
+ lexeme[j+1] = '\0';
+
} else {
- }
- if (dbg) {
- printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
+ memcpy(lexeme, str+i, j);
+ lexeme[j] = '\0';
+ i += j;
+ comid = get_comment(lexeme, dbg);
+ if (comid == 0xFFFF) {
+ /*if (line != lineidx && l[line].com != 0xFFFF) {
+ comid = l[line].com;
+ } else {
+ comid = comidx;
+ }*/
+ comid = comidx;
+ comment[comid] = malloc(j+1);
+ memcpy(comment[comid], lexeme, j+1);
+ comidx++;
+ } else {
+ }
+ if (dbg) {
+ printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
+ }
}
lex_type = TOK_COMMENT;
l->count++;
- t = make_token(lex_type, 0, 0, comment[comid]);
+ if (j) {
+ t = make_token(lex_type, 0, 0, comment[comid], NULL);
+ } else {
+ t = make_token(lex_type, 0, 0, "" , NULL);
+ }
+
break;
case PTOK_ALPHA:
- while (isdelm(str[i], dbg) != 16) {
- switch (str[i]) {
- case ')':
- case ',':
- case '.':
- case '+':
- case '<':
- case '>':
- case '-':
- case ':':
- case '=':
- case ';':
- case '\0':
- case '\n':
- isch = 0;
- break;
- default:
- isch = 1;
- lexeme[j++] = str[i++];
- break;
- }
- if (!isch) {
- break;
- }
+ while (!isdelm2(str[i], dbg)) {
+ lexeme[j++] = str[i++];
}
lexeme[j] = '\0';
isch = 0;
@@ -578,41 +521,16 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
lex_type = TOK_OPCODE;
isop = 1;
l->count++;
- t = make_token(lex_type, 0xFF, k, "");
+ t = make_token(lex_type, 0xFF, k, "", NULL);
break;
}
}
}
}
if (!isop) {
- for (k = 0; lexeme[k] != '\0';) {
- switch (lexeme[k]) {
- case ')':
- case ',':
- case '.':
- case '+':
- case '-':
- case '<':
- case '>':
- case ':':
- case ';':
- case '=':
- case '\0':
- case '\n':
- fall = 1;
- break;
- default:
- fall = 0;
- break;
- }
- if (fall) {
- break;
- }
- k++;
- }
lex_type = TOK_SYM;
l->count++;
- t = make_token(lex_type, 0, 0, "");
+ t = make_token(lex_type, islocal, 0, "", NULL);
memcpy(sym, lexeme, j+1);
uint8_t spaces = 0;
for (; isdelm(str[i+spaces], dbg) == 16; spaces++);
@@ -620,8 +538,9 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
printf("lex(): spaces: %u\n", spaces);
}
if (str[i+spaces] != ':' && str[i+spaces] != '=') {
- t->word = get_symid(lexeme, address, t, dbg);
- isfixup += (t->word == 0xFFFF);
+ t->sym = get_sym(lexeme, address, t, islocal, dbg);
+ islocal = 0;
+ isfixup += (t->sym == NULL);
if (dbg) {
printf("lex(): isfixup: %u\n", isfixup);
}
@@ -639,31 +558,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
j = 0;
if (lex_type == TOK_OPCODE && !isop) {
j = 0;
- } else {
- if (lex_type == TOK_EXPR) {
- i++;
- } else {
- switch (str[i]) {
- case ')':
- case ',':
- case '.':
- case '+':
- case '-':
- case '<':
- case '>':
- case ':':
- case ';':
- case '=':
- case ' ':
- case '\t':
- case '\n':
- case '\0':
- break;
- default:
- i++;
- break;
- }
- }
+ } else if (lex_type == TOK_EXPR || !isdelm2(str[i], dbg)) {
+ i++;
}
if (lex_type == TOK_COMMENT) {
if (!isstart) {
@@ -679,6 +575,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
lex_type = 0xFF;
case TOK_CSV:
case TOK_IND:
+ case TOK_LOCAL:
memset(lexeme, 0, strlen(lexeme)+1);
case TOK_SYM:
break;
@@ -707,5 +604,6 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
linenum += 10;
}
}
+ l->addr = address;
return address;
}