From df81112b8369eeca5788a6f28c6b6b85ca911a95 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Mon, 1 Jun 2020 15:15:17 -0400 Subject: Did some more refactoring to the assembler. - Refactored the symbol, and fixup table to now use a linked list - Added support for local symbols to the assembler. - Rewrote SuBEditor, and SuBAsm to use local symbols. --- lexer.c | 408 ++++++++++++++++++++++++---------------------------------------- 1 file changed, 153 insertions(+), 255 deletions(-) (limited to 'lexer.c') diff --git a/lexer.c b/lexer.c index 9610297..47109ca 100644 --- a/lexer.c +++ b/lexer.c @@ -5,31 +5,21 @@ uint8_t lex_type; uint16_t sym_count = 0; token *tokens = NULL; token *last_tok = NULL; +symbol *locals = NULL; +symbol *cur_sym = NULL; -void init_symbol() { - uint16_t i = 0; - for (; i < 0x1000; i++) { - symbols[i] = 0; - fixups[i] = 0; - } -} - -uint16_t mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, uint16_t id, uint8_t dbg) { +symbol *mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t islocal, uint8_t useid, uint16_t id, uint8_t dbg) { uint16_t i = 0; + symbol *s = (!islocal || islocal == 2) ? symbols : locals; uint8_t flag = 0; - for (; i < sym_count; i++) { - if (useid) { - flag = (id == symbols[i]->id); - } else { - if (name[0] == symbols[i]->name[0]) { - flag = !strcmp(name, symbols[i]->name); - } else { - continue; - } + for (; s; s = s->next, i++) { + if (!useid && name[0] != s->name[0]) { + continue; } + flag = (useid) ? (id == s->id) : !strcmp(name, s->name); if (flag) { if (def) { - if (symbols[i]->def) { + if (s->def) { if (dbg) { printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name); } @@ -37,126 +27,92 @@ uint16_t mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, ui } else { defined = 0; } - symbols[i]->def = def; - symbols[i]->val = val; - symbols[i]->id = i; + s->def = def; + s->val = val; + s->id = i; if (dbg) { printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, name); - printf("mksymbol(): i: $%X, id: $%04X\n", i, symbols[i]->id); + printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id); } } - return symbols[i]->id; + return s; } } - symbols[i] = malloc(sizeof(**symbols) + strlen(name)); - symbols[i]->def = def; - symbols[i]->val = val; - strcpy(symbols[i]->name, name); - symbols[i]->id = sym_count++; + size_t str_size = strlen(name)+1; + s = malloc(sizeof(*s)); + s->name = malloc(str_size); + s->def = def; + s->val = val; + s->count = 0; + memcpy(s->name, name, str_size); + s->next = (!islocal) ? symbols : locals; + s->id = sym_count++; + if (!islocal) { + s->local = NULL; + symbols = s; + } else { + cur_sym->count++; + locals = s; + } defined = 0; if (dbg) { printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, name, sym_count-1); } - return sym_count-1; + return s; } -uint64_t use_symbol(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) { - uint16_t i; - i = mksymbol(name, 0, 0, useid, id, dbg); - uint8_t is_defined = (i != 0xFFFF); - val++; +uint16_t fixup_cnt = 0; +symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t islocal, uint8_t dbg) { + symbol *s = mksymbol(name, 0, 0, islocal, 0, 0, dbg); if (dbg) { - puts("use_symbol(): We also got here."); - printf("use_symbol(): i: $%X\n", i); + printf("get_sym(): Symbol ID: $%X.\n", s->id); } - if (symbols[i] != NULL) { - if (symbols[i]->def) { - return symbols[i]->val; - } else { - if (dbg) { - printf("use_symbol(): "); - printf("oof, symbol "); - if (useid) { - printf("id $%04X, ", id); - } else { - printf("%s, ", name); - } - puts("does not exist, yet."); - } - return val-1; + if (s->def) { + return s; + } else { + if (dbg) { + printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name); } + fixup *f = malloc(sizeof(*f)); + f->next = fixups; + f->adr = val; + f->t = t; + f->s = s; + fixups = f; + fixup_cnt++; + return NULL; } - return val-1; } -uint8_t set_symval(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) { - uint16_t i = mksymbol(name, 0, 0, useid, id, dbg); - if (symbols[i] != NULL) { - if (symbols[i]->def) { - symbols[i]->val = val; - return 1; +uint16_t reslv_fixups(uint8_t dbg) { + fixup *f = fixups; + symbol *ls; + uint16_t i = 0, j = 0; + for (; f; f = f->next) { + if (f->s->def) { + if (dbg) { + printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", f->s->id, f->s->name, f->s->val); + } + f->t->sym = f->s; } else { if (dbg) { - printf("set_symval(): "); - printf("oof, symbol "); - if (useid) { - printf("id $%04X, ", id); - } else { - printf("%s, ", name); - } - puts("does not exist, yet."); + printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", f->s->name, f->adr); } - return 0; - } - } - return 0; -} - -char *get_symname(uint16_t id, uint8_t dbg) { - if (symbols[id]->def) { - return symbols[id]->name; - } else { - if (dbg) { - printf("get_symname(): oof, symbol id $%04X, has not been defined, yet.\n", id); + i++; } - return NULL; } -} + return i; -uint16_t fixup_cnt = 0; -uint16_t get_symid(const char *name, uint64_t val, token *t, uint8_t dbg) { - uint16_t i = mksymbol(name, 0, 0, 0, 0, dbg); - if (dbg) { - printf("get_symid(): Symbol ID: $%X, i: $%X.\n", symbols[i]->id, i); - } - if (symbols[i]->def) { - return symbols[i]->id; - } else { - if (dbg) { - printf("get_symid(): oof, symbol %s, does not exist, yet.\n", name); - } - fixups[fixup_cnt] = malloc(sizeof(**fixups)); - fixups[fixup_cnt]->adr = val; - fixups[fixup_cnt]->t = t; - fixups[fixup_cnt]->s = symbols[i]; - fixup_cnt++; - return 0xFFFF; - } } uint16_t get_comment(const char *com, uint8_t dbg) { uint16_t i = 0; uint8_t iscom = 0; for (; i < comidx; i++) { - if (comment[i] != NULL) { - if (com[0] == comment[i][0]) { - iscom = !strcmp(com, comment[i]); - } - } else { - break; - } - if (iscom) { + if (comment[i] == NULL || iscom) { break; + } else if (com[0] == comment[i][0]) { + iscom = !strcmp(com, comment[i]); } } if (comment[i] == NULL || i == comidx) { @@ -178,10 +134,8 @@ uint16_t get_string(const char *str, uint8_t dbg) { for (; i < stridx; i++) { if (isstr || string[i] == NULL) { break; - } else { - if (str[0] == string[i][0]) { - isstr = !strcmp(str, string[i]); - } + } else if (str[0] == string[i][0]) { + isstr = !strcmp(str, string[i]); } } if (string[i] == NULL || i == stridx) { @@ -197,35 +151,10 @@ uint16_t get_string(const char *str, uint8_t dbg) { return i; } -uint16_t reslv_fixups(uint8_t dbg) { - uint16_t i = 0, j = 0; - for (; fixups[j]; j++) { - if (fixups[j]->s->def) { - if (dbg) { - printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", fixups[j]->s->id, fixups[j]->s->name, fixups[j]->s->val); - } - fixups[j]->t->word = fixups[j]->s->id; - } else { - if (dbg) { - printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", fixups[j]->s->name, fixups[j]->adr); - } - i++; - } - } - return i; - -} - line *find_line(uint32_t ln, uint8_t dbg) { uint32_t i = 0; line *l = lines; for (; l && l->linenum != ln; l = l->next); - /*if (dbg) { - if (l->linenum == ln) { - printf("find_line(): Found line number %u, at line index %X.\n", ln, i); - } - printf("find_line(): linenum: %u, i: %X\n", l->linenum, i); - }*/ if (l != NULL) { if (l->linenum == ln) { if (dbg) { @@ -257,6 +186,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { uint8_t rs = 0; uint8_t base = 0; + uint8_t islocal = 0; + uint8_t isop = 0; int num = 0; int isch = 0; @@ -346,7 +277,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } } l->count++; - t = make_token(lex_type, k, 0, ""); + t = make_token(lex_type, k, 0, "", NULL); } else { lex_type = TOK_RS; switch (tolower(lexeme[j-1])) { @@ -364,7 +295,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { break; } l->count++; - t = make_token(lex_type, rs, 0, ""); + t = make_token(lex_type, rs, 0, "", NULL); isop = 0; } break; @@ -391,7 +322,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } lex_type = TOK_STRING; l->count++; - t = make_token(lex_type, 0, 0, string[strid]); + t = make_token(lex_type, 0, 0, string[strid], NULL); break; case PTOK_DOLLAR: case PTOK_PERCENT: @@ -408,17 +339,22 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { i += j; value = strtoull(lexeme, NULL, base); if (lt->id == TOK_SYM) { - mksymbol(sym, value, 1, 0, 0, dbg); + mksymbol(sym, value, 1, islocal, 0, 0, dbg); if (lt) { - lt->word = get_symid(sym, address, lt, dbg); + lt->sym = get_sym(sym, address, lt, islocal, dbg); + } + if (!islocal) { + cur_sym = symbols; + locals = cur_sym->local; } - isfixup += (lt->word == 0xFFFF); + islocal = 0; + isfixup += (lt->sym == NULL); if (dbg) { printf("lex(): isfixup: %u\n", isfixup); } } l->count++; - t = make_token(lex_type, 0, value, ""); + t = make_token(lex_type, 0, value, "", NULL); break; case PTOK_SQUOTE: i++; @@ -444,10 +380,15 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } lex_type = TOK_CHAR; l->count++; - t = make_token(lex_type, 0, ch, ""); + t = make_token(lex_type, 0, ch, "", NULL); + break; + case PTOK_LBRACK: + case PTOK_HASH : + l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM; + lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM; + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j++] = str[i]; break; - case PTOK_LBRACK: l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; - case PTOK_HASH : l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; case PTOK_PLUS: case PTOK_MINUS: case PTOK_GT: @@ -460,15 +401,13 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { case PTOK_LT : value = EXPR_HIGH ; break; } l->count++; - t = make_token(lex_type, value, 0, ""); + t = make_token(lex_type, value, 0, "", NULL); memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break; case PTOK_EQU: i++; lex_type = TOK_SYM; - l->count++; - t = make_token(lex_type, 0, 0, ""); memset(lexeme, 0, strlen(lexeme)+1); lexeme[j] = str[i]; break; @@ -490,28 +429,43 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { break; case PTOK_X: case PTOK_Y: + lexeme[j] = str[i++]; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + if (lex_type != TOK_IND && lex_type != TOK_CSV) { + break; + } switch (ptok) { case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break; case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break; } - lexeme[j] = str[i]; - lexeme[j+1] = '\0'; - lexeme[j+2] = '\0'; - i++; + break; + case PTOK_AT: + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j] = '@'; + islocal = 1; + lex_type = TOK_LOCAL; break; case PTOK_COLON: i++; lexeme[j] = ':'; lexeme[j+1] = '\0'; lex_type = TOK_LABEL; - l->count++; - t = make_token(lex_type, 0, 0, ""); - mksymbol(sym, address, 1, 0, 0, dbg); + mksymbol(sym, address, 1, islocal, 0, 0, dbg); if (isfixup) { isfixup = reslv_fixups(dbg); } - t->word = get_symid(sym, address, t, dbg); - isfixup += (t->word == 0xFFFF); + if (lt) { + lt->id = lex_type; + lt->type = islocal; + lt->sym = get_sym(sym, address, t, islocal, dbg); + isfixup += (lt->sym == NULL); + } + if (!islocal) { + cur_sym = symbols; + locals = cur_sym->local; + } + islocal = 0; if (dbg) { printf("lex(): isfixup: %u\n", isfixup); } @@ -519,54 +473,43 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { case PTOK_SCOLON: i++; for (; isdelm(str[i+j], dbg) != 1; j++); - memcpy(lexeme, str+i, j); - lexeme[j] = '\0'; - i += j; - comid = get_comment(lexeme, dbg); - if (comid == 0xFFFF) { - /*if (line != lineidx && l[line].com != 0xFFFF) { - comid = l[line].com; - } else { - comid = comidx; - }*/ - comid = comidx; - comment[comid] = malloc(j+1); - memcpy(comment[comid], lexeme, j+1); - comidx++; + if (!j) { + lexeme[j] = ' '; + lexeme[j+1] = '\0'; + } else { - } - if (dbg) { - printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; + comid = get_comment(lexeme, dbg); + if (comid == 0xFFFF) { + /*if (line != lineidx && l[line].com != 0xFFFF) { + comid = l[line].com; + } else { + comid = comidx; + }*/ + comid = comidx; + comment[comid] = malloc(j+1); + memcpy(comment[comid], lexeme, j+1); + comidx++; + } else { + } + if (dbg) { + printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); + } } lex_type = TOK_COMMENT; l->count++; - t = make_token(lex_type, 0, 0, comment[comid]); + if (j) { + t = make_token(lex_type, 0, 0, comment[comid], NULL); + } else { + t = make_token(lex_type, 0, 0, "" , NULL); + } + break; case PTOK_ALPHA: - while (isdelm(str[i], dbg) != 16) { - switch (str[i]) { - case ')': - case ',': - case '.': - case '+': - case '<': - case '>': - case '-': - case ':': - case '=': - case ';': - case '\0': - case '\n': - isch = 0; - break; - default: - isch = 1; - lexeme[j++] = str[i++]; - break; - } - if (!isch) { - break; - } + while (!isdelm2(str[i], dbg)) { + lexeme[j++] = str[i++]; } lexeme[j] = '\0'; isch = 0; @@ -578,41 +521,16 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lex_type = TOK_OPCODE; isop = 1; l->count++; - t = make_token(lex_type, 0xFF, k, ""); + t = make_token(lex_type, 0xFF, k, "", NULL); break; } } } } if (!isop) { - for (k = 0; lexeme[k] != '\0';) { - switch (lexeme[k]) { - case ')': - case ',': - case '.': - case '+': - case '-': - case '<': - case '>': - case ':': - case ';': - case '=': - case '\0': - case '\n': - fall = 1; - break; - default: - fall = 0; - break; - } - if (fall) { - break; - } - k++; - } lex_type = TOK_SYM; l->count++; - t = make_token(lex_type, 0, 0, ""); + t = make_token(lex_type, islocal, 0, "", NULL); memcpy(sym, lexeme, j+1); uint8_t spaces = 0; for (; isdelm(str[i+spaces], dbg) == 16; spaces++); @@ -620,8 +538,9 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { printf("lex(): spaces: %u\n", spaces); } if (str[i+spaces] != ':' && str[i+spaces] != '=') { - t->word = get_symid(lexeme, address, t, dbg); - isfixup += (t->word == 0xFFFF); + t->sym = get_sym(lexeme, address, t, islocal, dbg); + islocal = 0; + isfixup += (t->sym == NULL); if (dbg) { printf("lex(): isfixup: %u\n", isfixup); } @@ -639,31 +558,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { j = 0; if (lex_type == TOK_OPCODE && !isop) { j = 0; - } else { - if (lex_type == TOK_EXPR) { - i++; - } else { - switch (str[i]) { - case ')': - case ',': - case '.': - case '+': - case '-': - case '<': - case '>': - case ':': - case ';': - case '=': - case ' ': - case '\t': - case '\n': - case '\0': - break; - default: - i++; - break; - } - } + } else if (lex_type == TOK_EXPR || !isdelm2(str[i], dbg)) { + i++; } if (lex_type == TOK_COMMENT) { if (!isstart) { @@ -679,6 +575,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lex_type = 0xFF; case TOK_CSV: case TOK_IND: + case TOK_LOCAL: memset(lexeme, 0, strlen(lexeme)+1); case TOK_SYM: break; @@ -707,5 +604,6 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { linenum += 10; } } + l->addr = address; return address; } -- cgit v1.2.3-13-gbd6f