diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 885 |
1 files changed, 885 insertions, 0 deletions
@@ -0,0 +1,885 @@ +#include "asmmon.h" + +struct symbol *symbols = 0; +struct fixup *fixups = 0; + +struct symbol *mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, uint16_t id, uint8_t dbg) { + struct symbol *s; + uint16_t i = 0; + uint8_t flag; + for (s = symbols; s; s = s->nxt) { + if (useid) { + flag = id == s->id; + } else { + flag = !strcmp(name, s->name); + } + if (flag) { + if (def) { + if (s->def) { + if (dbg) { + printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name); + } + defined = 1; + } else { + defined = 0; + } + s->def = def; + s->val = val; + if (dbg) { + printf("mksymbol(): def: %u, val: $%016llX, name: %s\n", def, val, name); + } + } + return s; + } + i++; + } + s = malloc(sizeof(*s) + strlen(name)); + s->def = def; + s->val = val; + strcpy(s->name, name); + s->nxt = symbols; + s->id = i; + symbols = s; + defined = 0; + if (dbg) { + printf("mksymbol(): def: %u, val: $%016llX, name: %s, id: $%04X\n", def, val, name, i); + } + return s; +} + +uint64_t use_symbol(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) { + struct symbol *s = mksymbol(name, 0, 0, useid, id, dbg); + val++; + if (s->def) { + return s->val; + } else { + if (dbg) { + printf("use_symbol(): "); + printf("oof, symbol "); + if (useid) { + printf("id $%04X, ", id); + } else { + printf("%s, ", name); + } + puts("does not exist, yet."); + } + return val-1; + } +} + +uint8_t set_symval(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) { + struct symbol *s = mksymbol(name, 0, 0, useid, id, dbg); + if (s->def) { + s->val = val; + return 1; + } else { + if (dbg) { + printf("set_symval(): "); + printf("oof, symbol "); + if (useid) { + printf("id $%04X, ", id); + } else { + printf("%s, ", name); + } + puts("does not exist, yet."); + } + return 0; + } +} + +char *get_symname(uint16_t id, uint8_t dbg) { + struct symbol *s = mksymbol("", 0, 0, 1, id, dbg); + if (s->def) { + return s->name; + } else { + if (dbg) { + printf("get_symname(): oof, symbol id $%04X, has not been defined, yet.\n", id); + } + return NULL; + } +} + +uint16_t get_symid(const char *name, uint64_t val, uint16_t ln, uint8_t dbg) { + struct symbol *s = mksymbol(name, 0, 0, 0, 0, dbg); + if (s->def) { + return s->id; + } else { + if (dbg) { + printf("get_symid(): oof, symbol %s, does not exist, yet.\n", name); + } + struct fixup *f = malloc(sizeof(*f)); + f->nxt = fixups; + f->adr = val; + f->ln = ln; + f->s = s; + fixups = f; + return 0xFFFF; + } +} + +uint16_t get_comment(const char *cmnt, uint8_t dbg) { + uint16_t i = 0; + uint8_t iscom = 0; + for (; i < comidx; i++) { + if (comment[i] != NULL) { + iscom = !strcmp(cmnt, comment[i]); + } else { + break; + } + if (iscom) { + break; + } + } + if (comment[i] == NULL) { + if (dbg) { + printf("get_comment(): oof, the index $%04X is NULL.\n", i); + } + return 0xFFFF; + } + if (i == comidx) { + if (dbg) { + printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", cmnt); + } + return 0xFFFF; + } + if (dbg) { + printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", cmnt, i); + } + return i; +} + +uint16_t reslv_fixups(uint8_t dbg) { + uint16_t i = 0, j = 0; + struct fixup *f; + f = fixups; + for (; f;) { + /*printf("f: $%016llX, f->nxt: $%016llX, f->s->name: %s, f->s->val: $%016llX\n", &f, &f->nxt, f->s->name, f->s->val);*/ + if (f->s->def) { + if (f->ln == 0xFFFF) { + addr[f->adr] = f->s->val & 0xFF; + if (f->s->val & 0xFF00) + addr[f->adr+1] = f->s->val >> 8; + if (f->s->val & 0xFF000000) { + addr[f->adr+2] = f->s->val >> 16; + addr[f->adr+3] = f->s->val >> 24; + } + if (f->s->val & 0xFF00000000000000) { + addr[f->adr+4] = f->s->val >> 32; + addr[f->adr+5] = f->s->val >> 40; + addr[f->adr+6] = f->s->val >> 48; + addr[f->adr+7] = f->s->val >> 56; + } + } else { + tokline[f->ln].sym = f->s->id; + } + } else { + if (dbg) { + printf("reslv_fixups(): oof, undefined reference to '%s', at $%016llX.\n", f->s->name, f->adr); + } + i++; + } + f = f->nxt; + j++; + } + return i; + +} + +uint64_t update_addr(uint64_t address, uint8_t fixup, uint8_t dbg) { + uint64_t value = 0; + uint16_t i = 0; + uint16_t j = 0; + uint16_t flags = 0; + uint8_t opsize = 0; + + uint16_t l = lineidx; + uint16_t symid = tokline[l].sym; + uint16_t str = tokline[l].str; + uint16_t com = tokline[l].com; + uint8_t islabel = tokline[l].islabel; + uint8_t issym = tokline[l].issym; + uint8_t opbase = tokline[l].opbase; + uint8_t aopbase = tokline[l].aopbase; + uint8_t dir = tokline[l].dir; + uint8_t am = tokline[l].am; + uint8_t cm = tokline[l].cm; + uint8_t rs = tokline[l].rs; + uint8_t mne = tokline[l].mne; + + flags |= (dir != 0x00FF) << 0x00; + flags |= (mne != 0x00FF) << 0x01; + flags |= (rs != 0x00FF) << 0x02; + flags |= (am != 0x00FF) << 0x03; + flags |= (opbase != 0x00FF) << 0x04; + flags |= (aopbase != 0x00FF) << 0x05; + flags |= (symid != 0xFFFF) << 0x06; + flags |= (fixup > 0x0000) << 0x06; + flags |= (islabel ) << 0x07; + flags |= (issym ) << 0x07; + flags |= (am != 0x00FF) << 0x08; + flags |= (cm != 0x00FF) << 0x09; + flags |= (str != 0xFFFF) << 0x0A; + + if (dbg) { + printf("update_addr(): "); + printf("flags: $%04X\n", flags); + } + if (!flags || flags == 0x40) { + if (dbg) { + printf("update_addr(): "); + puts("This line only contains a comment, so don't update the address."); + } + return address; + } + if (((flags & 0x53) == 0x42)) { + if (isfixup && symid == 0xFFFF && (opcodes[mne][IMPL] == 0xFF)) { + value = address; + } else { + value = use_symbol("", symid, address, 1, dbg); + } + } else { + value = tokline[l].op; + } + if (flags & 0x220) { + switch (cm) { + case 0: value += tokline[l].aop; break; + case 1: value -= tokline[l].aop; break; + } + } + if (dbg) { + printf("update_addr(): value: $%llX\n", value); + } + switch (dir) { + case DIR_ORG: + address = value; + if (dbg) { + printf("update_addr(): "); + printf("Set the Program Counter's Origin to $%llX.\n", address); + } + break; + case DIR_BYTE: + if (flags & 0x400) { + for (; string[str][i] != '\0'; i++, j++, address++) { + i += string[str][i] == '\\'; + } + j++; + address++; + if (dbg) { + printf("update_addr(): "); + printf("Increment Program Counter by $%04X", j); + puts(", to make room for the string."); + } + } else { + address += 1; + } + break; + case DIR_WORD: address += 2; break; + case DIR_DWORD: address += 4; break; + case DIR_QWORD: address += 8; break; + } + if (flags & 0x01) { + if (dbg) { + printf("update_addr(): "); + puts("This line contains a directive, so skip everything else."); + } + return address; + } + if ((flags & 0x15B) == 0x02 || (opcodes[mne][IMPL] != 0xFF && am == 0xFF && opbase == 0xFF && symid == 0xFFFF)) { + tokline[l].am = IMPL; + am = IMPL; + if (dbg) { + printf("update_addr(): "); + puts("Addressing Mode has been set to Implied."); + } + } + if (am == IMPL) { + opsize = 0; + } else if (am == IMM) { + switch (rs) { + case 3: address += 8; break; + case 2: address += 4; break; + case 1: address += 2; break; + default: address += 1; break; + } + if (dbg) { + if (!(flags & 0x04)) { + rs = 0; + } + printf("update_addr(): "); + printf("Increment Program Counter by $%02X", 1 << rs); + puts(", to make room for the operand."); + } + } else if ((flags & 0x158) && (!(flags & 0x80))) { + opsize = 0; + opsize = (value <= 0x00000000000000FF) ? 1 : opsize; + opsize = (value > 0x00000000000000FF) ? 2 : opsize; + opsize = (value > 0x000000000000FFFF) ? 3 : opsize; + opsize = (value > 0x0000000000FFFFFF) ? 4 : opsize; + opsize = (value > 0x00000000FFFFFFFF) ? 5 : opsize; + opsize = (value > 0x000000FFFFFFFFFF) ? 6 : opsize; + opsize = (value > 0x0000FFFFFFFFFFFF) ? 7 : opsize; + opsize = (value > 0x00FFFFFFFFFFFFFF) ? 8 : opsize; + if (opsize) { + switch (opsize-1) { + case 0: + case 2: + case 5: + case 3: + if (!(flags & 0x100)) { + am = ZM; + tokline[l].am = am; + if (dbg) { + printf("update_addr(): "); + puts("Addressing Mode has been set to Zero Matrix."); + } + /*address += !(opsize-1);*/ + } + break; + case 1: + case 4: + case 6: + case 7: + if (!(flags & 0x100)) { + am = ABS; + tokline[l].am = am; + if (dbg) { + printf("update_addr(): "); + puts("Addressing Mode has been set to Absolute."); + } + } + break; + } + address += opsize; + if (dbg) { + printf("update_addr(): "); + printf("Increment Program Counter by $%02X", opsize); + puts(", to make room for the address."); + } + } + } + if (dbg) { + printf("update_addr(): "); + printf("Address: $%llX\n", address); + } + return address; +} + +uint64_t lex(char *str, uint64_t address, uint8_t dbg) { + char sym[0x100]; + uint16_t i = 0; + uint16_t j = 0; + uint16_t comid = 0; + lex_type = 0xFF; + uint8_t k = 0; + uint8_t rs = 0; + uint8_t isop = 0; + int num = 0; + int isch = 0; + int16_t ln = -1; + char lnum[6]; + uint8_t islinenum; + uint8_t space = 0; + uint8_t tab = 0; + uint8_t isstart = 1; + uint8_t fall = 0; + tokline[lineidx].dir = 0xFF; + tokline[lineidx].mne = 0xFF; + tokline[lineidx].rs = 0xFF; + tokline[lineidx].am = 0xFF; + tokline[lineidx].cm = 0xFF; + tokline[lineidx].opbase = 0xFF; + tokline[lineidx].aopbase = 0xFF; + tokline[lineidx].islabel = 0; + tokline[lineidx].issym = 0; + tokline[lineidx].str = 0xFFFF; + tokline[lineidx].com = 0xFFFF; + tokline[lineidx].sym = 0xFFFF; + tokline[lineidx].op = 0; + tokline[lineidx].aop = 0; + tokline[lineidx].addr = address; + + while (isdigit(str[i]) && !isspace(str[i])) { + lnum[j++] = str[i++]; + } + islinenum = i; + if (i) { + lnum[j] = '\0'; + ln = strtol(lnum, NULL, 10); + j = 0; + } else { + ln = linenum; + } + uint8_t done = 0; + while (str[i] != '\0' && str[i] != '\n') { + space = 0; + tab = 0; + while (isspace(str[i+j])) { + tab += str[i+j] == '\t'; + space += str[i+j] == ' '; + j++; + } + j = 0; + if (dbg) { + printf("lex(): tab: %u, space: %u\n", tab, space); + } + if (isstart) { + tokline[lineidx].stab = tab; + tokline[lineidx].sspace = space; + if (dbg) { + printf("lex(): starting tabs: %u, starting spaces: %u\n", tokline[lineidx].stab, tokline[lineidx].sspace); + } + } + if (isspace(str[i])) { + while (isspace(str[i])) { + i++; + } + } + switch (str[i]) { + case '.': + i++; + while (!isspace(str[i])) { + lexeme[j++] = str[i++]; + } + lexeme[j] = '\0'; + if (!isop) { + for (k = 0; k < 5; k++) { + if (!strcasecmp(lexeme, dir_t[k])) { + lex_type = TOK_DIR; + break; + } + } + tokline[lineidx].dir = k; + } else { + lex_type = TOK_RS; + switch (tolower(lexeme[j-1])) { + case '2': + case 'w': + rs = 1; + break; + case '4': + case 'd': + rs = 2; + break; + case '8': + case 'q': + rs = 3; + break; + } + address++; + tokline[lineidx].rs = rs; + isop = 0; + } + break; + case '\"': + i++; + while (str[i] != '\"') { + lexeme[j++] = str[i++]; + } + lexeme[j] = '\0'; + string[stridx] = malloc(j+1); + memcpy(string[stridx], lexeme, j+1); + tokline[lineidx].str = stridx; + if (dbg) { + printf("lex(): str[0x%04X]: %s\n", stridx, string[stridx]); + } + stridx++; + lex_type = TOK_STRING; + break; + case '#': + lexeme[j] = '#'; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + tokline[lineidx].am = IMM; + lex_type = TOK_IMM; + break; + case '$': + i++; + while (isxdigit(str[i]) && (str[i] != '\0' && str[i] != '\n')) { + lexeme[j++] = str[i++]; + } + lexeme[j] = '\0'; + switch (lex_type) { + case TOK_SYM: + tokline[lineidx].op = strtoull(lexeme, NULL, 16); + mksymbol(sym, tokline[lineidx].op, 1, 0, 0, dbg); + tokline[lineidx].sym = get_symid(sym, address, lineidx, dbg); + isfixup += tokline[lineidx].sym == 0xFFFF; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + tokline[lineidx].opbase = BASE_HEX; + break; + case TOK_PLUS: + case TOK_MINUS: + tokline[lineidx].aop = strtoull(lexeme, NULL, 16); + tokline[lineidx].aopbase = BASE_HEX; + break; + default: + if (tokline[lineidx].cm != 0xFF) { + tokline[lineidx].aop = strtoull(lexeme, NULL, 16); + tokline[lineidx].aopbase = BASE_HEX; + } else { + tokline[lineidx].op = strtoull(lexeme, NULL, 16); + tokline[lineidx].opbase = BASE_HEX; + } + break; + + } + lex_type = TOK_HEX; + break; + case '%': + i++; + while (isdigit(str[i]) && (str[i] != '\0' && str[i] != '\n')) { + lexeme[j++] = str[i++]; + } + lexeme[j] = '\0'; + switch (lex_type) { + case TOK_SYM: + tokline[lineidx].op = strtoull(lexeme, NULL, 2); + mksymbol(sym, tokline[lineidx].op, 1, 0, 0, dbg); + tokline[lineidx].sym = get_symid(sym, address, lineidx, dbg); + isfixup += tokline[lineidx].sym == 0xFFFF; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + tokline[lineidx].opbase = BASE_BIN; + break; + case TOK_PLUS: + case TOK_MINUS: + tokline[lineidx].aop = strtoull(lexeme, NULL, 2); + tokline[lineidx].aopbase = BASE_BIN; + break; + default: + if (tokline[lineidx].cm != 0xFF) { + tokline[lineidx].aop = strtoull(lexeme, NULL, 2); + tokline[lineidx].aopbase = BASE_BIN; + } else { + tokline[lineidx].op = strtoull(lexeme, NULL, 2); + tokline[lineidx].opbase = BASE_BIN; + } + break; + + } + lex_type = TOK_BIN; + break; + case '+': + lexeme[j] = '+'; + lexeme[j+1] = '\0'; + tokline[lineidx].cm = 0; + lex_type = TOK_PLUS; + break; + case '-': + lexeme[j] = '-'; + lexeme[j+1] = '\0'; + tokline[lineidx].cm = 1; + lex_type = TOK_MINUS; + break; + case '(': + lexeme[j] = '('; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + tokline[lineidx].am = IND; + break; + case ')': + i++; + if (str[i] == ',') { + i++; + while (isspace(str[i])) { + lexeme[j++] = str[i++]; + } + if (tokline[lineidx].am == IND && tolower(str[i]) == 'y') { + lexeme[j++] = 'y'; + tokline[lineidx].am = INDY; + } + lexeme[j] = '\0'; + } else { + lexeme[j] = ')'; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + } + break; + case ',': + i++; + while (isspace(str[i])) { + lexeme[j++] = str[i++]; + } + if (tokline[lineidx].am == IND && tolower(str[i]) == 'x') { + tokline[lineidx].am = INDX; + lexeme[j++] = 'x'; + i++; + } else { + switch (tolower(str[i])) { + case 'x': + tokline[lineidx].am = ZMX; + lexeme[j++] = 'x'; + break; + case 'y': + tokline[lineidx].am = ZMY; + lexeme[j++] = 'y'; + break; + } + } + lexeme[j] = '\0'; + break; + case ':': + i++; + lexeme[j] = ':'; + lexeme[j+1] = '\0'; + lex_type = TOK_LABEL; + tokline[lineidx].islabel = 1; + mksymbol(sym, address, 1, 0, 0, dbg); + if (isfixup) { + isfixup = reslv_fixups(dbg); + } + tokline[lineidx].sym = get_symid(sym, address, lineidx, dbg); + isfixup += tokline[lineidx].sym == 0xFFFF; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + break; + case '=': + i++; + lexeme[j] = '='; + lexeme[j+1] = 0; + tokline[lineidx].issym = 1; + lex_type = TOK_SYM; + break; + case ';': + i++; + while (str[i] != '\0' && str[i] != '\n') { + lexeme[j++] = str[i++]; + } + lexeme[j] = '\0'; + comid = get_comment(lexeme, dbg); + if (comid == 0xFFFF) { + comment[comidx] = malloc(j+1); + memcpy(comment[comidx], lexeme, j+1); + tokline[lineidx].com = comidx; + if (dbg) { + printf("lex(): com[0x%04X]: %s\n", comidx, comment[comidx]); + } + comidx++; + } else { + tokline[lineidx].com = comid; + if (dbg) { + printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); + } + } + lex_type = TOK_COMMENT; + break; + default: + if (isalnum(str[i]) || str[i] == '_') { + while (!isspace(str[i])) { + switch (str[i]) { + case ')': + case ',': + case '.': + case '+': + case '-': + case ':': + case '=': + case ';': + case '\0': + case '\n': + isch = 0; + break; + default: + isch = 1; + lexeme[j++] = str[i++]; + break; + } + if (!isch) { + break; + } + } + lexeme[j] = '\0'; + isch = 0; + isop = 0; + if (j == 3 && str[i] != ':') { + for (k = 0; k < OPNUM; k++) { + if (!strcasecmp(lexeme, mne[k])) { + lex_type = TOK_OPCODE; + isop = 1; + tokline[lineidx].mne = k; + address++; + break; + } + } + } + if (!isop) { + for (k = 0; lexeme[k] != '\0';) { + switch (lexeme[k]) { + case ')': + case ',': + case '.': + case '+': + case '-': + case ':': + case ';': + case '=': + case '\0': + case '\n': + fall = 1; + break; + default: + fall = 0; + break; + } + if (fall) { + break; + } + if ((isalnum(lexeme[k]) || lexeme[k] == '_')) { + if (!isch) { + isch = isalpha(lexeme[k]); + } + num = isdigit(lexeme[k]) && !isch; + k++; + } else { + break; + } + } + if (lexeme[k] == '\0') { + if (num) { + switch (lex_type) { + case TOK_SYM: + tokline[lineidx].op = strtoull(lexeme, NULL, 10); + mksymbol(sym, tokline[lineidx].op, 1, 0, 0, dbg); + if (isfixup) { + isfixup = reslv_fixups(dbg); + } + tokline[lineidx].sym = get_symid(sym, address, lineidx, dbg); + isfixup += tokline[lineidx].sym == 0xFFFF; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + tokline[lineidx].opbase = BASE_DEC; + break; + case TOK_PLUS: + case TOK_MINUS: + tokline[lineidx].aop = strtoull(lexeme, NULL, 10); + tokline[lineidx].aopbase = BASE_DEC; + break; + default: + if (tokline[lineidx].cm != 0xFF) { + tokline[lineidx].aop = strtoull(lexeme, NULL, 10); + tokline[lineidx].aopbase = BASE_DEC; + } else { + tokline[lineidx].op = strtoull(lexeme, NULL, 10); + tokline[lineidx].opbase = BASE_DEC; + } + break; + + } + lex_type = TOK_DEC; + } else if (isch && lex_type != TOK_HEX && lex_type != TOK_BIN) { + lex_type = TOK_SYM; + memcpy(sym, lexeme, j+1); + uint8_t spaces = 0; + for (; isspace(str[i+spaces]); spaces++); + if (dbg) { + printf("lex(): spaces: %u\n", spaces); + } + if (str[i+spaces] != ':' && str[i+spaces] != '=') { + tokline[lineidx].sym = get_symid(lexeme, address, lineidx, dbg); + isfixup += tokline[lineidx].sym == 0xFFFF; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + } + } + } + } + } + break; + } + if (dbg) { + printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE"); + } + isstart = 0; + /*lex_type = 0xFF;*/ + j = 0; + if (lex_type == TOK_OPCODE && !isop) { + j = 0; + } else { + if (lex_type == TOK_PLUS || lex_type == TOK_MINUS) { + i++; + } else { + switch (str[i]) { + case ')': + case ',': + case '.': + case '+': + case '-': + case ':': + case ';': + case '=': + case ' ': + case '\t': + case '\n': + case '\0': + break; + default: + i++; + break; + } + } + } + if (lex_type == TOK_COMMENT) { + if (!isstart) { + tokline[lineidx].etab = tab; + tokline[lineidx].espace = space; + if (dbg) { + printf("lex(): ending tabs: %u, ending spaces: %u\n", tokline[lineidx].etab, tokline[lineidx].espace); + } + } + } + if (lex_type != TOK_SYM) { + for (k = 0; lexeme[k] != '\0';) { + lexeme[k] = 0; + ++k; + } + lex_type = 0xFF; + } + } + if (i) { + address = update_addr(address, isfixup, dbg); + if (dbg) { + printf("lex(): Next address: $%llX\n", address); + printf( + "lex(): " + "address: $%llX" + ", dir: %u" + ", mne: $%02X" + ", rs: %u" + ", am: %u" + ", cm: %u" + ", opbase: %u" + ", com: $%04X" + ", sym: $%04X" + ", op: $%016X" + ", aop: $%016X" + ", ln: %i\n" + , tokline[lineidx].addr + , tokline[lineidx].dir + , tokline[lineidx].mne + , tokline[lineidx].rs + , tokline[lineidx].am + , tokline[lineidx].cm + , tokline[lineidx].opbase + , tokline[lineidx].com + , tokline[lineidx].sym + , tokline[lineidx].op + , tokline[lineidx].aop + , lineidx); + } + if (ln > linenum) { + linenum+=(10+(ln & 10)); + tokline[lineidx].linenum = ln; + } else if (!islinenum) { + tokline[lineidx].linenum = linenum; + linenum += 10; + } + lineidx++; + } + return address; +} |