#include "asmmon.h"

/* Type of the token currently being lexed; 0xFF means "no token". */
uint8_t lex_type;
/* Number of slots of symbols[] that are in use. */
uint16_t sym_count = 0;
/* Token list of the line being lexed; lex() hands `tokens` to the line
 * record and then resets both pointers to NULL.
 * NOTE(review): presumably make_token() appends to this list - confirm. */
token *tokens = NULL;
token *last_tok = NULL;

/* Clear the first 0x1000 slots of the symbol table and of the fixup table. */
void init_symbol() {
	uint16_t i = 0;
	for (; i < 0x1000; i++) {
		symbols[i] = 0;
		fixups[i] = 0;
	}
}

/*
 * Look a symbol up (by id when `useid` is set, by name otherwise) and create
 * it when it is not in the table yet.
 *
 * name  - symbol name (used for lookup when !useid, and for creation).
 * val   - value stored when the symbol is created or (re)defined.
 * def   - non-zero to mark the symbol as defined and store `val`.
 * useid - non-zero to match on `id` instead of `name`.
 * id    - id to match when `useid` is set.
 * dbg   - non-zero to print debug output.
 *
 * Sets the global `defined` to 1 when `def` is requested for a symbol that
 * was already defined, and to 0 when the symbol is defined for the first
 * time or newly created.
 *
 * Returns the id of the found or newly created symbol.
 *
 * NOTE(review): neither the malloc() result nor the capacity of symbols[]
 * is checked here.
 */
uint16_t mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, uint16_t id, uint8_t dbg) {
	uint16_t i = 0;
	uint8_t flag = 0;
	for (; i < sym_count; i++) {
		if (useid) {
			flag = (id == symbols[i]->id);
		} else {
			/* Cheap first-character test before the full comparison. */
			if (name[0] == symbols[i]->name[0]) {
				flag = !strcmp(name, symbols[i]->name);
			} else {
				continue;
			}
		}
		if (flag) {
			if (def) {
				if (symbols[i]->def) {
					if (dbg) {
						printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name);
					}
					defined = 1;
				} else {
					defined = 0;
				}
				symbols[i]->def = def;
				symbols[i]->val = val;
				symbols[i]->id = i;
				if (dbg) {
					printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, name);
					printf("mksymbol(): i: $%X, id: $%04X\n", i, symbols[i]->id);
				}
			}
			return symbols[i]->id;
		}
	}
	/* Not found: append a new entry. One extra byte is reserved for the
	 * terminating NUL so that the strcpy() below always stays in bounds. */
	symbols[i] = malloc(sizeof(**symbols) + strlen(name) + 1);
	symbols[i]->def = def;
	symbols[i]->val = val;
	strcpy(symbols[i]->name, name);
	symbols[i]->id = sym_count++;
	defined = 0;
	if (dbg) {
		printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, name, sym_count-1);
	}
	return sym_count-1;
}

/*
 * Return the value of a symbol, creating an undefined entry for it when it
 * does not exist yet.
 *
 * Returns the symbol's value when it is defined, otherwise `val` unchanged.
 */
uint64_t use_symbol(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) {
	uint16_t i = mksymbol(name, 0, 0, useid, id, dbg);
	if (dbg) {
		puts("use_symbol(): We also got here.");
		printf("use_symbol(): i: $%X\n", i);
	}
	if (symbols[i] != NULL) {
		if (symbols[i]->def) {
			return symbols[i]->val;
		}
		if (dbg) {
			printf("use_symbol(): ");
			printf("oof, symbol ");
			if (useid) {
				printf("id $%04X, ", id);
			} else {
				printf("%s, ", name);
			}
			puts("does not exist, yet.");
		}
	}
	return val;
}

/*
 * Overwrite the value of an already defined symbol.
 *
 * Returns 1 when the value was stored, 0 when the symbol is not defined.
 */
uint8_t set_symval(const char *name, uint16_t id, uint64_t val, uint8_t useid, uint8_t dbg) {
	uint16_t i = mksymbol(name, 0, 0, useid, id, dbg);
	if (symbols[i] != NULL) {
		if (symbols[i]->def) {
			symbols[i]->val = val;
			return 1;
		}
		if (dbg) {
			printf("set_symval(): ");
			printf("oof, symbol ");
			if (useid) {
				printf("id $%04X, ", id);
			} else {
				printf("%s, ", name);
			}
			puts("does not exist, yet.");
		}
	}
	return 0;
}

/*
 * Return the name of the symbol stored at table index `id`, or NULL when
 * that slot is empty or the symbol has not been defined.
 */
char *get_symname(uint16_t id, uint8_t dbg) {
	/* An empty slot is treated like an undefined symbol. */
	if (symbols[id] != NULL && symbols[id]->def) {
		return symbols[id]->name;
	}
	if (dbg) {
		printf("get_symname(): oof, symbol id $%04X, has not been defined, yet.\n", id);
	}
	return NULL;
}

/* Number of slots of fixups[] that are in use. */
uint16_t fixup_cnt = 0;

/*
 * Return the id of symbol `name`, creating an undefined entry if needed.
 *
 * When the symbol is not defined yet, a fixup record (address `val`, token
 * `t`, symbol) is appended to fixups[] and 0xFFFF is returned instead.
 *
 * NOTE(review): neither the malloc() result nor the capacity of fixups[]
 * is checked here.
 */
uint16_t get_symid(const char *name, uint64_t val, token *t, uint8_t dbg) {
	uint16_t i = mksymbol(name, 0, 0, 0, 0, dbg);
	if (dbg) {
		printf("get_symid(): Symbol ID: $%X, i: $%X.\n", symbols[i]->id, i);
	}
	if (symbols[i]->def) {
		return symbols[i]->id;
	}
	if (dbg) {
		printf("get_symid(): oof, symbol %s, does not exist, yet.\n", name);
	}
	fixups[fixup_cnt] = malloc(sizeof(**fixups));
	fixups[fixup_cnt]->adr = val;
	fixups[fixup_cnt]->t = t;
	fixups[fixup_cnt]->s = symbols[i];
	fixup_cnt++;
	return 0xFFFF;
}

/*
 * Search the comment table for `com`.
 *
 * Returns the index of the matching entry, or 0xFFFF when it is not found.
 */
uint16_t get_comment(const char *com, uint8_t dbg) {
	uint16_t i = 0;
	uint8_t iscom = 0;
	for (; i < comidx; i++) {
		if (comment[i] == NULL) {
			break;
		}
		if (com[0] == comment[i][0]) {
			iscom = !strcmp(com, comment[i]);
		}
		if (iscom) {
			break;
		}
	}
	/* Test the bound first so comment[comidx] is never read. */
	if (i == comidx || comment[i] == NULL) {
		if (dbg) {
			printf("get_comment(): oof, the index $%04X is NULL.\n", i);
			printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
		}
		return 0xFFFF;
	}
	if (dbg) {
		printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i);
	}
	return i;
}

/*
 * Search the string table for `str`.
 *
 * Returns the index of the matching entry, or 0xFFFF when it is not found.
 */
uint16_t get_string(const char *str, uint8_t dbg) {
	uint16_t i = 0;
	uint8_t isstr = 0;
	for (; i < stridx; i++) {
		if (string[i] == NULL) {
			break;
		}
		if (str[0] == string[i][0]) {
			isstr = !strcmp(str, string[i]);
		}
		/* Stop ON the match; letting the loop increment once more would
		 * report the following slot (or "not found" for the last one). */
		if (isstr) {
			break;
		}
	}
	/* Test the bound first so string[stridx] is never read. */
	if (i == stridx || string[i] == NULL) {
		if (dbg) {
			printf("get_string(): oof, the index $%04X is NULL.\n", i);
			printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str);
		}
		return 0xFFFF;
	}
	if (dbg) {
		printf("get_string(): Found string \"%s\", in the table, at index $%04X.\n", str, i);
	}
	return i;
}

/*
 * Walk the fixup table (up to its first NULL slot) and, for every record
 * whose symbol is now defined, store the symbol id in the recorded token.
 *
 * Returns the number of records whose symbol is still undefined.
 */
uint16_t reslv_fixups(uint8_t dbg) {
	uint16_t i = 0, j = 0;
	for (; fixups[j]; j++) {
		if (fixups[j]->s->def) {
			if (dbg) {
				printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", fixups[j]->s->id, fixups[j]->s->name, fixups[j]->s->val);
			}
			fixups[j]->t->word = fixups[j]->s->id;
		} else {
			if (dbg) {
				printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", fixups[j]->s->name, fixups[j]->adr);
			}
			i++;
		}
	}
	return i;
}

/*
 * Find the line record whose line number is `ln` in the global `lines` list.
 *
 * Returns the record, or NULL when no such line exists.
 */
line *find_line(uint16_t ln, uint8_t dbg) {
	line *l = lines;
	for (; l && l->linenum != ln; l = l->next);
	if (dbg) {
		if (l != NULL) {
			printf("find_line(): Found line number %u.\n", ln);
		} else {
			printf("find_line(): oof, could not find line number %u.\n", ln);
		}
	}
	return l;
}

/*
 * Tokenize one line of assembly source and pass the tokens to
 * parse_tokens().
 *
 * str     - NUL (or newline) terminated source line, optionally starting
 *           with a decimal line number.
 * address - current address; replaced by the stored address of the line
 *           when a line with the same number already exists.
 * dbg     - non-zero to print debug output.
 *
 * A line record is looked up with find_line() or appended to the global
 * `lines` list. Tokens are created with make_token(); labels and symbol
 * assignments are entered into the symbol table and forward references
 * are counted in the global `isfixup`.
 *
 * Returns the address returned by parse_tokens(), or `address` unchanged
 * when nothing was consumed from `str`.
 */
uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
	char sym[0x100];
	uint16_t i = 0;
	uint16_t j = 0;
	uint16_t comid = 0;
	uint16_t strid = 0;
	uint64_t value = 0;
	lex_type = 0xFF;
	uint8_t k = 0;
	uint8_t ch = 0;
	uint8_t rs = 0;
	uint8_t base = 0;
	uint8_t isop = 0;
	int num = 0;
	int isch = 0;
	uint8_t isesc = 0;
	uint8_t islinenum;
	int16_t ln = -1;
	char lnum[6];
	uint8_t space = 0;
	uint8_t tab = 0;
	uint8_t isstart = 1;
	uint8_t fall = 0;
	uint8_t skip_step = 0;	/* Set when a case already left `i` on the next lexeme. */
	line *l = NULL;
	token *t = NULL;
	token *lt = NULL;

	/* A ':' or a value may be seen before any symbol name was copied. */
	sym[0] = '\0';

	/* Optional leading line number. Digits beyond the buffer capacity are
	 * consumed but not stored, so lnum[] cannot overflow. */
	while (isdigit(str[i])) {
		if (j < sizeof(lnum) - 1) {
			lnum[j++] = str[i];
		}
		i++;
	}
	islinenum = (i != 0);
	if (i) {
		lnum[j] = '\0';
		ln = strtol(lnum, NULL, 10);
		j = 0;
	} else {
		ln = linenum;
	}

	l = find_line(ln, dbg);
	if (l) {
		address = l->addr;
	} else {
		l = malloc(sizeof(*l));
		if (l == NULL) {
			return address;
		}
		(last_line) ? (last_line->next = l) : (lines = l);
		l->tok = NULL;
		l->next = NULL;
		l->count = 0;
		last_line = l;
	}

	while (str[i] != '\0' && str[i] != '\n') {
		base = 0;
		space = 0;
		tab = 0;
		/* Count the blanks in front of the next lexeme. */
		while (isspace(str[i+j])) {
			tab += str[i+j] == '\t';
			space += str[i+j] == ' ';
			j++;
		}
		j = 0;
		if (dbg) {
			printf("lex(): tab: %u, space: %u\n", tab, space);
		}
		if (isstart) {
			l->stab = tab;
			l->sspace = space;
			if (dbg) {
				printf("lex(): starting tabs: %u, starting spaces: %u\n", l->stab, l->sspace);
			}
		}
		while (isspace(str[i])) {
			i++;
		}
		switch (str[i]) {
			case '.':
				/* Either a directive, or a register size suffix when it
				 * directly follows an opcode. */
				i++;
				while (str[i] != '\0' && !isspace(str[i])) {
					lexeme[j++] = str[i++];
				}
				lexeme[j] = '\0';
				if (!isop) {
					for (k = 0; k < 6; k++) {
						if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
							lex_type = TOK_DIR;
							break;
						}
					}
					l->count++;
					t = make_token(lex_type, k, 0, "");
				} else {
					lex_type = TOK_RS;
					/* An empty suffix leaves `rs` untouched. */
					switch ((j > 0) ? tolower(lexeme[j-1]) : 0) {
						case '2':
						case 'w':
							rs = 1;
							break;
						case '4':
						case 'd':
							rs = 2;
							break;
						case '8':
						case 'q':
							rs = 3;
							break;
					}
					l->count++;
					t = make_token(lex_type, rs, 0, "");
					isop = 0;
				}
				break;
			case '\"':
				i++;
				/* Stop at the end of the input if the quote is never closed. */
				while (str[i] != '\0' && str[i] != '\"') {
					lexeme[j++] = str[i++];
				}
				/* Terminate explicitly: the buffer may still hold a longer
				 * symbol name from the previous lexeme. */
				lexeme[j] = '\0';
				strid = get_string(lexeme, dbg);
				if (strid == 0xFFFF) {
					strid = stridx;
					string[strid] = malloc(j+1);
					memcpy(string[strid], lexeme, j+1);
					stridx++;
				}
				if (dbg) {
					printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
				}
				if (lt && lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
					incl[inc_file++] = strid;
				}
				lex_type = TOK_STRING;
				l->count++;
				t = make_token(lex_type, 0, 0, string[strid]);
				break;
			case '$':
			case '%':
				value = 0;
				base = (str[i] == '$') ? 16 : 2;
				i++;
				while (isxdigit(str[i]) && (str[i] != '\0' && str[i] != '\n' && str[i] != ',')) {
					lexeme[j++] = str[i++];
				}
				lexeme[j] = '\0';
				lex_type = (base == 16) ? TOK_HEX : TOK_BIN;
				value = strtoull(lexeme, NULL, base);
				/* A value right after a symbol token defines that symbol. */
				if (lt && lt->id == TOK_SYM) {
					mksymbol(sym, value, 1, 0, 0, dbg);
					lt->word = get_symid(sym, address, lt, dbg);
					isfixup += (lt->word == 0xFFFF);
					if (dbg) {
						printf("lex(): isfixup: %u\n", isfixup);
					}
				}
				l->count++;
				t = make_token(lex_type, 0, value, "");
				break;
			case '\'':
				i++;
				k = j;
				/* Stop at the end of the input if the quote is never closed. */
				while (str[i] != '\0' && (str[i] != '\'' || isesc)) {
					isesc = (str[i] == '\\' && str[i-1] != '\\');
					lexeme[j++] = str[i++];
				}
				isesc = 0;
				lexeme[j] = '\0';
				switch (lexeme[k]) {
					case '\\':
						switch (lexeme[++k]) {
							case 'n' : ch = '\n'; break;
							case 'r' : ch = '\r'; break;
							case 'b' : ch = '\b'; break;
							case '\'': ch = '\''; break;
							case '\"': ch = '\"'; break;
							case '\\': ch = '\\'; break;
						}
						break;
					default:
						ch = lexeme[k];
				}
				lex_type = TOK_CHAR;
				l->count++;
				t = make_token(lex_type, 0, ch, "");
				break;
			case '(':
				/* The type is recorded on the first token of the line,
				 * which does not exist when '(' comes first. */
				if (l->tok) {
					l->tok->type = IND;
				}
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j++] = str[i];
				break;
			case '#':
				if (l->tok) {
					l->tok->type = IMM;
				}
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j++] = str[i];
				break;
			case '+':
			case '-':
			case '>':
			case '<':
				lex_type = TOK_EXPR;
				switch (str[i]) {
					case '+': value = EXPR_PLUS ; break;
					case '-': value = EXPR_MINUS; break;
					case '>': value = EXPR_LOW  ; break;
					case '<': value = EXPR_HIGH ; break;
				}
				l->count++;
				t = make_token(lex_type, value, 0, "");
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j++] = str[i];
				break;
			case '=':
				i++;
				lex_type = TOK_SYM;
				l->count++;
				t = make_token(lex_type, 0, 0, "");
				memset(lexeme, 0, strlen(lexeme)+1);
				lexeme[j] = str[i];
				break;
			case ')':
				i++;
				if (str[i] == ',') {
					i++;
					while (isspace(str[i])) {
						lexeme[j++] = str[i++];
					}
					if (l->tok && l->tok->type == IND && tolower(str[i]) == 'y') {
						lexeme[j++] = 'y';
						l->tok->type = INDY;
					}
					lexeme[j] = '\0';
				} else {
					lexeme[j] = ')';
					lexeme[j+1] = '\0';
					lexeme[j+2] = '\0';
				}
				break;
			case ',':
				i++;
				while (isspace(str[i])) {
					lexeme[j++] = str[i++];
				}
				if (l->tok && l->tok->type == IND && tolower(str[i]) == 'x') {
					l->tok->type = INDX;
					lexeme[j++] = 'x';
					i++;
				} else {
					switch (tolower(str[i])) {
						case 'x':
							if (l->tok) {
								l->tok->type = ZMX;
							}
							lexeme[j++] = 'x';
							break;
						case 'y':
							if (l->tok) {
								l->tok->type = ZMY;
							}
							lexeme[j++] = 'y';
							break;
						default:
							lex_type = TOK_COMMA;
							if (j) {
								/* Rest on the last blank; the next pass
								 * skips it again. */
								i--;
							} else {
								/* Nothing was skipped: stepping back would
								 * land on the comma itself and repeat this
								 * case forever. Stay on the next lexeme and
								 * suppress the step at the end of the pass. */
								skip_step = 1;
							}
							break;
					}
				}
				lexeme[j] = '\0';
				break;
			case ':':
				/* Label: define the preceding symbol at the current address. */
				i++;
				lexeme[j] = ':';
				lexeme[j+1] = '\0';
				lex_type = TOK_LABEL;
				l->count++;
				t = make_token(lex_type, 0, 0, "");
				mksymbol(sym, address, 1, 0, 0, dbg);
				if (isfixup) {
					isfixup = reslv_fixups(dbg);
				}
				t->word = get_symid(sym, address, t, dbg);
				isfixup += (t->word == 0xFFFF);
				if (dbg) {
					printf("lex(): isfixup: %u\n", isfixup);
				}
				break;
			case ';':
				/* Comment: runs to the end of the line. */
				i++;
				while (str[i] != '\0' && str[i] != '\n') {
					lexeme[j++] = str[i++];
				}
				lexeme[j] = '\0';
				comid = get_comment(lexeme, dbg);
				if (comid == 0xFFFF) {
					comid = comidx;
					comment[comid] = malloc(j+1);
					memcpy(comment[comid], lexeme, j+1);
					comidx++;
				}
				if (dbg) {
					printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
				}
				lex_type = TOK_COMMENT;
				l->count++;
				t = make_token(lex_type, 0, 0, comment[comid]);
				break;
			default:
				/* Opcode, decimal number or symbol name. */
				if (isalnum(str[i]) || str[i] == '_') {
					while (!isspace(str[i])) {
						switch (str[i]) {
							case ')':
							case ',':
							case '.':
							case '+':
							case '<':
							case '>':
							case '-':
							case ':':
							case '=':
							case ';':
							case '\0':
							case '\n':
								isch = 0;
								break;
							default:
								isch = 1;
								lexeme[j++] = str[i++];
								break;
						}
						if (!isch) {
							break;
						}
					}
					lexeme[j] = '\0';
					isch = 0;
					isop = 0;
					/* Only three letter words not followed by ':' are
					 * compared against the mnemonic table. */
					if (j == 3 && str[i] != ':') {
						for (k = 0; k < OPNUM; k++) {
							if (toupper(lexeme[0]) == mne[k][0]) {
								if (!strcasecmp(lexeme, mne[k])) {
									lex_type = TOK_OPCODE;
									isop = 1;
									l->count++;
									t = make_token(lex_type, 0xFF, k, "");
									break;
								}
							}
						}
					}
					if (!isop) {
						/* Classify the word: all digits, or containing a letter. */
						for (k = 0; lexeme[k] != '\0';) {
							switch (lexeme[k]) {
								case ')':
								case ',':
								case '.':
								case '+':
								case '-':
								case '<':
								case '>':
								case ':':
								case ';':
								case '=':
								case '\0':
								case '\n':
									fall = 1;
									break;
								default:
									fall = 0;
									break;
							}
							if (fall) {
								break;
							}
							if ((isalnum(lexeme[k]) || lexeme[k] == '_')) {
								if (!isch) {
									isch = isalpha(lexeme[k]);
								}
								num = isdigit(lexeme[k]) && !isch;
								k++;
							} else {
								break;
							}
						}
						if (lexeme[k] == '\0') {
							if (num) {
								value = 0;
								value = strtoull(lexeme, NULL, 10);
								/* A value right after a symbol token defines
								 * that symbol. */
								if (lt && lt->id == TOK_SYM) {
									mksymbol(sym, value, 1, 0, 0, dbg);
									lt->word = get_symid(sym, address, lt, dbg);
									isfixup += (lt->word == 0xFFFF);
									if (dbg) {
										printf("lex(): isfixup: %u\n", isfixup);
									}
								}
								lex_type = TOK_DEC;
								l->count++;
								t = make_token(lex_type, 0, value, "");
							} else if (isch && lex_type != TOK_HEX && lex_type != TOK_BIN) {
								lex_type = TOK_SYM;
								l->count++;
								t = make_token(lex_type, 0, 0, "");
								/* Remember the name for a following ':' or
								 * value, truncated to the size of sym[]. */
								uint16_t symlen = (j < sizeof(sym)) ? j : (uint16_t)(sizeof(sym) - 1);
								memcpy(sym, lexeme, symlen);
								sym[symlen] = '\0';
								uint8_t spaces = 0;
								for (; isspace(str[i+spaces]); spaces++);
								if (dbg) {
									printf("lex(): spaces: %u\n", spaces);
								}
								/* A symbol followed by ':' or '=' is being
								 * defined, not referenced. */
								if (str[i+spaces] != ':' && str[i+spaces] != '=') {
									t->word = get_symid(lexeme, address, t, dbg);
									isfixup += (t->word == 0xFFFF);
									if (dbg) {
										printf("lex(): isfixup: %u\n", isfixup);
									}
								}
							}
						}
					}
				}
				break;
		}
		if (!l->tok && t) {
			l->tok = tokens;
		}
		if (dbg) {
			printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
		}
		isstart = 0;
		j = 0;
		/* Step over the character that ended this lexeme, unless it starts
		 * the next one. */
		if (skip_step) {
			skip_step = 0;
		} else if (lex_type == TOK_OPCODE && !isop) {
			/* No step. */
		} else if (lex_type == TOK_EXPR) {
			i++;
		} else {
			switch (str[i]) {
				case ')':
				case ',':
				case '.':
				case '+':
				case '-':
				case '<':
				case '>':
				case ':':
				case ';':
				case '=':
				case ' ':
				case '\t':
				case '\n':
				case '\0':
					break;
				default:
					i++;
					break;
			}
		}
		if (lex_type == TOK_COMMENT) {
			/* Blanks counted in front of the comment. */
			l->etab = tab;
			l->espace = space;
			if (dbg) {
				printf("lex(): ending tabs: %u, ending spaces: %u\n", l->etab, l->espace);
			}
		}
		/* A symbol lexeme is kept for the following ':' or value. */
		if (lex_type != TOK_SYM) {
			memset(lexeme, 0, strlen(lexeme)+1);
			lex_type = 0xFF;
		}
		if (t) {
			lt = t;
			t = t->next;
		}
	}
	if (i) {
		l->tok = tokens;
		tokens = NULL;
		last_tok = NULL;
		bytecount dummy;
		address = parse_tokens(l->tok, &dummy, 0, address, dbg);
		if (dbg) {
			printf("lex(): Next address: $%"PRIX64"\n", address);
		}
		if (ln > linenum || islinenum) {
			l->linenum = ln;
			if (ln > linenum) {
				linenum+=(10+(ln & 10));
			}
		} else if (!islinenum) {
			l->linenum = linenum;
			linenum += 10;
		}
	}
	return address;
}