diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 388 |
1 files changed, 245 insertions, 143 deletions
@@ -131,8 +131,8 @@ symbol *find_member(char *name, symbol* root, uint8_t dbg) { return m; } } - for (; s->next && !s->down; s = s->next); - } while (s->down); + for (;s && s->next && !s->down; s = s->next); + } while (s && s->down); return NULL; } @@ -228,8 +228,95 @@ line *find_line(uint32_t ln, uint8_t dbg) { return l; } -uint8_t is_struct = 0; -uint8_t is_anon = 0; +int is_struct = 0; +int is_anon = 0; + +void create_struct(symbol *c_sym, line *l, token *t, token *lt, char *name, uint8_t dbg) { + uint8_t ismember = !(is_struct == 1 && lt && lt->id == TOK_DIR); + mksymbol(name, 0, 1, ismember, 0, 0, dbg); + if (isfixup) { + isfixup = reslv_fixups(dbg); + } + t->sym = get_sym(name, 0, t, ismember, dbg); + if (lt && lt->id == TOK_DIR) { + t->sym->isstruct = 1; + t->id = (lt->type == DIR_STRUCT) ? TOK_STRUCT : TOK_UNION; + tmp_line = l; + } else { + t->id = TOK_MEMBER; + t->sym->isanon = (is_anon > 0); + } + isfixup += (t->sym == NULL); + int is_top = (c_sym == NULL); + c_sym = (!ismember && !c_sym) ? last_sym : c_sym; + if (!ismember) { + if (!is_top) { + c_sym = t->sym; + locals = NULL; + last_loc = NULL; + } else { + c_sym->down = locals; + } + } else { + if (lt && lt->id == TOK_DIR) { + if (lt->type == DIR_UNION || lt->type == DIR_STRUCT) { + c_sym->down = locals; + c_sym->down->up = c_sym; + last_loc->up = c_sym; + c_sym = last_loc; + locals = NULL; + last_loc = NULL; + } + } + } + cur_sym = c_sym; +} + +void end_struct(symbol *c_sym, symbol *s_sym, uint8_t dbg) { + int skip = 0; + if (/*s_sym &&*/ is_anon > 0) { + if ((c_sym && c_sym->isanon) || (c_sym->up && !c_sym->up->isanon) || (c_sym && s_sym->isanon)) { + is_anon--; + } else if (is_struct <= 0) { + is_anon = 0; + } + skip = (!is_anon); + } + if (((is_struct-is_anon) > 0 && !skip) || (is_anon <= 0 && is_struct <= 0)) { + symbol *s; + for (s = locals; s; s = s->next) { + if (s->up == NULL) { + s->up = c_sym; + } + if (dbg) { + printf("s: %p, s->up: %p, c_sym: %p, last_loc: %p\n", s, s->up, c_sym, last_loc); + } + } + if (c_sym->down == NULL) { + c_sym->down = locals; + } + } + if ((is_anon <= 0 || is_struct <= 0)) { + for (s_sym = c_sym; /*s_sym &&*/ s_sym->prev && !s_sym->isanon; s_sym = s_sym->prev); + struct_sym = s_sym; + } + if ((is_struct-is_anon) > 0 && !skip) { + symbol *s = c_sym; + for (; s->prev; s = s->prev) { + if (s->up == NULL && c_sym->up) { + s->up = c_sym->up; + } + if (dbg) { + printf("s: %p, s->up: %p, c_sym->up: %p, last_loc: %p\n", s, s->up, c_sym->up, last_loc); + } + } + if (c_sym->up) { + cur_sym = c_sym->up; + } + for (locals = locals->up; locals->prev; locals = locals->prev); + for (last_loc = locals; last_loc->next; last_loc = last_loc->next); + } +} uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { char sym[0x100]; @@ -240,6 +327,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { uint16_t symid = 0; uint64_t value = 0; lex_type = 0xFF; + uint8_t lex_subtype = 0xFF; uint8_t k = 0; uint8_t k2 = 0; @@ -321,7 +409,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { uint8_t ptok = get_ptok(str[i], dbg); if (is_altok(ptok, dbg)) { offset++; - if ((ptok == PTOK_S && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) { + if (((ptok == PTOK_S || ptok == PTOK_B) && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) { offset++; } switch (get_ptok(str[i+offset], dbg)) { @@ -331,13 +419,36 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { case PTOK_Y : case PTOK_S : case PTOK_P : - case PTOK_ALPHA : - case PTOK_NUMBER: ptok = PTOK_ALPHA; break; + case PTOK_A : + case PTOK_C : + case PTOK_D : + case PTOK_F : + case PTOK_R : + case PTOK_ALPHA : ptok = PTOK_ALPHA; break; + case PTOK_NUMBER: + if (ptok == PTOK_R) { + char reg_num[3]; + int isnum; + for (isnum = 0; isdigit(str[i+offset]) && !(isdelm(str[i+offset], dbg) & 0x03) && isnum < 2; offset++, isnum++) { + reg_num[isnum] = str[i+offset]; + } + reg_num[isnum] = '\0'; + if (isnum == 2) { + int regnum = strtoul(reg_num, NULL, 10); + ptok = (regnum < 11 || regnum > 15) ? PTOK_ALPHA : ptok; + } else { + ptok = PTOK_ALPHA; + } + } else { + ptok = PTOK_ALPHA; + } + break; } - if ((ptok == PTOK_S && toupper(str[i+1]) != 'P') || (ptok == PTOK_P && toupper(str[i+1]) != 'C')) { + if ((ptok == PTOK_S && str[i+1] && toupper(str[i+1]) != 'P') || (ptok == PTOK_P && toupper(str[i+1]) != 'C')) { ptok = PTOK_ALPHA; } } + /*i = ptok_handler[ptok](str, i, lex_type, l, t, dbg);*/ switch (ptok) { case PTOK_DOT: i++; @@ -353,83 +464,53 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { for (j = 0; isdelm(str[i+j], dbg) & 16; j++); uint8_t ret = get_ptok(str[i+j], dbg); j = tmp; + if ((k == DIR_STRUCT || k == DIR_UNION) && (ret != PTOK_ALPHA || (is_anon && ret == PTOK_ALPHA))) { is_anon++; } is_struct += (k == DIR_STRUCT || k == DIR_UNION); is_struct -= (k == DIR_ENDSTRUCT || k == DIR_ENDUNION); if ((k == DIR_ENDSTRUCT || k == DIR_ENDUNION)) { - int skip = 0; - if ((int)is_anon > 0) { - if ((cur_sym && cur_sym->isanon) || (cur_sym->up && !cur_sym->up->isanon) || (struct_sym && struct_sym->isanon)) { - is_anon--; - } else if ((int)is_struct <= 0) { - is_anon = 0; - } - skip = (!is_anon); - } - if (((int)(is_struct-is_anon) > 0 && !skip) || ((int)is_anon <= 0 && (int)is_struct <= 0)) { - symbol *s; - for (s = locals; s; s = s->next) { - if (s->up == NULL) { - s->up = cur_sym; - } - if (dbg) { - printf("s: %p, s->up: %p, cur_sym: %p, last_loc: %p\n", s, s->up, cur_sym, last_loc); - } - } - if (cur_sym->down == NULL) { - cur_sym->down = locals; - } - } - if (((int)is_anon <= 0 || (int)is_struct <= 0)) { - for (struct_sym = cur_sym; struct_sym->prev && !struct_sym->isanon; struct_sym = struct_sym->prev); - } - if ((int)(is_struct-is_anon) > 0 && !skip) { - symbol *s = cur_sym; - for (; s->prev; s = s->prev) { - if (s->up == NULL && cur_sym->up) { - s->up = cur_sym->up; - } - if (dbg) { - printf("s: %p, s->up: %p, cur_sym->up: %p, last_loc: %p\n", s, s->up, cur_sym->up, last_loc); - } - } - if (cur_sym->up) { - cur_sym = cur_sym->up; - } - for (locals = locals->up; locals->prev; locals = locals->prev); - for (last_loc = locals; last_loc->next; last_loc = last_loc->next); - } + end_struct(cur_sym, struct_sym, dbg); } break; } } - l->count++; if (lex_type != TOK_DIR && lt && lt->id == TOK_SYM) { lex_type = TOK_MEMBER; i -= j; } else { + l->count++; t = make_token(lex_type, k, space, tab, 0, "", NULL); } } else { - lex_type = TOK_RS; - switch (tolower(lexeme[j-1])) { - case '2': - case 'w': - rs = 1; - break; - case '4': - case 'd': - rs = 2; - break; - case '8': - case 'q': - rs = 3; - break; + for (k = 0; !(isdelm(lexeme[k], dbg) & 17); k++) { + lex_type = TOK_RS; + switch (tolower(lexeme[k])) { + case '2': + case 'w': + rs = 1; + break; + case '4': + case 'd': + rs = 2; + break; + case '8': + case 'q': + rs = 3; + break; + case 'o': + /* Set Our token type to ortho suffix. */ + lex_type = TOK_OS; + break; + } + l->count++; + t = make_token(lex_type, rs, space, tab, 0, "", NULL); + if (t) { + lt = t; + t = t->next; + } } - l->count++; - t = make_token(lex_type, rs, space, tab, 0, "", NULL); isop = 0; } break; @@ -525,12 +606,20 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { break; case PTOK_LBRACK: case PTOK_HASH : - l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM; + /*l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM; + lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;*/ + lex_type = TOK_MEM; + value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM; + l->count++; + t = make_token(lex_type, value, space, tab, 0, "", NULL); lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM; + if (lex_subtype != 0xFF) { + lex_subtype = 0xFF; + } memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; - (t) ? (t->subspace = space) : (lt->subspace = space); - (t) ? (t->subtab = tab) : (lt->subtab = tab); + /*(t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab);*/ break; case PTOK_PLUS: case PTOK_MINUS: @@ -567,54 +656,83 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { lexeme[j] = ')'; lexeme[j+1] = '\0'; lexeme[j+2] = '\0'; + if (t && t->subtype == 0xFF) { + t->subtype = TOK_IND; + } else if (lt && lt->subtype == 0xFF) { + lt->subtype = TOK_IND; + } + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); break; case PTOK_COMMA: i++; - if (lex_type != TOK_IND && lex_type != TOK_OF) { + if ((lex_type != TOK_IND && lex_type != TOK_OF)) { lex_type = TOK_CSV; } + lex_subtype = TOK_CSV; + if (t && t->subtype == 0xFF) { + t->subtype = TOK_CSV; + } else if (lt && lt->subtype == 0xFF) { + lt->subtype = TOK_CSV; + } + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); lexeme[j] = ','; lexeme[j+1] = '\0'; lexeme[j+2] = '\0'; break; case PTOK_B: - lexeme[j] = str[i++]; - lexeme[j+1] = '\0'; - lexeme[j+2] = '\0'; - lex_type = TOK_BREG; - l->tok->type = BREG; - (t) ? (t->subspace = space) : (lt->subspace = space); - (t) ? (t->subtab = tab) : (lt->subtab = tab); - break; case PTOK_E: case PTOK_X: case PTOK_Y: - lexeme[j] = str[i++]; - lexeme[j+1] = '\0'; - lexeme[j+2] = '\0'; - if (lex_type != TOK_IND && lex_type != TOK_CSV) { - break; - } + case PTOK_S: + case PTOK_A: + case PTOK_C: + case PTOK_D: + case PTOK_F: + case PTOK_R: + lexeme[j+0] = str[i++]; + lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0'; + lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0'; + lexeme[j+3] = '\0'; + /*lex_subtype = (lex_type == TOK_CSV && lt && lt->subtype != TOK_CSV) ? lex_type : lex_subtype;*/ + lex_type = TOK_REG; switch (ptok) { - case PTOK_E: l->tok->type = (lex_type == TOK_IND) ? EIND : l->tok->type; break; - case PTOK_X: l->tok->type = (lex_type == TOK_IND) ? INDX : ZMX; break; - case PTOK_Y: l->tok->type = (lex_type == TOK_IND) ? INDY : ZMY; break; + case PTOK_A: value = REG_A; break; + case PTOK_X: value = REG_X; break; + case PTOK_Y: value = REG_Y; break; + case PTOK_E: value = REG_E; break; + case PTOK_C: value = REG_C; break; + case PTOK_D: value = REG_D; break; + case PTOK_S: + case PTOK_B: + if (get_ptok(lexeme[j+1], dbg) == PTOK_P) { + value = (ptok == PTOK_S) ? REG_SP : REG_BP; + } else { + value = (ptok == PTOK_S) ? REG_S : REG_B; + } + break; + case PTOK_F: value = REG_F; break; + case PTOK_R: value = strtoull(lexeme+j+1, NULL, 10); break; } + l->count++; + t = make_token(lex_type, value, space, tab, 0, "", NULL); + t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype; + lex_subtype = 0xFF; + /*(t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab);*/ break; - case PTOK_S: case PTOK_P: lexeme[j] = str[i++]; - if (str[i] != ',') { - lexeme[j+1] = str[i++]; - } else { - lexeme[j+1] = '\0'; - } + lexeme[j+1] = (str[i] != ',') ? str[i++] : '\0'; lexeme[j+2] = '\0'; - switch (ptok) { + /*switch (ptok) { case PTOK_S: of = 1; break; case PTOK_P: of = 2; break; - } + }*/ + of = 2; lex_type = TOK_OF; + l->count++; t = make_token(lex_type, of, space, tab, 0, "", NULL); break; case PTOK_AT: @@ -700,18 +818,26 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { i += j; isch = 0; isop = 0; - if (j == 3 && str[i] != ':' && !is_struct) { - for (k = 0, k2 = 0; k < OPNUM || k2 < EXT_OPNUM; k++, k2++) { - int find_ext = (k2 < EXT_OPNUM); + if (j > 1 && j <= 3 && str[i] != ':' && !is_struct) { + for (k = 0; k < OPNUM; k++) { + int find_ext = (k < EXT_OPNUM); + int find_ortho = (k < ORTHO_OPNUM); int upper = toupper(lexeme[0]); - if (upper == mne[k][0] || (find_ext && upper == ext_mne[k2][0])) { + int isbase = (upper == mne[k][0]); + int isext = (find_ext && upper == ext_mne[k][0]); + int isortho = (find_ortho && upper == ortho_mne[k][0]); + + if (isbase || isext || isortho) { int is_base = !strcasecmp(lexeme, mne[k]); - int is_ext = (find_ext && !strcasecmp(lexeme, ext_mne[k2])); - if (is_base || is_ext) { - lex_type = (is_base && !is_ext) ? TOK_OPCODE : TOK_EXTOP; + int is_ext = (find_ext && !strcasecmp(lexeme, ext_mne[k])); + int is_ortho = (find_ortho && !strcasecmp(lexeme, ortho_mne[k])); + if (is_base || is_ext || is_ortho) { + lex_type = (is_base) ? TOK_OPCODE : lex_type; + lex_type = (is_ext) ? TOK_EXTOP : lex_type; + lex_type = (is_ortho) ? TOK_ORTHO : lex_type; isop = 1; l->count++; - t = make_token(lex_type, 0xFF, space, tab, (is_base && !is_ext) ? k : k2, "", NULL); + t = make_token(lex_type, 0xFF, space, tab, k, "", NULL); break; } } @@ -719,6 +845,18 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { } if (!isop) { uint8_t spaces = 0; + if (l->tok && l->tok->type == TOK_ORTHO && l->tok->byte == SET) { + for (k = 0; k < 8; k++) { + int upper = toupper(lexeme[0]); + if (upper == set_cc[k][0]) { + if (!strcasecmp(lexeme, set_cc[k])) { + lex_type = TOK_CC; + l->count++; + t = make_token(lex_type, 0xFF, space, tab, k, "", NULL); + } + } + } + } for (; isdelm(str[i+spaces], dbg) == 16; spaces++); uint8_t ret = get_ptok(str[i+spaces], dbg); if (ret == PTOK_COLON || ret == PTOK_EQU) { @@ -732,43 +870,7 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { printf("lex(): spaces: %u\n", spaces); } if (is_struct) { - islocal = !(is_struct == 1 && lt && lt->id == TOK_DIR); - mksymbol(sym, 0, 1, islocal, 0, 0, dbg); - if (isfixup) { - isfixup = reslv_fixups(dbg); - } - t->sym = get_sym(sym, 0, t, islocal, dbg); - if (lt && lt->id == TOK_DIR) { - t->sym->isstruct = 1; - t->id = (lt->type == DIR_STRUCT) ? TOK_STRUCT : TOK_UNION; - tmp_line = l; - } else { - t->id = TOK_MEMBER; - t->sym->isanon = (is_anon > 0); - } - isfixup += (t->sym == NULL); - int is_top = (cur_sym == NULL); - cur_sym = (!islocal && !cur_sym) ? last_sym : cur_sym; - if (!islocal) { - if (!is_top) { - cur_sym = t->sym; - locals = NULL; - last_loc = NULL; - } else { - cur_sym->down = locals; - } - } else { - if (lt && lt->id == TOK_DIR) { - if (lt->type == DIR_UNION || lt->type == DIR_STRUCT) { - cur_sym->down = locals; - cur_sym->down->up = cur_sym; - last_loc->up = cur_sym; - cur_sym = last_loc; - locals = NULL; - last_loc = NULL; - } - } - } + create_struct(cur_sym, l, t, lt, sym, dbg); islocal = 0; } else if ((str[i+spaces] != ':' && str[i+spaces] != '=')) { uint8_t sym_struct = 0; @@ -831,9 +933,9 @@ uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { case TOK_DEC : case TOK_CHAR: case TOK_EXPR: - s->val = get_val(tok, address, 3, dbg); + s->val = get_val(tok, address, 3, 0xFF, 0, dbg); if (tok->next) { - tok = skip_expr(tok, dbg); + tok = skip_expr(tok, 0xFF, 0, dbg); } break; } |