#include "asmmon.h"
#include "lexer.h"
/* Type of the token most recently classified by lex(); lex() uses 0xFF for "none". */
uint8_t lex_type;
/* Counter used by mksymbol() to hand out an id to every newly allocated symbol. */
uint16_t sym_count = 0;
/* Token list of the line being lexed; lex() stores it in line->tok and then resets both to NULL. */
token *tokens = NULL;
/* Presumably the tail of `tokens` (maintained outside this file section) - TODO confirm. */
token *last_tok = NULL;
/* Root of the current local symbol list; passed to add_symbol() as `rloc`. */
symbol *locals = NULL;
/* Last entry of the local symbol list; passed to add_symbol() as `lloc`. */
symbol *last_loc = NULL;
/* Most recently added symbol / current scope; updated by add_symbol(), create_struct() and end_struct(). */
symbol *cur_sym = NULL;
/* Set by end_struct() and passed back into it by lex() on the next end directive. */
symbol *struct_sym = NULL;
/* Line of the struct/union directive recorded by create_struct(); lex() parses it in place of the current line once. */
line *tmp_line = NULL;
/*
 * Looks up a (possibly dotted) symbol path in the sibling list starting at
 * `root`.
 *
 * root  - first symbol of the list to search (may be NULL).
 * name  - symbol path; components are separated by '.'.
 * depth - number of scope levels still to descend through `down` links
 *         before a match counts as the result.
 * dbg   - debug flag, only forwarded to recursive calls.
 *
 * Returns the matching symbol, or NULL when nothing matches, when `name`
 * has no component left, or when the scratch copy cannot be allocated.
 */
symbol *find_symbol(symbol *root, const char *name, int depth, uint8_t dbg) {
    if (name == NULL) {
        return NULL;
    }

    /* strtok_r() writes into its input, so tokenize a private copy.  The
     * base pointer is kept separately from the save pointer so the copy
     * can be released again. */
    size_t size = strlen(name)+1;
    char *buf = malloc(size);
    if (buf == NULL) {
        return NULL;
    }
    memcpy(buf, name, size);

    char *rest = NULL;
    char *scope_name = strtok_r(buf, ".", &rest);
    symbol *ret = NULL;

    /* scope_name is NULL for an empty or all-dot path; nothing can match. */
    if (scope_name != NULL) {
        for (symbol *s = root; s != NULL; s = s->next) {
            if (strcmp(scope_name, s->name) != 0) {
                continue;
            }
            if (!depth) {
                /* Reached the requested level: this sibling is the answer. */
                ret = s;
                break;
            }
            if (s->down) {
                /* Descend one scope with the remainder of the path. */
                ret = find_symbol(s->down, (rest == NULL) ? scope_name : rest, depth-1, dbg);
                if (ret) {
                    break;
                }
            }
        }
    }

    free(buf);
    return ret;
}
/*
 * Links `sym` into the symbol structure.
 *
 * sym        - symbol to insert; its next/up/down/prev links are reset first.
 * name       - path used to look up the parent scope with find_symbol().
 * root/lsym  - head and last entry of the top-level list (used when depth is 0).
 * rloc/lloc  - head and last entry of the local list (used when depth is non-zero).
 * csym       - receives `sym` on success.
 * csym_depth - depth at or above which the parent is looked up by name.
 * depth      - scope depth at which to insert.
 * dbg        - non-zero enables debug output.
 *
 * Returns 1 when the bookkeeping pointers were updated, 0 when the descent
 * loop gives up (a second level without a `down` link).
 */
int add_symbol(symbol *sym, const char *name, symbol **root, symbol **lsym, symbol **rloc, symbol **lloc, symbol **csym, int csym_depth, int depth, uint8_t dbg) {
/* Start from the tail of the relevant list if there is one, otherwise from its head. */
symbol *s = (!depth) ? ((*lsym) ? *lsym : *root): ((*lloc) ? *lloc : *rloc);
sym->next = NULL;
sym->up = NULL;
sym->down = NULL;
sym->prev = NULL;
if (depth && depth >= csym_depth) {
/* Locate the enclosing scope by name and reduce the remaining number of levels to walk. */
s = find_symbol(*root, name, depth-1, dbg);
depth -= (!csym_depth) ? depth-1 : csym_depth;
}
int j = 0;
int is_new_scope = 0;
/* Walk down `depth` levels, repairing missing `up` links on the way. */
for (int i = 0; i < depth; i++) {
if (s) {
if (s->down != NULL) {
s->down->up = (s->down->up == NULL) ? s : s->down->up;
s = s->down;
} else if (j) {
/* Already hit one level without children; a second one means the path cannot be followed. */
return 0;
} else {
int is_first_entry = ((s->up == NULL) || (s->next == NULL && s->up));
is_new_scope = (depth <= 1 && is_first_entry);
j++;
}
} else {
is_new_scope = 1;
}
}
/* Advance to the last sibling at the level that was reached. */
for (; s != NULL && s->next != NULL; s = s->next) {
if (dbg) {
printf("s: %p, s->next: %p, s->prev: %p\n", s, s->next, s->prev);
}
}
if ((is_new_scope && *lsym) || *lloc || *csym) {
/* NOTE(review): `s` is dereferenced below without a NULL check; it can be NULL
 * when find_symbol() found nothing or the chosen list is empty - confirm callers. */
if (is_new_scope && sym->depth > s->depth) {
/* First entry of a deeper scope: hang it below `s`. */
s->down = sym;
s->down->up = s;
s->down->prev = NULL;
} else {
/* Same level: append as the next sibling. */
s->next = sym;
}
if (dbg) {
for (symbol *tmp = s; tmp && tmp->prev; tmp = tmp->prev) {
printf("add_symbol(): s: %p, s->name: %s, s->next: %p, s->prev: %p, s->up: %p, s->down: %p\n", tmp, tmp->name, tmp->next, tmp->prev, tmp->up, tmp->down);
}
}
sym->next = NULL;
if (s->next == sym) {
/* Appended as sibling: set the back link and inherit the parent for nested symbols. */
sym->prev = s;
if (sym->depth) {
sym->up = s->up;
}
}
sym->down = NULL;
}
/* List heads never have a predecessor. */
if ((*root && (*root)->prev)) {
(*root)->prev = NULL;
}
if ((*rloc && (*rloc)->prev)) {
(*rloc)->prev = NULL;
}
if (!depth) {
/* Top-level symbol: becomes the root if the list was empty, and always the new tail. */
if (*lsym == NULL) {
*root = sym;
sym->next = NULL;
sym->prev = NULL;
sym->up = NULL;
sym->down = NULL;
}
*lsym = sym;
/* A new top-level symbol starts a fresh local list. */
*rloc = NULL;
*lloc = NULL;
} else {
if (is_new_scope) {
*lloc = NULL;
}
if (*lloc == NULL) {
/* First local of this scope becomes the local root. */
*rloc = sym;
sym->next = NULL;
sym->prev = NULL;
sym->down = NULL;
}
*lloc = sym;
}
*csym = sym;
return 1;
}
/*
 * Returns a pointer to the last component of a dotted scope path, i.e. the
 * text following the final '.' in `name`.  When `name` contains no '.', the
 * whole string is returned.  The result points into `name`; nothing is
 * allocated.
 */
char *find_deepest_scope(const char *name) {
    const char *last_dot = strrchr(name, '.');
    return (char *)((last_dot != NULL) ? last_dot + 1 : name);
}
symbol *mksymbol(const char *name, uint64_t val, uint8_t def, int depth, uint8_t use_scope, uint16_t id, uint8_t dbg) {
uint16_t i = 0;
symbol *s = find_symbol(symbols, (char *)name, depth, dbg);
char *scope_name = (!use_scope) ? find_deepest_scope(name) : (char *)name;
if (s) {
if (def) {
if (s->def) {
if (dbg) {
printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", scope_name);
}
defined = 1;
} else {
defined = 0;
}
s->def = def;
s->val = val;
s->id = i;
if (dbg) {
printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, scope_name);
printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id);
}
}
return s;
}
size_t str_size = strlen(scope_name)+1;
s = malloc(sizeof(symbol));
s->name = malloc(str_size);
s->def = def;
s->val = val;
s->count = 0;
s->isstruct = 0;
s->depth = depth;
s->next = NULL;
s->up = NULL;
s->down = NULL;
s->prev = NULL;
s->id = sym_count++;
memcpy(s->name, scope_name, str_size);
defined = 0;
if (dbg) {
printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, scope_name, sym_count-1);
}
return s;
}
/* Number of fixup records created by get_sym(). */
uint16_t fixup_cnt = 0;

/*
 * Returns the symbol for `name` (via mksymbol() with def = 0).  When that
 * symbol is not defined yet, a fixup record is appended to the global
 * fixup list so the reference can be resolved later.
 *
 * name      - symbol path.
 * val       - stored as the fixup address and passed to mksymbol().
 * t         - token that references the symbol (stored in the fixup; may be NULL).
 * depth     - scope depth of the symbol.
 * use_scope - forwarded to mksymbol() (also as its `id` argument).
 * dbg       - non-zero enables debug output.
 */
symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t depth, uint8_t use_scope, uint8_t dbg) {
    symbol *sym = mksymbol(name, val, 0, depth, use_scope, use_scope, dbg);

    if (dbg) {
        printf("get_sym(): Symbol ID: $%X.\n", sym->id);
    }
    if (sym->def) {
        return sym;
    }

    if (dbg) {
        printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name);
    }

    /* Queue a fixup at the tail of the list (or start the list). */
    fixup *pending = malloc(sizeof(fixup));
    if (last_fix) {
        last_fix->next = pending;
    } else {
        fixups = pending;
    }
    pending->next = NULL;
    pending->s = sym;
    pending->t = t;
    pending->adr = val;
    last_fix = pending;
    fixup_cnt++;

    return sym;
}
/*
 * Walks the global fixup list.  For every fixup whose symbol is now
 * defined, the referencing token (if any) is pointed at that symbol.
 *
 * dbg - non-zero enables debug output.
 *
 * Returns the number of fixups whose symbol is still undefined.
 */
uint16_t reslv_fixups(uint8_t dbg) {
    uint16_t unresolved = 0;

    for (fixup *f = fixups; f; f = f->next) {
        if (f->s->def) {
            if (dbg) {
                printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", f->s->id, f->s->name, f->s->val);
            }
            /* get_sym() is also called with a NULL token (expression
             * operands), so a fixup does not always have one to patch. */
            if (f->t) {
                f->t->sym = f->s;
            }
        } else {
            if (dbg) {
                printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", f->s->name, f->adr);
            }
            unresolved++;
        }
    }
    return unresolved;
}
/*
 * Returns the index of `com` in the global comment table, adding a copy of
 * it at index `comidx` (and advancing `comidx`) when it is not there yet.
 *
 * com - comment text to look up.
 * dbg - non-zero enables debug output.
 *
 * Only the first `comidx` entries are searched; the search also stops at
 * the first NULL entry.
 */
uint16_t get_comment(const char *com, uint8_t dbg) {
    uint16_t i = 0;
    int found = 0;

    /* Check the bound before touching the table entry, and decide
     * "found" from an actual match rather than from whatever happens to be
     * stored in the slot where the search stopped. */
    for (; i < comidx && comment[i] != NULL; i++) {
        if (!strcmp(com, comment[i])) {
            found = 1;
            break;
        }
    }

    if (!found) {
        if (dbg) {
            if (i < comidx) {
                /* The search stopped early on an empty slot. */
                printf("get_comment(): oof, the index $%04X is NULL.\n", i);
            }
            printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
        }
        size_t size = strlen(com)+1;
        comment[comidx] = malloc(size);
        memcpy(comment[comidx], com, size);
        return comidx++;
    }

    if (dbg) {
        printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i]));
        printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i);
    }
    return i;
}
/*
 * Looks up `str` in the global string table.
 *
 * str - string to search for.
 * dbg - non-zero enables debug output.
 *
 * Returns the index of the first matching entry among the first `stridx`
 * entries (the search stops at the first NULL entry), or 0xFFFF when the
 * string is not in the table.
 */
uint16_t get_string(const char *str, uint8_t dbg) {
    uint16_t i = 0;

    /* Return as soon as a match is seen.  Letting the loop increment run
     * once more after the match would report the index AFTER the matching
     * entry (or "not found" when the match is the last entry). */
    for (; i < stridx && string[i] != NULL; i++) {
        if (!strcmp(str, string[i])) {
            if (dbg) {
                printf("get_string(): Found string \"%s\", in the table, at index $%04X.\n", str, i);
            }
            return i;
        }
    }

    if (dbg) {
        printf("get_string(): oof, the index $%04X is NULL.\n", i);
        printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str);
    }
    return 0xFFFF;
}
/*
 * Searches the global line list for the entry whose line number is `ln`.
 *
 * ln  - line number to look for.
 * dbg - non-zero enables debug output.
 *
 * Returns the matching line, or NULL when no line has that number.
 */
line *find_line(uint32_t ln, uint8_t dbg) {
    line *cur = lines;

    while (cur != NULL && cur->linenum != ln) {
        cur = cur->next;
    }

    if (cur == NULL) {
        if (dbg) {
            printf("find_line(): oof, could not find line number %u.\n", ln);
        }
        return NULL;
    }

    if (dbg) {
        printf("find_line(): Found line number %u.\n", ln);
    }
    return cur;
}
/*
 * Builds the dotted scope path for `name`: the names of up to `depth`
 * enclosing symbols (found by following `up` links from `csym`), each
 * followed by '.', and then `name` itself.
 *
 * csym  - symbol to start the upward walk from (may be NULL).
 * depth - number of scope levels to prefix; values <= 0 add no prefix.
 * name  - final path component.
 * dbg   - non-zero enables debug output.
 *
 * Levels for which no enclosing symbol could be found are left out of the
 * result.  Returns a newly allocated string owned by the caller (release
 * with free()), or NULL when allocation fails.
 */
char *mk_scope_name(symbol *csym, int depth, const char *name, uint8_t dbg) {
    size_t name_len = strlen(name);
    size_t len = name_len;
    int levels = (depth > 0) ? depth : 0;
    char **scopes = NULL;
    symbol *s = csym;

    if (levels) {
        /* Zero-filled so that levels the walk below never reaches can be
         * recognised instead of being read uninitialised. */
        scopes = calloc((size_t)levels, sizeof(char *));
        if (scopes == NULL) {
            return NULL;
        }
    }

    /* Skip symbols that are nested deeper than the requested depth. */
    for (; s && s->depth > depth; s = s->up);

    /* Collect the enclosing names, innermost level last. */
    for (int i = levels; i && s; i--) {
        if (dbg) {
            printf("mk_scope_name(): s->depth: %i\n", s->depth);
        }
        s = (s->depth >= i && s->up) ? s->up : s;
        len += strlen(s->name);
        scopes[i-1] = s->name;
    }

    /* One '.' per prefixed level. */
    len += (size_t)levels;
    char *scope_name = malloc(len+1);
    if (scope_name == NULL) {
        free(scopes);
        return NULL;
    }
    memset(scope_name, 0, len+1);

    char *tmp = scope_name;
    for (int i = 0; i < levels; i++) {
        if (scopes[i] == NULL) {
            continue;
        }
        size_t part_len = strlen(scopes[i]);
        memcpy(tmp, scopes[i], part_len);
        tmp += part_len;
        *tmp++ = '.';
        if (dbg) {
            printf("mk_scope_name(): scope_name: %s\n", scope_name);
        }
    }
    memcpy(tmp, name, name_len);
    if (dbg) {
        printf("mk_scope_name(): scope_name: %s\n", scope_name);
    }

    /* The array only held borrowed pointers; the names stay with their symbols. */
    free(scopes);
    return scope_name;
}
/* Nesting level of open struct/union directives; adjusted by lex(). */
int is_struct = 0;
/* Number of open struct/union directives that were not followed by a name; adjusted by lex() and end_struct(). */
int is_anon = 0;

/*
 * Creates the symbol for a struct/union or for one of its members and
 * attaches it to token `t`.
 *
 * c_sym - current scope symbol (may be NULL).
 * l     - line being lexed; remembered in `tmp_line` for struct/union heads.
 * t     - token that names the struct or member; receives the symbol and
 *         its final token id.
 * lt    - previous token; a TOK_DIR there marks `t` as a struct/union name.
 * name  - name of the struct or member.
 * dbg   - non-zero enables debug output.
 *
 * Updates the globals `cur_sym`, `locals`, `last_loc`, `tmp_line` and
 * `isfixup`.
 */
void create_struct(symbol *c_sym, line *l, token *t, token *lt, const char *name, uint8_t dbg) {
    int depth = is_struct-is_anon;
    uint8_t ismember = !(depth == 1 && lt && lt->id == TOK_DIR);
    int is_new_scope = (lt && lt->id == TOK_DIR);
    /* A struct/union name lives one level above its own members. */
    depth -= is_new_scope;

    char *struct_name = mk_scope_name(c_sym, depth, name, dbg);
    symbol *s = mksymbol(struct_name, 0, 1, depth, 0, 0, dbg);
    /* add_symbol() also stores `s` in the local c_sym through its csym argument. */
    (void)add_symbol(s, struct_name, &symbols, &last_sym, &locals, &last_loc, &c_sym, 0, depth, dbg);
    t->sym = get_sym(struct_name, 0, t, depth, 0, dbg);
    if (isfixup) {
        isfixup = reslv_fixups(dbg);
    }

    if (lt && lt->id == TOK_DIR) {
        t->sym->isstruct = 1;
        t->id = (lt->type == DIR_STRUCT) ? TOK_STRUCT : TOK_UNION;
        tmp_line = l;
    } else {
        t->id = TOK_MEMBER;
        t->sym->isanon = (is_anon > 0);
    }
    isfixup += (t->sym == NULL);

    int is_top = (c_sym == NULL);
    c_sym = (!ismember && !c_sym) ? last_sym : c_sym;
    if (!ismember) {
        if (!is_top) {
            /* Entering a new struct: it becomes the scope and starts an empty local list. */
            c_sym = t->sym;
            locals = NULL;
            last_loc = NULL;
        } else {
            c_sym->down = locals;
        }
    }
    cur_sym = c_sym;

    /* mksymbol() copies the name it stores and none of the calls above
     * keep this pointer, so the scratch path can be released here. */
    free(struct_name);
}
/*
 * Handles the end of a struct/union: fixes up the `up` links of the local
 * symbols and moves the global scope state (cur_sym, struct_sym, locals,
 * last_loc, is_anon) back towards the enclosing scope.
 *
 * c_sym - current scope symbol (lex() passes cur_sym).
 * s_sym - symbol remembered by the previous call (lex() passes struct_sym).
 * dbg   - non-zero enables debug output.
 *
 * NOTE(review): several pointers are dereferenced without NULL checks
 * (c_sym->up and s_sym->isanon in the first condition, c_sym in the later
 * loops, locals->up near the end) - confirm the callers guarantee them.
 */
void end_struct(symbol *c_sym, symbol *s_sym, uint8_t dbg) {
int skip = 0;
if (is_anon > 0) {
/* At least one anonymous struct/union is open. */
if ((c_sym && c_sym->isanon) || (c_sym->up && !c_sym->up->isanon) || (c_sym && s_sym->isanon)) {
int depth = is_struct-is_anon;
if ((depth > 0 || (is_struct > 1 && is_anon >= is_struct)) && c_sym->depth > depth) {
/* Climb until the symbol depth matches the remaining named nesting depth. */
for (; c_sym->depth > is_struct-is_anon && c_sym->up; c_sym = c_sym->up) {
if (dbg) {
printf("end_struct(): c_sym->depth: %i, is_struct-is_anon: %i\n", c_sym->depth, is_struct-is_anon);
}
}
} else if (c_sym->depth) {
is_anon--;
}
} else if (is_struct <= 0) {
is_anon = 0;
}
/* Skip the parent re-linking below once no anonymous scope remains open. */
skip = (!is_anon);
}
if (((is_struct-is_anon) > 0 && !skip) || (is_anon <= 0 && is_struct <= 0)) {
symbol *s;
/* Give every local that has no parent yet the current scope as parent. */
for (s = locals; s; s = s->next) {
if (s->up == NULL) {
s->up = c_sym;
}
if (dbg) {
printf("end_struct(): s: %p, s->up: %p, c_sym: %p, last_loc: %p\n", s, s->up, c_sym, last_loc);
}
}
}
if ((is_anon <= 0 || is_struct <= 0)) {
if (dbg) {
for (symbol *s = c_sym; s->next && !s->isanon; s = s->next) {
printf("end_struct(): %p, s->next: %p, s->prev: %p, s->isanon: %i\n", s, s->next, s->prev, s->isanon);
}
}
/* Walk back over the siblings until the list head or an anonymous member; remember that symbol. */
for (s_sym = c_sym; s_sym->prev && !s_sym->isanon; s_sym = s_sym->prev) {
if (dbg) {
printf("end_struct(): s_sym: %p, s_sym->next: %p, s_sym->prev: %p, s_sym->isanon: %i\n", s_sym, s_sym->next, s_sym->prev, s_sym->isanon);
}
}
struct_sym = s_sym;
}
if ((is_struct-is_anon) > 0 && !skip) {
/* Still inside an outer named struct: propagate the parent link to earlier siblings. */
symbol *s = c_sym;
for (; s->prev; s = s->prev) {
if (s->up == NULL && c_sym->up) {
s->up = c_sym->up;
}
if (dbg) {
printf("end_struct(): s: %p, s->up: %p, c_sym->up: %p, last_loc: %p\n", s, s->up, c_sym->up, last_loc);
}
}
if (c_sym->up) {
cur_sym = c_sym->up;
}
/* Restore the local list of the enclosing scope: rewind to its head, then find its tail. */
for (locals = locals->up; locals->prev; locals = locals->prev);
for (last_loc = locals; last_loc->next; last_loc = last_loc->next);
}
}
/*
 * Searches the fixup list starting at `root` for the first fixup whose
 * symbol has the given name and depth and is still undefined.
 *
 * root  - first fixup to inspect (may be NULL).
 * name  - symbol name to match exactly.
 * depth - scope depth the symbol must have.
 * dbg   - debug flag (unused here).
 *
 * Returns the matching fixup, or NULL when there is none.
 */
fixup *find_fixup(fixup *root, const char *name, int depth, uint8_t dbg) {
    const size_t wanted_len = strlen(name);

    for (fixup *cur = root; cur != NULL; cur = cur->next) {
        symbol *cand = cur->s;

        if (cand == NULL || cand->name == NULL) {
            continue;
        }
        if (cand->depth != depth) {
            continue;
        }
        /* Equal length plus equal bytes is the same as an exact string match. */
        if (strlen(cand->name) != wanted_len || memcmp(name, cand->name, wanted_len) != 0) {
            continue;
        }
        if (!cand->def) {
            return cur;
        }
    }
    return NULL;
}
/*
 * Walks the expression tree below `root` (children first) and replaces
 * every undefined symbol operand with the given name and depth by `sym`.
 *
 * root  - expression tree to patch (may be NULL).
 * name  - symbol name to match exactly.
 * sym   - symbol to store in matching nodes.
 * depth - scope depth the symbol must have.
 * dbg   - debug flag, only forwarded.
 */
void find_expr_sym(expr *root, const char *name, symbol *sym, int depth, uint8_t dbg) {
    const size_t wanted_len = strlen(name);

    if (root == NULL) {
        return;
    }
    if (root->left) {
        find_expr_sym(root->left, name, sym, depth, dbg);
    }
    if (root->right) {
        find_expr_sym(root->right, name, sym, depth, dbg);
    }

    if (root->type != EXPR_SYM || !root->value.sym) {
        return;
    }

    symbol *cur = root->value.sym;
    int same_name = (strlen(cur->name) == wanted_len) && (memcmp(name, cur->name, wanted_len) == 0);
    if (same_name && cur->depth == depth && !cur->def) {
        root->value.sym = sym;
    }
}
/*
 * Starting at line `l`, goes through every following line and points all
 * references to the undefined symbol `name` at `depth` to `sym`.
 * Label/symbol tokens are patched directly; expression tokens are patched
 * through find_expr_sym().
 *
 * l     - first line to process (must not be NULL).
 * name  - symbol name to match exactly.
 * sym   - symbol the references should point to.
 * depth - scope depth the referenced symbol must have.
 * dbg   - debug flag, only forwarded.
 */
void resolve_symbol_names(line *l, const char *name, symbol *sym, int depth, uint8_t dbg) {
    const size_t wanted_len = strlen(name);
    line *cur = l;

    do {
        for (token *t = cur->tok; t; t = t->next) {
            if (t->id == TOK_EXPR) {
                find_expr_sym(t->expr, name, sym, depth, dbg);
                continue;
            }
            if (t->id != TOK_LABEL && t->id != TOK_SYM) {
                continue;
            }
            if (!t->sym) {
                continue;
            }

            int same_name = (strlen(t->sym->name) == wanted_len) && (memcmp(name, t->sym->name, wanted_len) == 0);
            if (same_name && t->sym->depth == depth && !t->sym->def) {
                t->sym = sym;
            }
        }
        cur = cur->next;
    } while (cur != NULL);
}
/*
 * Defines the symbol `name` with `value` in the current scope.
 *
 * If an earlier reference already queued a fixup for this symbol, that
 * symbol object is reused: all references to it are re-pointed with
 * resolve_symbol_names(), it is marked defined and renamed to `name`.
 * Otherwise a new symbol is created with mksymbol().  The symbol is then
 * linked in with add_symbol(), and, when `t` is given, attached to it.
 *
 * t     - token that defines the symbol (may be NULL).
 * name  - unscoped symbol name.
 * value - value to assign.
 * depth - scope depth of the symbol.
 * dbg   - non-zero enables debug output.
 */
void new_symbol(token *t, const char *name, uint64_t value, int depth, uint8_t dbg) {
    size_t name_len = strlen(name);
    char *scope_name = mk_scope_name(cur_sym, depth, name, dbg);
    symbol *s;
    fixup *f = find_fixup(fixups, scope_name, depth, dbg);

    if (f == NULL) {
        s = mksymbol(scope_name, value, 1, depth, 0, 0, dbg);
    } else {
        s = f->s;
        resolve_symbol_names(lines, scope_name, s, depth, dbg);
        s->def = 1;
        s->val = value;
        /* Replace the stored name with the unscoped one. */
        free(s->name);
        s->name = NULL;
        s->name = malloc(name_len+1);
        memcpy(s->name, name, name_len+1);
    }

    (void)add_symbol(s, scope_name, &symbols, &last_sym, &locals, &last_loc, &cur_sym, 0, depth, dbg);
    if (isfixup) {
        isfixup = reslv_fixups(dbg);
    }
    if (t) {
        t->sym = get_sym(scope_name, value, t, depth, 0, dbg);
        isfixup += (t->sym == NULL);
    }

    /* The scoped path was only needed for the lookups above; every callee
     * that stores a name makes its own copy. */
    free(scope_name);
}
/*
 * Decodes one backslash escape sequence.
 *
 * s    - points at the backslash that starts the sequence.
 * code - receives the decoded character; may be NULL to only skip the
 *        sequence.
 *
 * Supported: \a \b \f \n \r \t \v \\ \" \' \e, hexadecimal (\x.., \X..,
 * \$..), binary (\%..) and octal (\0.. to \9.., digits parsed base 8).
 *
 * Returns a pointer to the first character after the sequence.  For an
 * unknown escape character the returned pointer addresses that character
 * and *code is left untouched.
 */
char *parse_escape(char *s, char *code) {
    char dummy;
    char *end;
    int base;
    unsigned int value;

    /* Step over the leading backslash. */
    s++;
    if (code == NULL) {
        code = &dummy;
    }

    switch (*s) {
        case 'a' : *code = '\a'; return s+1;
        case 'b' : *code = '\b'; return s+1;
        case 'f' : *code = '\f'; return s+1;
        case 'n' : *code = '\n'; return s+1;
        case 'r' : *code = '\r'; return s+1;
        case 't' : *code = '\t'; return s+1;
        case 'v' : *code = '\v'; return s+1;
        case '\\': *code = '\\'; return s+1;
        case '\"': *code = '\"'; return s+1;
        case '\'': *code = '\''; return s+1;
        case 'e' : *code = '\x1B'; return s+1;
        case '$' : case 'x' : case 'X' :
            base = 16;
            s++;
            break;
        case '%' :
            /* The prefix has to be skipped here as well, otherwise
             * strtoull() is handed the '%' itself and converts nothing. */
            base = 2;
            s++;
            break;
        case '0' : case '1' : case '2' : case '3' : case '4' :
        case '5' : case '6' : case '7' : case '8' : case '9' :
            /* No prefix to skip: the digit is part of the number. */
            base = 8;
            break;
        default :
            return s;
    }

    value = strtoull(s, &end, base);
    *code = value;
    return end;
}
/*
 * Scans a quoted literal.
 *
 * s         - in: points at the opening delimiter; out: points just past
 *             the closing delimiter (or at the terminating NUL when the
 *             literal is not closed).
 * delm      - delimiter character; a doubled delimiter inside the literal
 *             stands for one literal delimiter character.
 * get_value - non-zero packs the first sizeof(uint64_t) characters into
 *             the result, first character in the lowest-addressed byte.
 * dbg       - debug flag (unused here).
 *
 * Returns the packed value, or 0 when get_value is zero.
 */
uint64_t parse_quote(char **s, char delm, int get_value, uint8_t dbg) {
    uint64_t value = 0;
    uint8_t *tmp_val = (uint8_t *)&value;
    char *str = *s+1;

    for (int i = 0; *str; i++) {
        /* parse_escape() leaves its output untouched for an unknown escape
         * character, so start from a defined value. */
        char c = 0;

        if (*str == '\\') {
            str = parse_escape(str, &c);
        } else {
            c = *str++;
            if (c == delm) {
                if (*str == delm) {
                    /* Doubled delimiter: keep one and continue. */
                    str++;
                } else {
                    /* Closing delimiter. */
                    break;
                }
            }
        }
        if (get_value && (size_t)i < sizeof(uint64_t)) {
            tmp_val[i] = c;
        }
    }
    *s = str;
    return value;
}
/*
 * Allocates a leaf expression node of the given type.
 *
 * type  - expression type to store in the node.
 * value - numeric payload, used only when `sym` is NULL.
 * sym   - symbol payload; takes precedence over `value` when non-NULL.
 * dbg   - debug flag (unused here).
 *
 * Returns the new node with both child links set to NULL.
 */
expr *make_expr(int type, uint64_t value, symbol *sym, uint8_t dbg) {
    expr *node = malloc(sizeof(expr));

    node->left = NULL;
    node->right = NULL;
    node->type = type;
    if (sym == NULL) {
        node->value.val = value;
    } else {
        node->value.sym = sym;
    }
    return node;
}
/*
 * Checks whether `str` is a register name (case-insensitive).
 *
 * One-letter names: a b x y e c d s f.  Two-letter names: sp, bp, pc.
 * Three-character names: 'r' followed by a decimal number that lies
 * between REG_R11 and REG_R15 (that number is returned as is).
 *
 * Returns the register constant, or -1 when `str` is not a register.
 */
int is_reg(const char *str) {
    const size_t len = strlen(str);
    /* OR-ing with 0x20 maps 'A'-'Z' onto 'a'-'z' and never turns any other
     * character into a lowercase letter. */
    const int first = str[0] | 0x20;

    if (len == 1) {
        switch (first) {
            case 'a': return REG_A;
            case 'b': return REG_B;
            case 'x': return REG_X;
            case 'y': return REG_Y;
            case 'e': return REG_E;
            case 'c': return REG_C;
            case 'd': return REG_D;
            case 's': return REG_S;
            case 'f': return REG_F;
        }
        return -1;
    }

    if (len == 2) {
        const int second = str[1] | 0x20;

        if (second == 'p') {
            if (first == 's') {
                return REG_SP;
            }
            if (first == 'b') {
                return REG_BP;
            }
            return -1;
        }
        if (first == 'p') {
            return (second == 'c') ? REG_PC : -1;
        }
        return -1;
    }

    if (len == 3 && first == 'r') {
        int regnum = strtoul(str+1, NULL, 10);
        if (regnum >= REG_R11 && regnum <= REG_R15) {
            return regnum;
        }
    }
    return -1;
}
/* Advances `str` while isdelm() reports bit 0x10 for the current character. */
#define SKIP_WHITESPACE(str, dbg) \
    for (; isdelm(*str, dbg) & 0x10; str++)

/*
 * Reads the next element of an expression from *p.
 *
 * p         - in/out: current position in the source text.
 * address   - address recorded in the fixup when a symbol is undefined.
 * val       - receives the element's payload: a uint64_t for numeric and
 *             character literals, a symbol pointer for EXPR_SYM.
 * found_reg - set to 1 when the text is a register name, or when the
 *             element starts with ';', ',' or the `stop` character.
 * stop      - additional character that ends the expression.
 * dbg       - non-zero enables debug output in the helpers.
 *
 * Returns the EXPR_* type of the element, or EXPR_NONE when nothing was
 * recognised (including the register case).
 */
int get_expr_type(char **p, uint64_t address, void *val, int *found_reg, char stop, uint8_t dbg) {
    char *str = *p;
    int type = EXPR_NONE;

    SKIP_WHITESPACE(str, dbg);
    uint8_t ptok = get_ptok(*str, dbg);
    ptok = (is_altok(ptok, dbg)) ? PTOK_ALPHA : ptok;

    if (ptok != PTOK_SCOLON && ptok != PTOK_COMMA && *str != stop) {
        char *tmp = NULL;
        int i = 0;
        int base = 0;
        uint64_t value = 0;
        symbol *s = NULL;
        int depth = 0;
        int scope_depth = 0;
        char *scope_name = NULL;

        switch (ptok) {
            case PTOK_PLUS  : type = EXPR_PLUS ; str++; break;
            case PTOK_MINUS : type = EXPR_MINUS; str++; break;
            case PTOK_ASTRSK: type = EXPR_MUL  ; str++; break;
            case PTOK_PIPE  : type = EXPR_OR   ; str++; break;
            /* NOTE(review): two characters are consumed even for the
             * single-character '>' / '<' operators - confirm intended. */
            case PTOK_GT    : type = (get_ptok(str[1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; str += 2; break;
            case PTOK_LT    : type = (get_ptok(str[1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); str += 2; break;
            case PTOK_DOLLAR:
            case PTOK_PERCENT:
            case PTOK_NUMBER:
                switch (ptok) {
                    case PTOK_DOLLAR : base = 16; type = EXPR_HEX; str++; break;
                    case PTOK_PERCENT: base =  2; type = EXPR_BIN; str++; break;
                    case PTOK_NUMBER : base = 10; type = EXPR_DEC; break;
                }
                /* Measure the run of hex digits, copy it and convert it. */
                for (; isxdigit(str[i]) && !(isdelm(str[i], dbg) & 0x03); i++);
                tmp = malloc(i+1);
                memcpy(tmp, str, i);
                tmp[i] = '\0';
                value = strtoull(tmp, NULL, base);
                free(tmp);
                *(uint64_t *)val = value;
                break;
            case PTOK_SQUOTE:
                type = EXPR_CHAR;
                value = parse_quote(&str, *str, 1, dbg);
                *(uint64_t *)val = value;
                break;
            case PTOK_AT:
                /* Each leading '@' adds one level of local scope. */
                for (; *str == '@'; str++, depth++);
                /* fallthrough */
            case PTOK_ALPHA:
                /* Measure the identifier; every '.' in it adds a scope level. */
                for (; !isdelm2(str[i], dbg) || str[i] == '.'; scope_depth += (str[i++] == '.'));
                /* One extra byte for the terminator written below. */
                tmp = malloc(i+1);
                memcpy(tmp, str, i);
                tmp[i] = '\0';
                if (is_reg(tmp) >= 0) {
                    *found_reg = 1;
                } else {
                    int total_depth = depth+scope_depth;
                    scope_name = (!scope_depth || total_depth) ? mk_scope_name(cur_sym, depth, tmp, dbg) : tmp;
                    s = get_sym(scope_name, address, NULL, total_depth, 1, dbg);
                    isfixup += (s == NULL);
                    type = EXPR_SYM;
                    *(symbol **)val = s;
                    /* mksymbol() stores its own copy of the name, so the
                     * scoped path is no longer needed. */
                    if (scope_name != tmp) {
                        free(scope_name);
                    }
                }
                free(tmp);
                break;
        }
        str += i;
    } else {
        *found_reg = 1;
    }
    *p = str;
    return type;
}
#undef SKIP_WHITESPACE
/*
 * Parses an expression from *line into an expression tree.
 *
 * line      - in/out: current position in the source text.
 * address   - forwarded to get_expr_type() (used for symbol fixups).
 * found_reg - optional out flag; parsing stops once get_expr_type() sets it.
 *             When NULL, a local dummy is used.
 * is_left   - non-zero first parses an operand sub-expression recursively.
 * stop      - extra character that terminates the expression.
 * dbg       - non-zero enables debug output in the helpers.
 *
 * Returns the root of the tree, or NULL when nothing was parsed.
 */
expr *parse_expr(char **line, uint64_t address, int *found_reg, int is_left, char stop, uint8_t dbg) {
char *str = *line;
/* NOTE(review): `start` is never used. */
char *start = *line;
int dummy = 0;
found_reg = (found_reg == NULL) ? &dummy : found_reg;
/* NOTE(review): `str` was captured before this recursive call, so it does not
 * see the position the call stores in *line - confirm this is intended. */
expr *left = (is_left) ? parse_expr(line, address, found_reg, 0, stop, dbg) : NULL;
expr *new = NULL;
int type = EXPR_NONE;
/* Continue until isdelm() reports bit 0 or 1 for the current character, or a register/terminator was seen. */
for (; !(isdelm(*str, dbg) & 3) && !(*found_reg);) {
int old_type = type;
uint64_t value = 0;
symbol *s = NULL;
/* Receives either a numeric value or a symbol pointer from get_expr_type(). */
uintptr_t tmp = 0;
type = get_expr_type(&str, address, &tmp, found_reg, stop, dbg);
/* expr_type: -1 = no node, 0 = leaf, 1 = unary (operand in left), 2 = binary. */
int expr_type = 0;
switch (type) {
case EXPR_NONE : expr_type = -1; break;
case EXPR_HEX :
case EXPR_BIN :
case EXPR_DEC :
case EXPR_CHAR : value = (uint64_t)tmp; break;
case EXPR_SYM : s = (symbol *)tmp; break;
case EXPR_PLUS :
case EXPR_MINUS :
/* '+' and '-' are binary after an operand, unary otherwise. */
switch (old_type) {
case EXPR_HEX :
case EXPR_BIN :
case EXPR_DEC :
case EXPR_CHAR :
case EXPR_SYM : expr_type = 2; break;
default : expr_type = 1; break;
}
break;
case EXPR_HIGH :
case EXPR_LOW : expr_type = 1; break;
case EXPR_MUL :
case EXPR_OR :
case EXPR_LSHFT :
case EXPR_RSHFT : expr_type = 2; break;
}
if (expr_type >= 0) {
new = make_expr(type, value, s, dbg);
switch (expr_type) {
case 1: new->left = parse_expr(&str, address, found_reg, 1, stop, dbg); break;
case 2:
new->left = left;
new->right = parse_expr(&str, address, found_reg, 1, stop, dbg);
break;
}
/* The node built so far becomes the left operand of whatever follows. */
left = new;
}
}
*line = str;
return left;
}
/*
 * Lexes one line of source text into tokens and, unless a struct/union is
 * currently open, hands the tokens to parse_tokens().
 *
 * str     - the source line; an optional leading decimal number is taken
 *           as its line number.
 * address - address at which the line starts.
 * bline   - stored in the new line record's `bline` field.
 * dbg     - non-zero enables debug output.
 *
 * Returns the address after the line (the value returned by
 * parse_tokens()), or `address` unchanged when nothing was parsed.
 *
 * Presumably make_token() appends each token to the global `tokens` list,
 * which is stored in the line record here - TODO confirm in make_token().
 */
uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
/* Copy of the most recent identifier; used as the name when a label/'=' follows. */
char sym[0x100];
uint16_t i = 0;
uint16_t j = 0;
uint16_t comid = 0;
uint16_t strid = 0;
uint16_t symid = 0;
uint64_t value = 0;
lex_type = 0xFF;
/* Subtype carried over from a preceding ',' to the next token. */
uint8_t lex_subtype = 0xFF;
uint8_t k = 0;
uint8_t k2 = 0;
union reg ch;
ch.u64 = 0;
uint8_t rs = 0;
uint8_t of = 0;
uint8_t base = 0;
/* Number of '@' prefixes seen, i.e. local-symbol depth. */
int depth = 0;
/* Set once a mnemonic has been recognised on this line. */
int is_inst = 0;
uint8_t isop = 0;
int num = 0;
int isch = 0;
uint8_t isesc = 0;
uint8_t islinenum;
int16_t ln = -1;
/* NOTE(review): room for 5 digits plus terminator; the copy loop below has no bound. */
char lnum[6];
uint8_t space = 0;
uint8_t tab = 0;
uint8_t fall = 0;
uint8_t done = 0;
/* Character that ends an expression; switched to ')' after an opening bracket token. */
char delm = ',';
line *l = NULL;
token *st = NULL;
token *t = NULL;
token *lt = NULL;
symbol *tmp_sym = NULL;
/* Collect the optional leading line number. */
while (isdigit(str[i]) && isdelm(str[i], dbg) != 16) {
lnum[j++] = str[i++];
}
islinenum = i;
if (i) {
lnum[j] = '\0';
ln = strtol(lnum, NULL, 10);
j = 0;
l = find_line(ln, dbg);
} else {
ln = linenum;
l = NULL;
}
if (l) {
/* Existing line: reuse its record and its address. */
address = l->addr;
} else {
/* New line: allocate a record and append it to the global line list. */
l = malloc(sizeof(line));
(last_line) ? (last_line->next = l) : (lines = l);
l->tok = NULL;
l->next = NULL;
l->count = 0;
l->bline = bline;
last_line = l;
}
l->addr = address;
/* Main loop: one token per iteration, until isdelm() returns 1 (presumably end of line - TODO confirm). */
while (isdelm(str[i], dbg) != 1) {
uint8_t offset = 0;
base = 0;
j = 0;
/* Skip leading whitespace, counting tabs and spaces for the token record. */
for (tab = 0, space = 0; isdelm(str[i], dbg) == 16; tab += (str[i] == '\t'), space += (str[i] == ' '), i++);
if (dbg) {
printf("lex(): tab: %u, space: %u\n", tab, space);
}
uint8_t ptok = get_ptok(str[i], dbg);
if (is_altok(ptok, dbg)) {
/* Peek at the whole word: it is only kept as a register-letter token when
 * an instruction was already seen and the word is a register name. */
for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
memcpy(lexeme, &str[i], j);
lexeme[j] = '\0';
j = 0;
ptok = (!is_inst || is_reg(lexeme) < 0) ? PTOK_ALPHA : ptok;
memset(lexeme, 0, strlen(lexeme)+1);
}
switch (ptok) {
case PTOK_DOT:
/* '.': a directive, a struct member reference, or (after a mnemonic) a size suffix. */
i++;
for (; !(isdelm(str[i+j], dbg) & 17); j++);
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
if (!isop) {
/* Compare against the first 11 entries of the directive table. */
for (k = 0; k < 11; k++) {
if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
lex_type = TOK_DIR;
uint16_t tmp = j;
/* Look past whitespace: a struct/union not followed by a name is anonymous. */
for (j = 0; isdelm(str[i+j], dbg) & 16; j++);
uint8_t ret = get_ptok(str[i+j], dbg);
ret = (is_altok(ret, dbg)) ? PTOK_ALPHA : ret;
j = tmp;
if ((k == DIR_STRUCT || k == DIR_UNION) && ret != PTOK_ALPHA) {
is_anon++;
}
is_struct += (k == DIR_STRUCT || k == DIR_UNION);
is_struct -= (k == DIR_ENDSTRUCT || k == DIR_ENDUNION);
if ((k == DIR_ENDSTRUCT || k == DIR_ENDUNION)) {
end_struct(cur_sym, struct_sym, dbg);
}
break;
}
}
if (lex_type != TOK_DIR && lt && lt->id == TOK_SYM) {
/* Not a directive and preceded by a symbol: treat as member access and rewind. */
lex_type = TOK_MEMBER;
i -= j;
} else {
l->count++;
t = make_token(lex_type, k, space, tab, 0, "", NULL, NULL);
}
} else {
/* After a mnemonic: every suffix character produces a size (TOK_RS) or TOK_OS token. */
for (k = 0; !(isdelm(lexeme[k], dbg) & 17); k++) {
lex_type = TOK_RS;
switch (tolower(lexeme[k])) {
case '2':
case 'w':
rs = 1;
break;
case '4':
case 'd':
rs = 2;
break;
case '8':
case 'q':
rs = 3;
break;
case 'o':
lex_type = TOK_OS;
break;
}
l->count++;
t = make_token(lex_type, rs, space, tab, 0, "", NULL, NULL);
if (t) {
lt = t;
t = t->next;
}
}
isop = 0;
}
break;
case PTOK_DQUOTE:
/* String literal: find its end with parse_quote(), then copy the raw text between the quotes. */
do {
char *tmp = (str + i);
int get_value = (ptok == PTOK_SQUOTE);
value = parse_quote(&tmp, str[i], get_value, dbg);
tmp--;
i++;
j = tmp - (str + i);
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
} while (0);
/* Reuse an identical string from the table or append a new copy. */
strid = get_string(lexeme, dbg);
if (strid == 0xFFFF) {
strid = stridx;
string[strid] = malloc(j+1);
memcpy(string[strid], lexeme, j+1);
stridx++;
} else {
}
if (dbg) {
printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
}
/* NOTE(review): `lt` is dereferenced without a NULL check; a string as the first token would crash here. */
if (lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
/* Remember the string as an include file name. */
incl[inc_count+inc_file] = strid;
inc_file++;
}
lex_type = TOK_STRING;
l->count++;
t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL, NULL);
break;
case PTOK_SQUOTE:
case PTOK_DOLLAR:
case PTOK_PERCENT:
case PTOK_NUMBER:
/* Literal: parse the whole expression that starts here. */
l->count++;
do {
lex_type = TOK_EXPR;
memset(lexeme, 0, strlen(lexeme)+1);
char *tmp = &str[i];
expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
j = tmp - &str[i];
memcpy(lexeme, &str[i], j);
j = 0;
i = tmp - str;
/* Apply a subtype left pending by a preceding comma. */
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
} while (0);
break;
case PTOK_LBRACK:
case PTOK_HASH :
/* Addressing-mode marker: indirect (bracket) or immediate (hash). */
lex_type = TOK_MEM;
value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM;
l->count++;
t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
/* Inside brackets, expressions end at ')'. */
delm = (ptok == PTOK_LBRACK) ? ')' : delm;
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
if (lex_subtype != 0xFF) {
lex_subtype = 0xFF;
}
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j++] = str[i];
break;
case PTOK_PLUS:
case PTOK_MINUS:
case PTOK_ASTRSK:
case PTOK_GT:
case PTOK_LT:
case PTOK_PIPE:
/* Operator at token start: parse the expression it begins. */
lex_type = TOK_EXPR;
l->count++;
memset(lexeme, 0, strlen(lexeme)+1);
do {
char *tmp = &str[i];
expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
j = tmp - &str[i];
memcpy(lexeme, &str[i], j);
j = 0;
i = tmp - str;
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
} while (0);
break;
case PTOK_EQU:
/* '=': the preceding identifier becomes a symbol definition. */
i++;
lex_type = TOK_SYM;
memset(lexeme, 0, strlen(lexeme)+1);
lexeme[j] = str[i];
if (lt) {
lt->id = lex_type;
lt->type = depth;
}
new_symbol(lt, sym, address, depth, dbg);
/* NOTE(review): `lt` is dereferenced here when `t` is NULL, even if `lt` is NULL too. */
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
break;
case PTOK_RBRACK:
/* Closing bracket: mark the current/previous token as indirect and restore the ',' delimiter. */
i++;
lex_type = TOK_IND;
delm = (delm == ')') ? ',' : delm;
lexeme[j] = ')';
lexeme[j+1] = '\0';
lexeme[j+2] = '\0';
if (t && t->subtype == 0xFF) {
t->subtype = TOK_IND;
} else if (lt && lt->subtype == 0xFF) {
lt->subtype = TOK_IND;
}
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
break;
case PTOK_COMMA:
/* ',': tag the current/previous token and remember the subtype for the next one. */
i++;
if ((lex_type != TOK_IND && lex_type != TOK_OF)) {
lex_type = TOK_CSV;
}
lex_subtype = TOK_CSV;
if (t && t->subtype == 0xFF) {
t->subtype = TOK_CSV;
} else if (lt && lt->subtype == 0xFF) {
lt->subtype = TOK_CSV;
}
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
lexeme[j] = ',';
lexeme[j+1] = '\0';
lexeme[j+2] = '\0';
break;
case PTOK_B:
case PTOK_E:
case PTOK_X:
case PTOK_Y:
case PTOK_S:
case PTOK_A:
case PTOK_C:
case PTOK_D:
case PTOK_F:
case PTOK_R:
/* Register name: one letter, "sp"/"bp", or 'r' plus two more characters. */
lexeme[j+0] = str[i++];
lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0';
lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0';
lexeme[j+3] = '\0';
lex_type = TOK_REG;
value = is_reg(lexeme);
l->count++;
t = make_token(lex_type, value, space, tab, 0, "", NULL, NULL);
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
break;
case PTOK_P:
/* 'p' plus an optional second character: emitted as a TOK_OF token with value 2. */
lexeme[j] = str[i++];
lexeme[j+1] = (str[i] != ',') ? str[i++] : '\0';
lexeme[j+2] = '\0';
of = 2;
lex_type = TOK_OF;
l->count++;
t = make_token(lex_type, of, space, tab, 0, "", NULL, NULL);
break;
case PTOK_AT:
/* '@' prefix(es): count them as local-symbol depth and skip them.
 * NOTE(review): `depth` is not reset before counting, so a value left over
 * from an earlier '@' on this line would be added to - confirm. */
memset(lexeme, 0, strlen(lexeme)+1);
for (char *tmp = str+i; *tmp++ == '@'; depth++);
i += depth;
lexeme[j] = '@';
lex_type = TOK_LOCAL;
if (lt || t) {
(t) ? (t->subspace = space) : (lt->subspace = space);
(t) ? (t->subtab = tab) : (lt->subtab = tab);
}
break;
case PTOK_COLON:
/* ':': the preceding identifier becomes a label defined at the current address. */
i++;
lexeme[j] = ':';
lexeme[j+1] = '\0';
lex_type = TOK_LABEL;
if (lt) {
lt->id = lex_type;
lt->type = depth;
}
new_symbol(lt, sym, address, depth, dbg);
depth = 0;
if (dbg) {
printf("lex(): isfixup: %u\n", isfixup);
}
break;
case PTOK_SCOLON:
/* ';': the rest of the line is a comment, stored through the comment table. */
i++;
for (; isdelm(str[i+j], dbg) != 1; j++);
if (!j) {
lexeme[j] = ' ';
lexeme[j+1] = '\0';
} else {
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
comid = get_comment(lexeme, dbg);
if (dbg) {
printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
}
}
lex_type = TOK_COMMENT;
l->count++;
if (j) {
t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL, NULL);
} else {
t = make_token(lex_type, 0, space, tab, 0, "" , NULL, NULL);
}
break;
case PTOK_ALPHA:
/* Identifier: a mnemonic, a condition code, a symbol definition, or the start of an expression. */
for (; !isdelm2(str[i+j], dbg) || (is_inst && str[i+j] == '.'); j++);
memcpy(lexeme, str+i, j);
lexeme[j] = '\0';
i += j;
isch = 0;
isop = 0;
/* Only 2-3 character words that are not labels, not after a directive and not inside a struct can be mnemonics. */
if (!(lt && lt->id == TOK_DIR) && !is_inst && j > 1 && j <= 3 && str[i] != ':' && !is_struct) {
for (k = 0; k < OPNUM; k++) {
int find_ext = (k < EXT_OPNUM);
int find_ortho = (k < ORTHO_OPNUM);
int upper = toupper(lexeme[0]);
/* Cheap first-letter test before the full case-insensitive compare. */
int isbase = (upper == mne[k][0]);
int isext = (find_ext && upper == ext_mne[k][0]);
int isortho = (find_ortho && upper == ortho_mne[k][0]);
if (isbase || isext || isortho) {
int is_base = !strcasecmp(lexeme, mne[k]);
int is_ext = (find_ext && !strcasecmp(lexeme, ext_mne[k]));
int is_ortho = (find_ortho && !strcasecmp(lexeme, ortho_mne[k]));
if (is_base || is_ext || is_ortho) {
/* When several tables match, the later assignment wins (ortho over ext over base). */
lex_type = (is_base) ? TOK_OPCODE : lex_type;
lex_type = (is_ext) ? TOK_EXTOP : lex_type;
lex_type = (is_ortho) ? TOK_ORTHO : lex_type;
isop = 1;
is_inst = 1;
l->count++;
t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
break;
}
}
}
}
if (!isop) {
if (l->tok && l->tok->id == TOK_ORTHO && l->tok->byte == SET) {
/* Operand of a SET instruction: look the word up in the condition-code table. */
for (k = 0; k < 8; k++) {
int upper = toupper(lexeme[0]);
if (upper == set_cc[k][0]) {
if (!strcasecmp(lexeme, set_cc[k])) {
lex_type = TOK_CC;
l->count++;
t = make_token(lex_type, 0xFF, space, tab, k, "", NULL, NULL);
}
}
}
} else {
/* Look past whitespace to see whether ':' or '=' follows. */
uint8_t spaces = 0;
for (; isdelm(str[i+spaces], dbg) == 16; spaces++);
uint8_t ret = get_ptok(str[i+spaces], dbg);
if (ret == PTOK_COLON || ret == PTOK_EQU) {
/* A definition keeps the '@' depth only if the previous token was a local marker. */
depth = (lex_type != TOK_LOCAL) ? 0 : depth;
}
int is_expr = (!is_struct && str[i+spaces] != ':' && str[i+spaces] != '=');
l->count++;
memcpy(sym, lexeme, j+1);
if (is_expr) {
/* A symbol reference: rewind over the identifier (and its '@' prefixes) and parse it as an expression. */
i -= j + (depth);
lex_type = TOK_EXPR;
memset(lexeme, 0, strlen(lexeme)+1);
char *tmp = &str[i];
expr *e = parse_expr(&tmp, address, NULL, 0, delm, dbg);
j = tmp - &str[i];
memcpy(lexeme, &str[i], j);
i = tmp - str;
t = make_token(lex_type, 0, space, tab, 0, "", NULL, e);
t->subtype = (t->subtype == 0xFF && lex_subtype != 0xFF) ? lex_subtype : t->subtype;
lex_subtype = 0xFF;
} else {
/* A definition (or struct content): emit a plain symbol token; ':' / '=' completes it later. */
memcpy(sym, lexeme, j+1);
lex_type = TOK_SYM;
t = make_token(lex_type, depth, space, tab, 0, "", NULL, NULL);
}
if (is_struct) {
create_struct(cur_sym, l, t, lt, sym, dbg);
depth = 0;
}
}
}
break;
}
/* Attach the token list to the line as soon as the first token exists. */
if (!l->tok && t) {
l->tok = tokens;
}
if (dbg) {
printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
}
j = 0;
if ((lex_type == TOK_OPCODE || lex_type == TOK_EXTOP) && !isop) {
j = 0;
} else if (lex_type != TOK_EXPR && lex_type != TOK_LOCAL && lex_type != TOK_MEMBER && !isdelm2(str[i], dbg)) {
/* Step over a character that no case above consumed. */
i++;
}
/* Reset per-token state; the cases fall through on purpose:
 * default clears the type, CSV/IND/LOCAL clear the lexeme, SYM keeps both. */
switch (lex_type) {
default:
lex_type = 0xFF;
case TOK_CSV:
case TOK_IND:
case TOK_LOCAL:
memset(lexeme, 0, strlen(lexeme)+1);
case TOK_SYM:
break;
}
/* Remember the token as "previous" and move on. */
if (t) {
lt = t;
t = t->next;
}
}
if (i) {
/* Something was consumed: finalise the line. */
l->tok = tokens;
token *tok = tokens;
/* NOTE(review): `tok` is dereferenced without a NULL check; `tokens` would be
 * NULL if no token was created for the consumed text. */
if ((tok->id == TOK_SYM || tok->id == TOK_LABEL) && tok->next) {
/* A definition followed by expression tokens takes the value of the (last) expression. */
symbol *s = tok->sym;
for (; tok; tok = tok->next) {
if (tok->id == TOK_EXPR) {
s->val = get_val(tok->expr, address, 3, 0, dbg);
}
}
}
tokens = NULL;
last_tok = NULL;
bytecount dummy;
if (!is_struct) {
/* Outside of structs, parse the tokens now; a pending struct head line takes precedence once. */
l = (tmp_line) ? tmp_line : l;
address = parse_tokens(l->tok, &l, &dummy, 0, address, dbg);
if (tmp_line) {
tmp_line = NULL;
}
}
if (dbg) {
printf("lex(): Next address: $%"PRIX64"\n", address);
}
/* Assign the line number: the explicit one if given, otherwise the running counter (advanced by 10). */
if (ln > linenum || islinenum) {
l->linenum = ln;
if (ln > linenum) {
linenum+=(10+(ln & 10));
}
} else if (!islinenum) {
l->linenum = linenum;
linenum += 10;
}
}
return address;
}