From d17eb092f620d217f08ae7fb27122bb30799eaf4 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Tue, 11 Aug 2020 12:13:21 -0400 Subject: - Changed the spacing format to now store the number of spaces, and tabs before the current token. Along with also now storing a sub-token count which is used in case the token's secondary id is meant to be treated as a sub-token. - Added support for saving the number of blank lines before the current tokenized line. --- asmmon.c | 178 ++++++++++++++++++++++++++++++++++--------------------------- asmmon.h | 17 +++--- assemble.c | 12 ++++- lexer.c | 88 +++++++++++++++--------------- 4 files changed, 163 insertions(+), 132 deletions(-) diff --git a/asmmon.c b/asmmon.c index 79a9862..6b71ac3 100644 --- a/asmmon.c +++ b/asmmon.c @@ -112,19 +112,31 @@ char *showbits(uint64_t value, uint8_t bitnum, uint8_t dbg) { } +static inline uint8_t isopdone(token *t) { + switch (t->id) { + case TOK_OF : + case TOK_HEX : + case TOK_BIN : + case TOK_DEC : + case TOK_CHAR: + case TOK_EXPR: return 0; + default : return 1; + } +} + void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, uint64_t address, uint8_t dbg) { - line *s = (!all) ? find_line(start, dbg) : lines ; - line *e = (!all) ? find_line( end, dbg) : last_line; + line *s = (!all) ? find_line(start, dbg) : lines; + line *e = (!all) ? 
find_line( end, dbg) : NULL; uint8_t j = 0; uint8_t flags = 0; uint8_t isstr; uint8_t iscom; uint8_t iscm = 0; - uint8_t fall = 0; uint8_t bitnum; uint8_t opsize = 0; uint8_t spaces; uint8_t tabs; + char mne_lower[4]; char ch[6]; do { @@ -132,29 +144,48 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u token *t = s->tok; uint8_t am = 0xFF; uint8_t rs = 0xFF; + uint8_t am_done = 1; + uint8_t op_done = 1; + uint16_t bline = s->bline; + for (; bline; bline--) { + putchar('\n'); + } if (dbg) { printf("list(): "); } if (ln) { - printf("%u\t\t", s->linenum); + printf("%5u\t\t", s->linenum); } else if (addr) { printf("$%"PRIX64":\t\t", s->addr); } - spaces = s->sspace; - tabs = s->stab; - while (spaces || tabs) { - if (spaces) { - putchar(' '); - spaces--; + while (t) { + if (am != 0xFF && op_done && t->id != TOK_RS) { + switch (am) { + case IMM : putchar('#'); am_done = 1; break; + case IND : + case INDX: + case INDY: putchar('('); am_done = 0; break; + case ZMY : + case ZMX : am_done = 0; break; + case BREG: putchar('b'); am_done = 1; break; + } + am = (am_done) ? 
0xFF : am; } - if (tabs) { - putchar('\t'); - tabs--; + spaces = t->space; + tabs = t->tab; + while (spaces || tabs) { + if (spaces) { + putchar(' '); + spaces--; + } + if (tabs) { + putchar('\t'); + tabs--; + } } - } - while (t && t->id != TOK_COMMENT) { switch (t->id) { - case TOK_DIR : printf(".%s ", dir_t[t->type] ); break; + case TOK_DIR : printf(".%s", dir_t[t->type]); break; + case TOK_RS : printf("%s", rs_t[t->type]); break; case TOK_OPCODE: for (; j < 3; j++) { mne_lower[j] = tolower(mne[t->byte][j]); @@ -163,19 +194,6 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u j = 0; printf("%s", mne_lower); am = t->type; - if (t->next && t->next->id == TOK_RS) { - t = t->next; - rs = t->type; - printf("%s", rs_t[t->type]); - } - putchar(' '); - switch (am) { - case IMM : putchar('#'); break; - case IND : - case INDX: - case INDY: putchar('('); break; - case BREG: putchar('b'); break; - } break; case TOK_OF: switch (t->type) { @@ -191,18 +209,16 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u printf("%s", (t->sym) ? t->sym->name : "unknown"); if (t->id == TOK_LABEL) { putchar(':'); - } else if (t == s->tok && t->id == TOK_SYM) { - printf(" = "); } break; case TOK_HEX: if (t->id == TOK_HEX) { - printf("$%02"PRIX64, t->qword); + printf("$%0*"PRIX64, t->digits, t->qword); } else if (t->id == TOK_DEC) { - case TOK_DEC: printf( "%"PRIu64, t->qword); + case TOK_DEC: printf( "%0*"PRIu64, t->digits, t->qword); } else if (t->id == TOK_BIN) { - case TOK_BIN: if (rs != 0xFF) { - bitnum = (rs << 3); + case TOK_BIN: if (t->digits) { + bitnum = t->digits; } else { opsize = 1; opsize = (t->qword > 0x000000FF) ? 
2 : opsize; @@ -238,15 +254,14 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u if (t->next) { switch (t->next->id) { case TOK_STRING: - case TOK_HEX: - case TOK_BIN: - case TOK_DEC: - case TOK_CHAR: - printf(", "); - break; + case TOK_HEX : + case TOK_BIN : + case TOK_DEC : + case TOK_CHAR : putchar(','); break; } } break; + case TOK_COMMENT: printf(";%s", (t->str) ? t->str : ""); break; case TOK_EXPR: switch (t->type) { case EXPR_PLUS : putchar('+'); break; @@ -258,50 +273,49 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u case EXPR_OR : putchar('|'); break; } break; + } - t = t->next; - } - if (am != 0xFF) { - if (fall) { - fall = 0; - } - switch (am) { - case INDX: - case ZMX: - printf(", x"); - if (am == ZMX) { - break; + if (t->subspace || t->subtab) { + spaces = t->subspace; + tabs = t->subtab; + while (spaces || tabs) { + if (spaces) { + putchar(' '); + spaces--; } - fall = 1; - /* Falls Through. */ - case INDY: - fall = !fall; - /* Falls Through. */ - case IND: - putchar(')'); - if (!fall) { - break; + if (tabs) { + putchar('\t'); + tabs--; } - /* Falls Through. */ - case ZMY: - printf(", y"); - break; + } } - } - spaces = s->espace; - tabs = s->etab; - while (spaces || tabs) { - if (spaces) { - putchar(' '); - spaces--; + if (t->next && !isopdone(t)) { + op_done = isopdone(t->next); } - if (tabs) { - putchar('\t'); - tabs--; + if (am != 0xFF && !am_done && op_done) { + switch (am) { + case INDX: + case ZMX : + printf(", x"); + if (am == ZMX) { + break; + } + /* Falls Through. */ + case INDY: + case IND : + putchar(')'); + if (am == IND) { + break; + } + case ZMY : printf(", y"); break; + } + am = 0xFF; + am_done = 1; } - } - if (t && t->id == TOK_COMMENT) { - printf(";%s", (t->str) ? 
t->str : ""); + if (t == s->tok && t->id == TOK_SYM) { + putchar('='); + } + t = t->next; } puts(""); s = s->next; @@ -339,6 +353,7 @@ int asmmon(const char *fn) { uint8_t dbg = 0; uint8_t isinclude = 0; uint16_t tmp_lineidx = 0; + uint16_t bline = 0; while (!done) { char *cmd; char *arg = malloc(sizeof(char *)*128); @@ -534,9 +549,12 @@ int asmmon(const char *fn) { case 0xFF: break; default: - address = lex(lex_line, address, dbg); + address = lex(lex_line, address, bline, dbg); + bline = 0; break; } + } else if (lex_line[0] == '\n') { + bline++; } } free(path); diff --git a/asmmon.h b/asmmon.h index 6c71a4a..a03213d 100644 --- a/asmmon.h +++ b/asmmon.h @@ -16,6 +16,14 @@ struct tok { uint8_t id; /* Token ID. */ uint8_t type; /* Token type ID. */ + uint8_t tab; /* Number of tabs. */ + uint8_t space; /* Number of spaces. */ + + uint8_t subtab; /* Number of sub-token tabs. */ + uint8_t subspace; /* Number of sub-token spaces. */ + + uint8_t digits; /* Number of digits. */ + /* Token value(s). */ union { symbol *sym; @@ -31,12 +39,9 @@ struct ln { line *next; /* Pointer to the next line. */ token *tok; /* The token(s) for this line. */ uint16_t count; /* Total tokens for this line. */ + uint16_t bline; /* Number of blank lines. */ uint32_t linenum; /* Line number. */ uint64_t addr; /* The address of this line. */ - uint8_t stab; /* Number of starting tabs. */ - uint8_t sspace; /* Number of starting spaces. */ - uint8_t etab; /* Number of ending tabs. */ - uint8_t espace; /* Number of ending spaces. 
*/ }; @@ -536,9 +541,9 @@ extern uint8_t defined; extern uint8_t isfixup; extern line *find_line(uint32_t ln, uint8_t dbg); -extern uint64_t lex(char *str, uint64_t address, uint8_t dbg); +extern uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg); extern uint64_t parse_tokens(token *tm, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg); -extern token *make_token(uint8_t id, uint8_t type, uint64_t value, char *str, symbol *sym); +extern token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s); extern void assemble(line *ln, bytecount *bc, uint8_t dbg); extern void cleanup(); diff --git a/assemble.c b/assemble.c index d127ce5..5cec345 100644 --- a/assemble.c +++ b/assemble.c @@ -456,11 +456,21 @@ uint64_t parse_tokens(token *t, bytecount *bc, uint8_t isasm, uint64_t address, return address; } -token *make_token(uint8_t id, uint8_t type, uint64_t value, char *str, symbol *s) { +token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) { token *new_tok = malloc(sizeof(token)); (last_tok) ? 
(last_tok->next = new_tok) : (tokens = new_tok); + new_tok->id = id; new_tok->type = type; + + new_tok->tab = tab; + new_tok->space = space; + + new_tok->subtab = 0; + new_tok->subspace = 0; + + new_tok->digits = 0; + if (s) { new_tok->sym = s; } else if (str[0]) { diff --git a/lexer.c b/lexer.c index 699589c..63eef47 100644 --- a/lexer.c +++ b/lexer.c @@ -118,22 +118,27 @@ uint16_t reslv_fixups(uint8_t dbg) { uint16_t get_comment(const char *com, uint8_t dbg) { uint16_t i = 0; - uint8_t iscom = 0; - for (; i < comidx; i++) { - if (comment[i] == NULL || iscom) { + for (; comment[i] && i < comidx; i++) { + if (com[0] == comment[i][0] && !strcmp(com, comment[i])) { break; - } else if (com[0] == comment[i][0]) { - iscom = !strcmp(com, comment[i]); } } - if (comment[i] == NULL || i == comidx) { + if (comment[i] == NULL) { if (dbg) { printf("get_comment(): oof, the index $%04X is NULL.\n", i); printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com); } - return 0xFFFF; + size_t size = strlen(com)+1; + comment[comidx] = malloc(size); + memcpy(comment[comidx], com, size); + return comidx++; + } if (dbg) { + if (strcmp(com, comment[i])) { + printf("get_comment(): oof, the comment \"%s\" is somehow not in the comment table, even though it should be at index $%04X.\n", com, i); + } + printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i])); printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i); } return i; @@ -182,7 +187,7 @@ line *find_line(uint32_t ln, uint8_t dbg) { return l; } -uint64_t lex(char *str, uint64_t address, uint8_t dbg) { +uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { char sym[0x100]; uint16_t i = 0; uint16_t j = 0; @@ -212,9 +217,11 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { uint8_t space = 0; uint8_t tab = 0; - uint8_t isstart = 1; uint8_t fall = 0; uint8_t done = 0; + + + /*uint8_t is_newcom = 
0;*/ line *l = NULL; token *st = NULL; token *t = NULL; @@ -241,10 +248,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { l->tok = NULL; l->next = NULL; l->count = 0; - l->espace = 0; - l->etab = 0; - l->sspace = 0; - l->stab = 0; + l->bline = bline; last_line = l; } @@ -263,13 +267,6 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { if (dbg) { printf("lex(): tab: %u, space: %u\n", tab, space); } - if (isstart) { - l->stab = tab; - l->sspace = space; - if (dbg) { - printf("lex(): starting tabs: %u, starting spaces: %u\n", l->stab, l->sspace); - } - } if (isdelm(str[i], dbg) == 16) { for (; isdelm(str[i], dbg) == 16; i++); } @@ -306,7 +303,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } } l->count++; - t = make_token(lex_type, k, 0, "", NULL); + t = make_token(lex_type, k, space, tab, 0, "", NULL); } else { lex_type = TOK_RS; switch (tolower(lexeme[j-1])) { @@ -324,7 +321,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { break; } l->count++; - t = make_token(lex_type, rs, 0, "", NULL); + t = make_token(lex_type, rs, space, tab, 0, "", NULL); isop = 0; } break; @@ -353,7 +350,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } lex_type = TOK_STRING; l->count++; - t = make_token(lex_type, 0, 0, string[strid], NULL); + t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL); break; case PTOK_DOLLAR: case PTOK_PERCENT: @@ -385,7 +382,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } } l->count++; - t = make_token(lex_type, 0, value, "", NULL); + t = make_token(lex_type, 0, space, tab, value, "", NULL); + t->digits = (lt->id != TOK_SYM) ? 
j : 0; break; case PTOK_SQUOTE: i++; @@ -413,7 +411,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } lex_type = TOK_CHAR; l->count++; - t = make_token(lex_type, 0, ch, "", NULL); + t = make_token(lex_type, 0, space, tab, ch, "", NULL); break; case PTOK_LBRACK: case PTOK_HASH : @@ -421,6 +419,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); break; case PTOK_PLUS: case PTOK_MINUS: @@ -436,7 +436,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { case PTOK_LT : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break; } l->count++; - t = make_token(lex_type, value, 0, "", NULL); + t = make_token(lex_type, value, space, tab, 0, "", NULL); memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; if (value == EXPR_LSHFT || value == EXPR_RSHFT) { @@ -448,6 +448,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lex_type = TOK_SYM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j] = str[i]; + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); break; case PTOK_RBRACK: i++; @@ -471,6 +473,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lexeme[j+2] = '\0'; lex_type = TOK_BREG; l->tok->type = BREG; + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); break; case PTOK_X: case PTOK_Y: @@ -499,13 +503,17 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { case PTOK_P: of = 2; break; } lex_type = TOK_OF; - t = make_token(lex_type, of, 0, "", NULL); + t = make_token(lex_type, of, space, tab, 0, "", NULL); break; case PTOK_AT: memset(lexeme, 0, strlen(lexeme)+1); lexeme[j] = '@'; islocal = 1; lex_type = TOK_LOCAL; + if (lt || t) { + (t) ? 
(t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); + } break; case PTOK_COLON: i++; @@ -547,18 +555,18 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lexeme[j] = '\0'; i += j; comid = get_comment(lexeme, dbg); + /*is_newcom = (comid == 0xFFFF); if (comid == 0xFFFF) { - /*if (line != lineidx && l[line].com != 0xFFFF) { + if (line != lineidx && l[line].com != 0xFFFF) { comid = l[line].com; } else { comid = comidx; - }*/ + } comid = comidx; comment[comid] = malloc(j+1); memcpy(comment[comid], lexeme, j+1); comidx++; - } else { - } + }*/ if (dbg) { printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); } @@ -566,9 +574,9 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lex_type = TOK_COMMENT; l->count++; if (j) { - t = make_token(lex_type, 0, 0, comment[comid], NULL); + t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL); } else { - t = make_token(lex_type, 0, 0, "" , NULL); + t = make_token(lex_type, 0, space, tab, 0, "" , NULL); } break; @@ -586,7 +594,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { lex_type = TOK_OPCODE; isop = 1; l->count++; - t = make_token(lex_type, 0xFF, k, "", NULL); + t = make_token(lex_type, 0xFF, space, tab, k, "", NULL); break; } } @@ -600,7 +608,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { } lex_type = TOK_SYM; l->count++; - t = make_token(lex_type, islocal, 0, "", NULL); + t = make_token(lex_type, islocal, space, tab, 0, "", NULL); memcpy(sym, lexeme, j+1); if (dbg) { printf("lex(): spaces: %u\n", spaces); @@ -622,22 +630,12 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) { if (dbg) { printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? 
lex_tok[lex_type] : "TOK_NONE"); } - isstart = 0; j = 0; if (lex_type == TOK_OPCODE && !isop) { j = 0; } else if (lex_type == TOK_EXPR || !isdelm2(str[i], dbg)) { i++; } - if (lex_type == TOK_COMMENT) { - if (!isstart) { - l->etab = tab; - l->espace = space; - if (dbg) { - printf("lex(): ending tabs: %u, ending spaces: %u\n", l->etab, l->espace); - } - } - } switch (lex_type) { default: lex_type = 0xFF; -- cgit v1.2.3-13-gbd6f