From d17eb092f620d217f08ae7fb27122bb30799eaf4 Mon Sep 17 00:00:00 2001
From: mrb0nk500 <b0nk@b0nk.xyz>
Date: Tue, 11 Aug 2020 12:13:21 -0400
Subject: - Changed the spacing format to now store the number of spaces and
 tabs before the current token.

  It also now stores sub-token space and tab counts, which
  are used in case the token's secondary id is meant to
  be treated as a sub-token.

- Added support for saving the number of blank lines
  before the current tokenized line.
---
 asmmon.c   | 178 ++++++++++++++++++++++++++++++++++---------------------------
 asmmon.h   |  17 +++---
 assemble.c |  12 ++++-
 lexer.c    |  88 +++++++++++++++---------------
 4 files changed, 163 insertions(+), 132 deletions(-)

diff --git a/asmmon.c b/asmmon.c
index 79a9862..6b71ac3 100644
--- a/asmmon.c
+++ b/asmmon.c
@@ -112,19 +112,31 @@ char *showbits(uint64_t value, uint8_t bitnum, uint8_t dbg) {
 
 }
 
+static inline uint8_t isopdone(token *t) {
+	switch (t->id) {
+		case TOK_OF  :
+		case TOK_HEX :
+		case TOK_BIN :
+		case TOK_DEC :
+		case TOK_CHAR:
+		case TOK_EXPR: return 0;
+		default      : return 1;
+	}
+}
+
 void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, uint64_t address, uint8_t dbg) {
-	line *s = (!all) ? find_line(start, dbg) : lines    ;
-	line *e = (!all) ? find_line(  end, dbg) : last_line;
+	line *s = (!all) ? find_line(start, dbg) : lines;
+	line *e = (!all) ? find_line(  end, dbg) :  NULL;
 	uint8_t j = 0;
 	uint8_t flags = 0;
 	uint8_t isstr;
 	uint8_t iscom;
 	uint8_t iscm = 0;
-	uint8_t fall = 0;
 	uint8_t bitnum;
 	uint8_t opsize = 0;
 	uint8_t spaces;
 	uint8_t tabs;
+
 	char mne_lower[4];
 	char ch[6];
 	do {
@@ -132,29 +144,48 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u
 		token *t = s->tok;
 		uint8_t am = 0xFF;
 		uint8_t rs = 0xFF;
+		uint8_t am_done = 1;
+		uint8_t op_done = 1;
+		uint16_t bline = s->bline;
+		for (; bline; bline--) {
+			putchar('\n');
+		}
 		if (dbg) {
 			printf("list(): ");
 		}
 		if (ln) {
-			printf("%u\t\t", s->linenum);
+			printf("%5u\t\t", s->linenum);
 		} else if (addr) {
 			printf("$%"PRIX64":\t\t", s->addr);
 		}
-		spaces = s->sspace;
-		tabs = s->stab;
-		while (spaces || tabs) {
-			if (spaces) {
-				putchar(' ');
-				spaces--;
+		while (t) {
+			if (am != 0xFF && op_done && t->id != TOK_RS) {
+				switch (am) {
+					case IMM : putchar('#'); am_done = 1; break;
+					case IND :
+					case INDX:
+					case INDY: putchar('('); am_done = 0; break;
+					case ZMY :
+					case ZMX : am_done = 0; break;
+					case BREG: putchar('b'); am_done = 1; break;
+				}
+				am = (am_done) ? 0xFF : am;
 			}
-			if (tabs) {
-				putchar('\t');
-				tabs--;
+			spaces = t->space;
+			tabs = t->tab;
+			while (spaces || tabs) {
+				if (spaces) {
+					putchar(' ');
+					spaces--;
+				}
+				if (tabs) {
+					putchar('\t');
+					tabs--;
+				}
 			}
-		}
-		while (t && t->id != TOK_COMMENT) {
 			switch (t->id) {
-				case TOK_DIR   : printf(".%s ", dir_t[t->type]          ); break;
+				case TOK_DIR   : printf(".%s", dir_t[t->type]); break;
+				case TOK_RS    : printf("%s",   rs_t[t->type]); break;
 				case TOK_OPCODE:
 					for (; j < 3; j++) {
 						mne_lower[j] = tolower(mne[t->byte][j]);
@@ -163,19 +194,6 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u
 					j = 0;
 					printf("%s", mne_lower);
 					am = t->type;
-					if (t->next && t->next->id == TOK_RS) {
-						t = t->next;
-						rs = t->type;
-						printf("%s", rs_t[t->type]);
-					}
-					putchar(' ');
-					switch (am) {
-						case IMM : putchar('#'); break;
-						case IND :
-						case INDX:
-						case INDY: putchar('('); break;
-						case BREG: putchar('b'); break;
-					}
 					break;
 				case TOK_OF:
 					switch (t->type) {
@@ -191,18 +209,16 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u
 					printf("%s", (t->sym) ? t->sym->name : "unknown");
 					if (t->id == TOK_LABEL) {
 						putchar(':');
-					} else if (t == s->tok && t->id == TOK_SYM) {
-						printf(" = ");
 					}
 					break;
 				case TOK_HEX:
 					if (t->id == TOK_HEX) {
-						printf("$%02"PRIX64, t->qword);
+						printf("$%0*"PRIX64, t->digits, t->qword);
 					} else if (t->id == TOK_DEC) {
-				case TOK_DEC:	printf( "%"PRIu64, t->qword);
+				case TOK_DEC:	printf( "%0*"PRIu64, t->digits, t->qword);
 					} else if (t->id == TOK_BIN) {
-				case TOK_BIN:	if (rs != 0xFF) {
-							bitnum = (rs << 3);
+				case TOK_BIN:	if (t->digits) {
+							bitnum = t->digits;
 						} else {
 							opsize = 1;
 							opsize = (t->qword > 0x000000FF) ? 2 : opsize;
@@ -238,15 +254,14 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u
 					if (t->next) {
 						switch (t->next->id) {
 							case TOK_STRING:
-							case TOK_HEX:
-							case TOK_BIN:
-							case TOK_DEC:
-							case TOK_CHAR:
-								printf(", ");
-								break;
+							case TOK_HEX   :
+							case TOK_BIN   :
+							case TOK_DEC   :
+							case TOK_CHAR  : putchar(','); break;
 						}
 					}
 					break;
+				case TOK_COMMENT: printf(";%s", (t->str) ? t->str : ""); break;
 				case TOK_EXPR:
 					switch (t->type) {
 						case EXPR_PLUS : putchar('+'); break;
@@ -258,50 +273,49 @@ void list(uint16_t start, uint16_t end, uint8_t all, uint8_t ln, uint8_t addr, u
 						case EXPR_OR   : putchar('|'); break;
 					}
 					break;
+
 			}
-			t = t->next;
-		}
-		if (am != 0xFF) {
-			if (fall) {
-				fall = 0;
-			}
-			switch (am) {
-				case INDX:
-				case ZMX:
-					printf(", x");
-					if (am == ZMX) {
-						break;
+			if (t->subspace || t->subtab) {
+				spaces = t->subspace;
+				tabs = t->subtab;
+				while (spaces || tabs) {
+					if (spaces) {
+						putchar(' ');
+						spaces--;
 					}
-					fall = 1;
-					/* Falls Through. */
-				case INDY:
-					fall = !fall;
-					/* Falls Through. */
-				case IND:
-					putchar(')');
-					if (!fall) {
-						break;
+					if (tabs) {
+						putchar('\t');
+						tabs--;
 					}
-					/* Falls Through. */
-				case ZMY:
-					printf(", y");
-					break;
+				}
 			}
-		}
-		spaces = s->espace;
-		tabs = s->etab;
-		while (spaces || tabs) {
-			if (spaces) {
-				putchar(' ');
-				spaces--;
+			if (t->next && !isopdone(t)) {
+				op_done = isopdone(t->next);
 			}
-			if (tabs) {
-				putchar('\t');
-				tabs--;
+			if (am != 0xFF && !am_done && op_done) {
+				switch (am) {
+					case INDX:
+					case ZMX :
+						printf(", x");
+						if (am == ZMX) {
+							break;
+						}
+						/* Falls Through. */
+					case INDY:
+					case IND :
+						putchar(')');
+						if (am == IND) {
+							break;
+						}
+					case ZMY : printf(", y"); break;
+				}
+				am = 0xFF;
+				am_done = 1;
 			}
-		}
-		if (t && t->id == TOK_COMMENT) {
-			printf(";%s", (t->str) ? t->str : "");
+			if (t == s->tok && t->id == TOK_SYM) {
+				putchar('=');
+			}
+			t = t->next;
 		}
 		puts("");
 		s = s->next;
@@ -339,6 +353,7 @@ int asmmon(const char *fn) {
 	uint8_t dbg = 0;
 	uint8_t isinclude = 0;
 	uint16_t tmp_lineidx = 0;
+	uint16_t bline = 0;
 	while (!done) {
 		char *cmd;
 		char *arg = malloc(sizeof(char *)*128);
@@ -534,9 +549,12 @@ int asmmon(const char *fn) {
 				case 0xFF:
 					break;
 				default:
-					address = lex(lex_line, address, dbg);
+					address = lex(lex_line, address, bline, dbg);
+					bline = 0;
 					break;
 			}
+		} else if (lex_line[0] == '\n') {
+			bline++;
 		}
 	}
 	free(path);
diff --git a/asmmon.h b/asmmon.h
index 6c71a4a..a03213d 100644
--- a/asmmon.h
+++ b/asmmon.h
@@ -16,6 +16,14 @@ struct tok {
 	uint8_t id;	/* Token ID.      */
 	uint8_t type;	/* Token type ID. */
 
+	uint8_t tab;		/* Number of tabs.        */
+	uint8_t space;		/* Number of spaces.      */
+
+	uint8_t subtab;		/* Number of sub-token tabs.    */
+	uint8_t subspace;	/* Number of sub-token spaces.  */
+
+	uint8_t digits;		/* Number of digits. */
+
 	/* Token value(s). */
 	union {
 		symbol *sym;
@@ -31,12 +39,9 @@ struct ln {
 	line  *next;		/* Pointer to the next line.   */
 	token *tok;		/* The token(s) for this line. */
 	uint16_t count;		/* Total tokens for this line. */
+	uint16_t bline;		/* Number of blank lines.      */
 	uint32_t linenum;	/* Line number.                */
 	uint64_t addr;		/* The address of this line.   */
-	uint8_t stab;		/* Number of starting tabs.    */
-	uint8_t sspace;		/* Number of starting spaces.  */
-	uint8_t etab;		/* Number of ending tabs.      */
-	uint8_t espace;		/* Number of ending spaces.    */
 };
 
 
@@ -536,9 +541,9 @@ extern uint8_t defined;
 extern uint8_t isfixup;
 
 extern line *find_line(uint32_t ln, uint8_t dbg);
-extern uint64_t lex(char *str, uint64_t address, uint8_t dbg);
+extern uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg);
 
 extern uint64_t parse_tokens(token *tm, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg);
-extern token *make_token(uint8_t id, uint8_t type, uint64_t value, char *str, symbol *sym);
+extern token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s);
 extern void assemble(line *ln, bytecount *bc, uint8_t dbg);
 extern void cleanup();
diff --git a/assemble.c b/assemble.c
index d127ce5..5cec345 100644
--- a/assemble.c
+++ b/assemble.c
@@ -456,11 +456,21 @@ uint64_t parse_tokens(token *t, bytecount *bc, uint8_t isasm, uint64_t address,
 	return address;
 }
 
-token *make_token(uint8_t id, uint8_t type, uint64_t value, char *str, symbol *s) {
+token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) {
 	token *new_tok = malloc(sizeof(token));
 	(last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok);
+
 	new_tok->id = id;
 	new_tok->type = type;
+
+	new_tok->tab = tab;
+	new_tok->space = space;
+
+	new_tok->subtab = 0;
+	new_tok->subspace = 0;
+
+	new_tok->digits = 0;
+
 	if (s) {
 		new_tok->sym = s;
 	} else if (str[0]) {
diff --git a/lexer.c b/lexer.c
index 699589c..63eef47 100644
--- a/lexer.c
+++ b/lexer.c
@@ -118,22 +118,27 @@ uint16_t reslv_fixups(uint8_t dbg) {
 
 uint16_t get_comment(const char *com, uint8_t dbg) {
 	uint16_t i = 0;
-	uint8_t iscom = 0;
-	for (; i < comidx; i++) {
-		if (comment[i] == NULL || iscom) {
+	for (; comment[i] && i < comidx; i++) {
+		if (com[0] == comment[i][0] && !strcmp(com, comment[i])) {
 			break;
-		} else if (com[0] == comment[i][0]) {
-			iscom = !strcmp(com, comment[i]);
 		}
 	}
-	if (comment[i] == NULL || i == comidx) {
+	if (comment[i] == NULL) {
 		if (dbg) {
 			printf("get_comment(): oof, the index $%04X is NULL.\n", i);
 			printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
 		}
-		return 0xFFFF;
+		size_t size = strlen(com)+1;
+		comment[comidx] = malloc(size);
+		memcpy(comment[comidx], com, size);
+		return comidx++;
+
 	}
 	if (dbg) {
+		if (strcmp(com, comment[i])) {
+			printf("get_comment(): oof, the comment \"%s\" is somehow not in the comment table, even though it should be at index $%04X.\n", com, i);
+		}
+		printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i]));
 		printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i);
 	}
 	return i;
@@ -182,7 +187,7 @@ line *find_line(uint32_t ln, uint8_t dbg) {
 	return l;
 }
 
-uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
+uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) {
 	char sym[0x100];
 	uint16_t i = 0;
 	uint16_t j = 0;
@@ -212,9 +217,11 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 
 	uint8_t space = 0;
 	uint8_t tab = 0;
-	uint8_t isstart = 1;
 	uint8_t fall = 0;
 	uint8_t done = 0;
+
+
+	/*uint8_t is_newcom = 0;*/
 	line *l = NULL;
 	token *st = NULL;
 	token *t = NULL;
@@ -241,10 +248,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 		l->tok = NULL;
 		l->next = NULL;
 		l->count = 0;
-		l->espace = 0;
-		l->etab = 0;
-		l->sspace = 0;
-		l->stab = 0;
+		l->bline = bline;
 		last_line = l;
 
 	}
@@ -263,13 +267,6 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 		if (dbg) {
 			printf("lex(): tab: %u, space: %u\n", tab, space);
 		}
-		if (isstart) {
-			l->stab = tab;
-			l->sspace = space;
-			if (dbg) {
-				printf("lex(): starting tabs: %u, starting spaces: %u\n", l->stab, l->sspace);
-			}
-		}
 		if (isdelm(str[i], dbg) == 16) {
 			for (; isdelm(str[i], dbg) == 16; i++);
 		}
@@ -306,7 +303,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 						}
 					}
 					l->count++;
-					t = make_token(lex_type, k, 0, "", NULL);
+					t = make_token(lex_type, k, space, tab, 0, "", NULL);
 				} else {
 					lex_type = TOK_RS;
 					switch (tolower(lexeme[j-1])) {
@@ -324,7 +321,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 							break;
 					}
 					l->count++;
-					t = make_token(lex_type, rs, 0, "", NULL);
+					t = make_token(lex_type, rs, space, tab, 0, "", NULL);
 					isop = 0;
 				}
 				break;
@@ -353,7 +350,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 				}
 				lex_type = TOK_STRING;
 				l->count++;
-				t = make_token(lex_type, 0, 0, string[strid], NULL);
+				t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL);
 				break;
 			case PTOK_DOLLAR:
 			case PTOK_PERCENT:
@@ -385,7 +382,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 					}
 				}
 				l->count++;
-				t = make_token(lex_type, 0, value, "", NULL);
+				t = make_token(lex_type, 0, space, tab, value, "", NULL);
+				t->digits = (lt->id != TOK_SYM) ? j : 0;
 				break;
 			case PTOK_SQUOTE:
 				i++;
@@ -413,7 +411,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 				}
 				lex_type = TOK_CHAR;
 				l->count++;
-				t = make_token(lex_type, 0, ch, "", NULL);
+				t = make_token(lex_type, 0, space, tab, ch, "", NULL);
 				break;
 			case PTOK_LBRACK:
 			case PTOK_HASH  :
@@ -421,6 +419,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 				lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM;
 				memset(lexeme, 0, strlen(lexeme)+1);
 				lexeme[j++] = str[i];
+				(t) ? (t->subspace = space) : (lt->subspace = space);
+				(t) ? (t->subtab = tab) : (lt->subtab = tab);
 				break;
 			case PTOK_PLUS:
 			case PTOK_MINUS:
@@ -436,7 +436,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 					case PTOK_LT   : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break;
 				}
 				l->count++;
-				t = make_token(lex_type, value, 0, "", NULL);
+				t = make_token(lex_type, value, space, tab, 0, "", NULL);
 				memset(lexeme, 0, strlen(lexeme)+1);
 				lexeme[j++] = str[i];
 				if (value == EXPR_LSHFT || value == EXPR_RSHFT) {
@@ -448,6 +448,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 				lex_type = TOK_SYM;
 				memset(lexeme, 0, strlen(lexeme)+1);
 				lexeme[j] = str[i];
+				(t) ? (t->subspace = space) : (lt->subspace = space);
+				(t) ? (t->subtab = tab) : (lt->subtab = tab);
 				break;
 			case PTOK_RBRACK:
 				i++;
@@ -471,6 +473,8 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 				lexeme[j+2] = '\0';
 				lex_type = TOK_BREG;
 				l->tok->type = BREG;
+				(t) ? (t->subspace = space) : (lt->subspace = space);
+				(t) ? (t->subtab = tab) : (lt->subtab = tab);
 				break;
 			case PTOK_X:
 			case PTOK_Y:
@@ -499,13 +503,17 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 					case PTOK_P: of = 2; break;
 				}
 				lex_type = TOK_OF;
-				t = make_token(lex_type, of, 0, "", NULL);
+				t = make_token(lex_type, of, space, tab, 0, "", NULL);
 				break;
 			case PTOK_AT:
 				memset(lexeme, 0, strlen(lexeme)+1);
 				lexeme[j] = '@';
 				islocal = 1;
 				lex_type = TOK_LOCAL;
+				if (lt || t) {
+					(t) ? (t->subspace = space) : (lt->subspace = space);
+					(t) ? (t->subtab = tab) : (lt->subtab = tab);
+				}
 				break;
 			case PTOK_COLON:
 				i++;
@@ -547,18 +555,18 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 					lexeme[j] = '\0';
 					i += j;
 					comid = get_comment(lexeme, dbg);
+					/*is_newcom = (comid == 0xFFFF);
 					if (comid == 0xFFFF) {
-						/*if (line != lineidx && l[line].com != 0xFFFF) {
+						if (line != lineidx && l[line].com != 0xFFFF) {
 							comid = l[line].com;
 						} else {
 							comid = comidx;
-						}*/
+						}
 						comid = comidx;
 						comment[comid] = malloc(j+1);
 						memcpy(comment[comid], lexeme, j+1);
 						comidx++;
-					} else {
-					}
+					}*/
 					if (dbg) {
 						printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
 					}
@@ -566,9 +574,9 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 				lex_type = TOK_COMMENT;
 				l->count++;
 				if (j) {
-					t = make_token(lex_type, 0, 0, comment[comid], NULL);
+					t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL);
 				} else {
-					t = make_token(lex_type, 0, 0, ""            , NULL);
+					t = make_token(lex_type, 0, space, tab, 0, ""            , NULL);
 				}
 
 				break;
@@ -586,7 +594,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 								lex_type = TOK_OPCODE;
 								isop = 1;
 								l->count++;
-								t = make_token(lex_type, 0xFF, k, "", NULL);
+								t = make_token(lex_type, 0xFF, space, tab, k, "", NULL);
 								break;
 							}
 						}
@@ -600,7 +608,7 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 					}
 					lex_type = TOK_SYM;
 					l->count++;
-					t = make_token(lex_type, islocal, 0, "", NULL);
+					t = make_token(lex_type, islocal, space, tab, 0, "", NULL);
 					memcpy(sym, lexeme, j+1);
 					if (dbg) {
 						printf("lex(): spaces: %u\n", spaces);
@@ -622,22 +630,12 @@ uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 		if (dbg) {
 			printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
 		}
-		isstart = 0;
 		j = 0;
 		if (lex_type == TOK_OPCODE && !isop) {
 			j = 0;
 		} else if (lex_type == TOK_EXPR || !isdelm2(str[i], dbg)) {
 			i++;
 		}
-		if (lex_type == TOK_COMMENT) {
-			if (!isstart) {
-				l->etab = tab;
-				l->espace = space;
-				if (dbg) {
-					printf("lex(): ending tabs: %u, ending spaces: %u\n", l->etab, l->espace);
-				}
-			}
-		}
 		switch (lex_type) {
 			default:
 				lex_type = 0xFF;
-- 
cgit v1.2.3-70-g09d2