Refactored the assembler, yet again, and implemented

support for comma-separated values. The assembler now uses a linked list of tokenized lines, each containing a linked list of the tokens for that line. I also moved all of the large tables into the higher parts of memory, in order to free up the lower part of memory for the user. Comma-separated values only work with directives, and only with the byte, word, dword, and qword directives. I also added support for getting the upper and lower halves of an address. The tokens for those are '<' and '>', respectively.
author: mrb0nk500 <b0nk@b0nk.xyz> 2020-05-28 20:03:09 -0400
committer: mrb0nk500 <b0nk@b0nk.xyz> 2020-05-28 20:03:09 -0400
commit: 7ba25336342282bfe57dbb6ddf8f3e2ae3e1b719 (patch)
tree: 1e9a5921d19056be61b9f613f254556edf5242a7 /lexer.c
parent: 691ae45b3916379b0b1d845a5581d9068426b134 (diff)
1 files changed, 156 insertions, 350 deletions
diff --git a/lexer.c b/lexer.c
index 77dff16..ea83801 100644
--- a/lexer.c
+++ b/lexer.c
@@ -2,6 +2,8 @@
 
 uint8_t lex_type;
 uint16_t sym_count = 0;
+token *tokens = NULL;
+token *last_tok = NULL;
 
 void init_symbol() {
 	uint16_t i = 0;
@@ -41,10 +43,8 @@ uint16_t mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t useid, ui
 					printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, name);
 					printf("mksymbol(): i: $%X, id: $%04X\n", i, symbols[i]->id);
 				}
-				return symbols[i]->id;
-			} else {
-				return symbols[i]->id;
 			}
+			return symbols[i]->id;
 		}
 	}
 	symbols[i] = malloc(sizeof(**symbols) + strlen(name));
@@ -123,8 +123,8 @@ char *get_symname(uint16_t id, uint8_t dbg) {
 }
 
 uint16_t fixup_cnt = 0;
-uint16_t get_symid(const char *name, uint64_t val, uint16_t ln, uint8_t dbg) {
-	uint16_t i = mksymbol(name, 0, 0, 0, ln, dbg);
+uint16_t get_symid(const char *name, uint64_t val, token *t, uint8_t dbg) {
+	uint16_t i = mksymbol(name, 0, 0, 0, 0, dbg);
 	if (dbg) {
 		printf("get_symid(): Symbol ID: $%X, i: $%X.\n", symbols[i]->id, i);
 	}
@@ -136,7 +136,7 @@ uint16_t get_symid(const char *name, uint64_t val, uint16_t ln, uint8_t dbg) {
 		}
 		fixups[fixup_cnt] = malloc(sizeof(**fixups));
 		fixups[fixup_cnt]->adr = val;
-		fixups[fixup_cnt]->ln = ln;
+		fixups[fixup_cnt]->t = t;
 		fixups[fixup_cnt]->s = symbols[i];
 		fixup_cnt++;
 		return 0xFFFF;
@@ -196,14 +196,14 @@ uint16_t get_string(const char *str, uint8_t dbg) {
 	return i;
 }
 
-uint16_t reslv_fixups(struct line *l, uint8_t dbg) {
+uint16_t reslv_fixups(uint8_t dbg) {
 	uint16_t i = 0, j = 0;
 	for (; fixups[j]; j++) {
 		if (fixups[j]->s->def) {
 			if (dbg) {
-				printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%X.\n", fixups[j]->s->id, fixups[j]->s->name, fixups[j]->s->val);
+				printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", fixups[j]->s->id, fixups[j]->s->name, fixups[j]->s->val);
 			}
-			l[fixups[j]->ln].sym = fixups[j]->s->id;
+			fixups[j]->t->word = fixups[j]->s->id;
 		} else {
 			if (dbg) {
 				printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", fixups[j]->s->name, fixups[j]->adr);
@@ -215,205 +215,40 @@ uint16_t reslv_fixups(struct line *l, uint8_t dbg) {
 
 }
 
-uint64_t update_addr(struct line *ln, uint64_t address, uint8_t fixup, uint16_t l, uint8_t dbg) {
-	uint64_t value = 0;
+line *find_line(uint16_t ln, uint8_t dbg) {
 	uint16_t i = 0;
-	uint16_t j = 0;
-	uint16_t flags = 0;
-	uint8_t opsize = 0;
-
-	uint16_t symid	= ln[l].sym;
-	uint16_t str	= ln[l].str;
-	uint16_t com	= ln[l].com;
-	uint8_t islabel	= ln[l].islabel;
-	uint8_t issym	= ln[l].issym;
-	uint8_t opbase	= ln[l].opbase;
-	uint8_t aopbase	= ln[l].aopbase;
-	uint8_t dir	= ln[l].dir;
-	uint8_t am	= ln[l].am;
-	uint8_t cm	= ln[l].cm;
-	uint8_t rs	= ln[l].rs;
-	uint8_t mne	= ln[l].mne;
-
-	flags |= (dir     != 0x00FF) << 0x00;
-	flags |= (mne     != 0x00FF) << 0x01;
-	flags |= (rs      != 0x00FF) << 0x02;
-	flags |= (am      != 0x00FF) << 0x03;
-	flags |= (opbase  != 0x00FF) << 0x04;
-	flags |= (aopbase != 0x00FF) << 0x05;
-	flags |= (symid   != 0xFFFF) << 0x06;
-	flags |= (fixup    > 0x0000) << 0x06;
-	flags |= (islabel          ) << 0x07;
-	flags |= (issym            ) << 0x07;
-	flags |= (am      != 0x00FF) << 0x08;
-	flags |= (cm      != 0x00FF) << 0x09;
-	flags |= (str     != 0xFFFF) << 0x0A;
-
-	if (dbg) {
-		printf("update_addr(): ");
-		printf("flags: $%04X\n", flags);
-	}
-	if (!flags || flags == 0x40) {
-		if (dbg) {
-			printf("update_addr(): ");
-			puts("This line only contains a comment, so don't update the address.");
-		}
-		return address;
-	}
-	if ((flags & 0x53) == 0x42 || (flags & 0x51) == 0x41) {
-		if (fixup && symid == 0xFFFF && (opcodes[mne][IMPL] == 0xFF)) {
-			value = address;
-		} else if (opcodes[mne][IMPL] != 0xFF && symid == 0xFFFF) {
-			value = 0;
-		} else {
-			value = use_symbol("", symid, address, 1, dbg);
-		}
-	} else {
-		value = ln[l].op;
-	}
-	if (flags & 0x220) {
-		switch (cm) {
-			case 0: value += ln[l].aop; break;
-			case 1: value -= ln[l].aop; break;
+	line *l = lines;
+	for (; l && l->linenum != ln; l = l->next);
+	/*if (dbg) {
+		if (l->linenum == ln) {
+			printf("find_line(): Found line number %u, at line index %X.\n", ln, i);
 		}
-	}
-	if (dbg) {
-		printf("update_addr(): value: $%"PRIX64"\n", value);
-	}
-	switch (dir) {
-		case DIR_ORG:
-			address = value;
+		printf("find_line(): linenum: %u, i: %X\n", l->linenum, i);
+	}*/
+	if (l != NULL) {
+		if (l->linenum == ln) {
 			if (dbg) {
-				printf("update_addr(): ");
-				printf("Set the Program Counter's Origin to $%"PRIX64".\n", address);
-			}
-			break;
-		case DIR_BYTE:
-			if (flags & 0x400) {
-				for (; string[str][i] != '\0'; i++, j++, address++) {
-					i += string[str][i] == '\\';
-				}
-				j++;
-				address++;
-				if (dbg) {
-					printf("update_addr(): ");
-					printf("Increment Program Counter by $%04X", j);
-					  puts(", to make room for the string.");
-				}
-			} else {
-				address += 1;
+				printf("find_line(): Found line number %u.\n", ln);
 			}
-			break;
-		case DIR_WORD:  address += 2; break;
-		case DIR_DWORD: address += 4; break;
-		case DIR_QWORD: address += 8; break;
-	}
-	if (flags & 0x01) {
-		if (dbg) {
-			printf("update_addr(): ");
-			puts("This line contains a directive, so skip everything else.");
-		}
-		return address;
-	}
-	if ((flags & 0x15B) == 0x02 || (opcodes[mne][IMPL] != 0xFF && am == 0xFF && opbase == 0xFF && symid == 0xFFFF)) {
-		ln[l].am = IMPL;
-		am = IMPL;
-		if (dbg) {
-			printf("update_addr(): ");
-			puts("Addressing Mode has been set to Implied.");
-		}
-	}
-	if (am == IMPL) {
-		opsize = 0;
-	} else if (am == IMM) {
-		switch (rs) {
-			case 3:  address += 8; break;
-			case 2:  address += 4; break;
-			case 1:  address += 2; break;
-			default: address += 1; break;
+			return l;
 		}
+	} else {
 		if (dbg) {
-			if (!(flags & 0x04)) {
-				rs = 0;
-			}
-			printf("update_addr(): ");
-			printf("Increment Program Counter by $%02X", 1 << rs);
-			  puts(", to make room for the operand.");
-		}
-	} else if ((flags & 0x158) && (!(flags & 0x80))) {
-		opsize = 0;
-		opsize = (value <= 0x00000000000000FF) ? 1 : opsize;
-		opsize = (value >  0x00000000000000FF) ? 2 : opsize;
-		opsize = (value >  0x000000000000FFFF) ? 3 : opsize;
-		opsize = (value >  0x0000000000FFFFFF) ? 4 : opsize;
-		opsize = (value >  0x00000000FFFFFFFF) ? 5 : opsize;
-		opsize = (value >  0x000000FFFFFFFFFF) ? 6 : opsize;
-		opsize = (value >  0x0000FFFFFFFFFFFF) ? 7 : opsize;
-		opsize = (value >  0x00FFFFFFFFFFFFFF) ? 8 : opsize;
-		if (opsize) {
-			switch (opsize-1) {
-				case 0:
-				case 2:
-				case 5:
-				case 3:
-					if (!(flags & 0x100)) {
-						am = ZM;
-						ln[l].am = am;
-						if (dbg) {
-							printf("update_addr(): ");
-							puts("Addressing Mode has been set to Zero Matrix.");
-						}
-					}
-					break;
-				case 1:
-				case 4:
-				case 6:
-				case 7:
-					if (!(flags & 0x100)) {
-						am = ABS;
-						ln[l].am = am;
-						if (dbg) {
-							printf("update_addr(): ");
-							puts("Addressing Mode has been set to Absolute.");
-						}
-					}
-					break;
-			}
-			address += opsize;
-			if (dbg) {
-				printf("update_addr(): ");
-				printf("Increment Program Counter by $%02X", opsize);
-				  puts(", to make room for the address.");
-			}
-		}
-	}
-	if (dbg) {
-		printf("update_addr(): ");
-		printf("Address: $%"PRIX64"\n", address);
-	}
-	return address;
-}
-
-uint16_t find_line(struct line *l, uint16_t ln, uint8_t dbg) {
-	uint16_t i = 0;
-	for (; i < lineidx && l[i].linenum != ln; i++);
-	if (dbg) {
-		if (l[i].linenum == ln) {
-			printf("find_line(): Found line number %u, at line index %X.\n", ln, i);
+			printf("find_line(): oof, could not find line number %u.\n", ln);
 		}
-		printf("find_line(): linenum: %u, i: %X\n", l[i].linenum, i);
+		return NULL;
 	}
-	return i;
+	return l;
 }
 
-uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
+uint64_t lex(char *str, uint64_t address, uint8_t dbg) {
 	char sym[0x100];
 	uint16_t i = 0;
 	uint16_t j = 0;
 	uint16_t comid = 0;
 	uint16_t strid = 0;
 	uint16_t symid = 0;
-	uint16_t line = 0;
+	uint64_t value = 0;
 	lex_type = 0xFF;
 
 	uint8_t k = 0;
@@ -436,6 +271,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 	uint8_t isstart = 1;
 	uint8_t fall = 0;
 	uint8_t done = 0;
+	line *l = NULL;
+	token *st = NULL;
+	token *t = NULL;
+	token *lt = NULL;
 
 	while (isdigit(str[i]) && !isspace(str[i])) {
 		lnum[j++] = str[i++];
@@ -448,25 +287,18 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 	} else {
 		ln = linenum;
 	}
-	line = find_line(l, ln, dbg);
-	if (line != lineidx) {
-		address = l[line].addr;
+	l = find_line(ln, dbg);
+	if (l) {
+		address = l->addr;
+	} else {
+		l = malloc(sizeof(line));
+		(last_line) ? (last_line->next = l) : (lines = l);
+		l->tok = NULL;
+		l->next = NULL;
+		l->count = 0;
+		last_line = l;
+
 	}
-	l[line].dir = 0xFF;
-	l[line].mne = 0xFF;
-	l[line].rs = 0xFF;
-	l[line].am = 0xFF;
-	l[line].cm = 0xFF;
-	l[line].opbase = 0xFF;
-	l[line].aopbase = 0xFF;
-	l[line].islabel = 0;
-	l[line].issym = 0;
-	l[line].str = 0xFFFF;
-	l[line].com = 0xFFFF;
-	l[line].sym = 0xFFFF;
-	l[line].op = 0;
-	l[line].aop = 0;
-	l[line].addr = address;
 	while (str[i] != '\0' && str[i] != '\n') {
 		base = 0;
 		space = 0;
@@ -481,10 +313,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 			printf("lex(): tab: %u, space: %u\n", tab, space);
 		}
 		if (isstart) {
-			l[line].stab = tab;
-			l[line].sspace = space;
+			l->stab = tab;
+			l->sspace = space;
 			if (dbg) {
-				printf("lex(): starting tabs: %u, starting spaces: %u\n", l[line].stab, l[line].sspace);
+				printf("lex(): starting tabs: %u, starting spaces: %u\n", l->stab, l->sspace);
 			}
 		}
 		if (isspace(str[i])) {
@@ -506,7 +338,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 							break;
 						}
 					}
-					l[line].dir = k;
+					l->count++;
+					t = make_token(lex_type, k, 0, "");
 				} else {
 					lex_type = TOK_RS;
 					switch (tolower(lexeme[j-1])) {
@@ -523,8 +356,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 							rs = 3;
 							break;
 					}
-					address++;
-					l[line].rs = rs;
+					l->count++;
+					t = make_token(lex_type, rs, 0, "");
 					isop = 0;
 				}
 				break;
@@ -535,59 +368,45 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 				}
 				strid = get_string(lexeme, dbg);
 				if (strid == 0xFFFF) {
-					if (line != lineidx && l[line].str != 0xFFFF) {
-						strid = l[line].str;
-					} else {
-						strid = stridx;
-					}
+					strid = stridx;
 					string[strid] = malloc(j+1);
 					memcpy(string[strid], lexeme, j+1);
-					l[line].str = strid;
-					stridx += (line == lineidx);
+					stridx++;
 				} else {
-					l[line].str = strid;
 				}
 				if (dbg) {
 					printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
 				}
-				if (l[line].dir == DIR_INCLUDE) {
-					l[line].incl = strid;
+				if (lt->id == TOK_DIR && lt->type == DIR_INCLUDE) {
 					incl[inc_file++] = strid;
 				}
 				lex_type = TOK_STRING;
+				l->count++;
+				t = make_token(lex_type, 0, 0, string[strid]);
 				break;
-				if (str[i] == '$') {
-			case '$':	base = 16;
-				} else if (str[i] == '%') {
-			case '%':	base = 2;
-				}
+			case '$':
+			case '%':
+				value = 0;
+				base = (str[i] == '$') ? 16 : 2;
 				i++;
-				while (isxdigit(str[i]) && (str[i] != '\0' && str[i] != '\n')) {
+				while (isxdigit(str[i]) && (str[i] != '\0' && str[i] != '\n' && str[i] != ',')) {
 					lexeme[j++] = str[i++];
 				}
 				lexeme[j] = '\0';
-				switch (lex_type) {
-					default:
-						if (l[line].cm != 0xFF) {
-					case TOK_PLUS :
-					case TOK_MINUS:	l[line].aop = strtoull(lexeme, NULL, base);
-							l[line].aopbase = (base & 16) ? BASE_HEX : BASE_BIN;
-						} else {
-					case TOK_SYM:	l[line].op = strtoull(lexeme, NULL, base);
-							l[line].opbase = (base & 16) ? BASE_HEX : BASE_BIN;
-						}
-						if (lex_type == TOK_SYM) {
-							mksymbol(sym, l[line].op, 1, 0, 0, dbg);
-							l[line].sym = get_symid(sym, address, line, dbg);
-							isfixup += (l[line].sym == 0xFFFF);
-							if (dbg) {
-								printf("lex(): isfixup: %u\n", isfixup);
-							}
-						}
-						break;
+				lex_type = (base == 16) ? TOK_HEX : TOK_BIN;
+				value = strtoull(lexeme, NULL, base);
+				if (lt->id == TOK_SYM) {
+					mksymbol(sym, value, 1, 0, 0, dbg);
+					if (lt) {
+						lt->word = get_symid(sym, address, lt, dbg);
+					}
+					isfixup += (lt->word == 0xFFFF);
+					if (dbg) {
+						printf("lex(): isfixup: %u\n", isfixup);
+					}
 				}
-
-				lex_type = (base & 16) ? TOK_HEX : TOK_BIN;
+				l->count++;
+				t = make_token(lex_type, 0, value, "");
 				break;
 			case '\'':
 				i++;
@@ -611,37 +430,35 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 						break;
 					default: ch = lexeme[k];
 				}
-				switch (lex_type) {
-					case TOK_PLUS :
-					case TOK_MINUS:
-						l[line].aop	= ch;
-						l[line].aopbase = BASE_CHAR;
-						break;
-					default:
-						l[line].op	= ch;
-						l[line].opbase	= BASE_CHAR;
-						break;
-				}
 				lex_type = TOK_CHAR;
+				l->count++;
+				t = make_token(lex_type, 0, ch, "");
 				break;
-			case '(':
-				if (str[i] == '#' || str[i] == '(') {
-					if (str[i] == '#') {
-			case '#':		lex_type   = TOK_IMM;
-					}
-					l[line].am = (str[i] == '#') ? IMM : IND;
-				} else {
+			case '(': l->tok->type = IND; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
+			case '#': l->tok->type = IMM; memset(lexeme, 0, strlen(lexeme)+1); lexeme[j++] = str[i]; break;
 			case '+':
-			case '-':	l[line].cm = (str[i] == '-');
-					lex_type   = (str[i] == '-') ? TOK_MINUS : TOK_PLUS;
+			case '-':
+			case '>':
+			case '<':
+				lex_type = TOK_EXPR;
+				switch (str[i]) {
+					case '+': value = EXPR_PLUS ; break;
+					case '-': value = EXPR_MINUS; break;
+					case '>': value = EXPR_LOW  ; break;
+					case '<': value = EXPR_HIGH ; break;
 				}
+				l->count++;
+				t = make_token(lex_type, value, 0, "");
+				memset(lexeme, 0, strlen(lexeme)+1);
+				lexeme[j++] = str[i];
+				break;
+			case '=':
+				i++;
+				lex_type = TOK_SYM;
+				l->count++;
+				t = make_token(lex_type, 0, 0, "");
 				memset(lexeme, 0, strlen(lexeme)+1);
 				lexeme[j] = str[i];
-				if (str[i] == '=') {
-			case '=':	i++;
-					l[line].issym = 1;
-					lex_type = TOK_SYM;
-				}
 				break;
 			case ')':
 				i++;
@@ -650,9 +467,9 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 					while (isspace(str[i])) {
 						lexeme[j++] = str[i++];
 					}
-					if (l[line].am == IND && tolower(str[i]) == 'y') {
+					if (l->tok->type == IND && tolower(str[i]) == 'y') {
 						lexeme[j++] = 'y';
-						l[line].am = INDY;
+						l->tok->type = INDY;
 					}
 					lexeme[j] = '\0';
 				} else {
@@ -666,20 +483,21 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 				while (isspace(str[i])) {
 					lexeme[j++] = str[i++];
 				}
-				if (l[line].am == IND && tolower(str[i]) == 'x') {
-					l[line].am = INDX;
+				if (l->tok->type == IND && tolower(str[i]) == 'x') {
+					l->tok->type = INDX;
 					lexeme[j++] = 'x';
 					i++;
 				} else {
 					switch (tolower(str[i])) {
 						case 'x':
-							l[line].am = ZMX;
+							l->tok->type = ZMX;
 							lexeme[j++] = 'x';
 							break;
 						case 'y':
-							l[line].am = ZMY;
+							l->tok->type = ZMY;
 							lexeme[j++] = 'y';
 							break;
+						default: lex_type = TOK_COMMA; i--; break;
 					}
 				}
 				lexeme[j] = '\0';
@@ -689,13 +507,14 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 				lexeme[j] = ':';
 				lexeme[j+1] = '\0';
 				lex_type = TOK_LABEL;
-				l[line].islabel = 1;
+				l->count++;
+				t = make_token(lex_type, 0, 0, "");
 				mksymbol(sym, address, 1, 0, 0, dbg);
 				if (isfixup) {
-					isfixup = reslv_fixups(l, dbg);
+					isfixup = reslv_fixups(dbg);
 				}
-				l[line].sym = get_symid(sym, address, line, dbg);
-				isfixup += (l[line].sym == 0xFFFF);
+				t->word = get_symid(sym, address, t, dbg);
+				isfixup += (t->word == 0xFFFF);
 				if (dbg) {
 					printf("lex(): isfixup: %u\n", isfixup);
 				}
@@ -708,22 +527,23 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 				lexeme[j] = '\0';
 				comid = get_comment(lexeme, dbg);
 				if (comid == 0xFFFF) {
-					if (line != lineidx && l[line].com != 0xFFFF) {
+					/*if (line != lineidx && l[line].com != 0xFFFF) {
 						comid = l[line].com;
 					} else {
 						comid = comidx;
-					}
+					}*/
+					comid = comidx;
 					comment[comid] = malloc(j+1);
 					memcpy(comment[comid], lexeme, j+1);
-					l[line].com = comid;
-					comidx += (line == lineidx);
+					comidx++;
 				} else {
-					l[line].com = comid;
 				}
 				if (dbg) {
 					printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
 				}
 				lex_type = TOK_COMMENT;
+				l->count++;
+				t = make_token(lex_type, 0, 0, comment[comid]);
 				break;
 			default:
 				if (isalnum(str[i]) || str[i] == '_') {
@@ -733,6 +553,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 							case ',':
 							case '.':
 							case '+':
+							case '<':
+							case '>':
 							case '-':
 							case ':':
 							case '=':
@@ -759,8 +581,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 								if (!strcasecmp(lexeme, mne[k])) {
 									lex_type = TOK_OPCODE;
 									isop = 1;
-									l[line].mne = k;
-									address++;
+									l->count++;
+									t = make_token(lex_type, 0xFF, k, "");
 									break;
 								}
 							}
@@ -774,6 +596,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 								case '.':
 								case '+':
 								case '-':
+								case '<':
+								case '>':
 								case ':':
 								case ';':
 								case '=':
@@ -800,29 +624,25 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 						}
 						if (lexeme[k] == '\0') {
 							if (num) {
-								switch (lex_type) {
-									default:
-										if (l[line].cm != 0xFF) {
-									case TOK_PLUS :
-									case TOK_MINUS:	l[line].aop = strtoull(lexeme, NULL, 10);
-											l[line].aopbase = BASE_DEC;
-										} else {
-									case TOK_SYM:	l[line].op = strtoull(lexeme, NULL, 10);
-											l[line].opbase = BASE_DEC;
-										}
-										if (lex_type == TOK_SYM) {
-											mksymbol(sym, l[line].op, 1, 0, 0, dbg);
-											l[line].sym = get_symid(sym, address, line, dbg);
-											isfixup += (l[line].sym == 0xFFFF);
-											if (dbg) {
-												printf("lex(): isfixup: %u\n", isfixup);
-											}
-										}
-										break;
+								value = 0;
+								value = strtoull(lexeme, NULL, 10);
+								if (lt->id == TOK_SYM) {
+									mksymbol(sym, value, 1, 0, 0, dbg);
+									if (lt) {
+										lt->word = get_symid(sym, address, lt, dbg);
+									}
+									isfixup += (lt->word == 0xFFFF);
+									if (dbg) {
+										printf("lex(): isfixup: %u\n", isfixup);
+									}
 								}
 								lex_type = TOK_DEC;
+								l->count++;
+								t = make_token(lex_type, 0, value, "");
 							} else if (isch && lex_type != TOK_HEX && lex_type != TOK_BIN) {
 								lex_type = TOK_SYM;
+								l->count++;
+								t = make_token(lex_type, 0, 0, "");
 								memcpy(sym, lexeme, j+1);
 								uint8_t spaces = 0;
 								for (; isspace(str[i+spaces]); spaces++);
@@ -830,8 +650,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 									printf("lex(): spaces: %u\n", spaces);
 								}
 								if (str[i+spaces] != ':' && str[i+spaces] != '=') {
-									l[line].sym = get_symid(lexeme, address, line, dbg);
-									isfixup += l[line].sym == 0xFFFF;
+									t->word = get_symid(lexeme, address, t, dbg);
+									isfixup += (t->word == 0xFFFF);
 									if (dbg) {
 										printf("lex(): isfixup: %u\n", isfixup);
 									}
@@ -842,6 +662,9 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 				}
 				break;
 		}
+		if (!l->tok && t) {
+			l->tok = tokens;
+		}
 		if (dbg) {
 			printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? lex_tok[lex_type] : "TOK_NONE");
 		}
@@ -850,7 +673,7 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 		if (lex_type == TOK_OPCODE && !isop) {
 			j = 0;
 		} else {
-			if (lex_type == TOK_PLUS || lex_type == TOK_MINUS) {
+			if (lex_type == TOK_EXPR) {
 				i++;
 			} else {
 				switch (str[i]) {
@@ -859,6 +682,8 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 					case '.':
 					case '+':
 					case '-':
+					case '<':
+					case '>':
 					case ':':
 					case ';':
 					case '=':
@@ -875,10 +700,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 		}
 		if (lex_type == TOK_COMMENT) {
 			if (!isstart) {
-				l[line].etab = tab;
-				l[line].espace = space;
+				l->etab = tab;
+				l->espace = space;
 				if (dbg) {
-					printf("lex(): ending tabs: %u, ending spaces: %u\n", l[line].etab, l[line].espace);
+					printf("lex(): ending tabs: %u, ending spaces: %u\n", l->etab, l->espace);
 				}
 			}
 		}
@@ -886,48 +711,29 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
 			memset(lexeme, 0, strlen(lexeme)+1);
 			lex_type = 0xFF;
 		}
+		if (t) {
+			lt = t;
+			t = t->next;
+		}
 	}
 	if (i) {
-		address = update_addr(l, address, isfixup, line, dbg);
+		l->tok  = tokens;
+		tokens = NULL;
+		last_tok = NULL;
+		bytecount dummy;
+		address = parse_tokens(l->tok, &dummy, 0, address, dbg);
 		if (dbg) {
 			printf("lex(): Next address: $%"PRIX64"\n", address);
-			printf(
-				"lex(): "
-				"address: $%"PRIX64
-				", dir: %u"
-				", mne: $%02X"
-				", rs: %u"
-				", am: %u"
-				", cm: %u"
-				", opbase: %u"
-				", com: $%04X"
-				", sym: $%04X"
-				", op: $%016"PRIX64
-				", aop: $%016"PRIX64
-				", ln: %u\n"
-				, l[line].addr
-				, l[line].dir
-				, l[line].mne
-				, l[line].rs
-				, l[line].am
-				, l[line].cm
-				, l[line].opbase
-				, l[line].com
-				, l[line].sym
-				, l[line].op
-				, l[line].aop
-				, line);
 		}
 		if (ln > linenum || islinenum) {
-			l[line].linenum = ln;
+			l->linenum = ln;
 			if (ln > linenum) {
 				linenum+=(10+(ln & 10));
 			}
 		} else if (!islinenum) {
-			l[line].linenum = linenum;
+			l->linenum = linenum;
 			linenum += 10;
 		}
-		lineidx += (line == lineidx);
 	}
 	return address;
 }
author	mrb0nk500 <b0nk@b0nk.xyz>	2020-05-28 20:03:09 -0400
committer	mrb0nk500 <b0nk@b0nk.xyz>	2020-05-28 20:03:09 -0400
commit	7ba25336342282bfe57dbb6ddf8f3e2ae3e1b719 (patch)
tree	1e9a5921d19056be61b9f613f254556edf5242a7 /lexer.c
parent	691ae45b3916379b0b1d845a5581d9068426b134 (diff)