1 files changed, 463 insertions, 21 deletions
diff --git a/programs/c-ports/subasm.c b/programs/c-ports/subasm.c
index a996e3a..89d1507 100644
--- a/programs/c-ports/subasm.c
+++ b/programs/c-ports/subasm.c
@@ -1,33 +1,475 @@
 #include "subeditor.h"
-int16_t str_cmp(const char *s0, uint16_t i, const char *s1, uint16_t j) {
-	for (; s0[i] == s1[j]; i++, j++);
-	return i-j;
+
+struct line {	/* one tokenized source line; lex() fills entries of tokline[] with these */
+	uint8_t dir;	/* directive slot: lex() stores its dir_t search index here */
+	uint8_t mne;	/* index of the matched mnemonic in mne_t */
+	uint8_t am;	/* addressing mode, an enum am value */
+	uint8_t opbase;	/* base the operand was written in, an enum base value */
+	uint16_t com;	/* NOTE(review): presumably a comment index; never written in this patch */
+	uint16_t label;	/* NOTE(review): presumably a label index; never written in this patch */
+	uint64_t op;	/* numeric operand value parsed by tohex()/todec()/tobin() */
+};
+char *label[0x1000];	/* NOTE(review): presumably label names; not referenced elsewhere in this patch */
+uint64_t label_addr[0x1000];	/* NOTE(review): presumably the address bound to each label; confirm once it is used */
+/*char *comment[0x1000];*/
+/*char *string[0x1000];*/
+
+enum dir {	/* directive kinds, listed in the same order as the dir_t name table */
+	DIR_ORG,	/* "org" */
+	DIR_BYTE,	/* "byte" */
+	DIR_WORD,	/* "word" */
+	DIR_DWORD,	/* "dword" */
+	DIR_QWORD	/* "qword" */
+};
+
+enum type {	/* token kinds stored in lex_type; lex() uses 0xFF there for "unclassified" */
+	TOK_DIR,	/* '.' followed by a name found in dir_t */
+	TOK_LABEL,	/* not produced by lex() in this patch */
+	TOK_STRING,	/* text between double quotes */
+	TOK_OPCODE,	/* word found in mne_t */
+	TOK_COMMENT,	/* everything after ';' */
+	TOK_HEX,	/* '$' followed by hex digits */
+	TOK_DEC,	/* bare run of decimal digits */
+	TOK_BIN	/* '%' followed by digits */
+};
+
+enum am {	/* addressing modes recorded in struct line.am by lex() */
+	IMPL,	/* value 0; never assigned explicitly by lex() in this patch */
+	IMM,	/* set when lex() sees '#' */
+	ZM,	/* not assigned by lex() in this patch */
+	ZMX,	/* set on ",x" when the mode is not IND */
+	ZMY,	/* set on ",y" when the mode is not IND */
+	ABS,	/* not assigned by lex() in this patch */
+	IND,	/* set when lex() sees '(' */
+	INDX,	/* set on ",x" while the mode is IND */
+	INDY	/* set on "),y" while the mode is IND */
+};
+
+enum base {	/* numeric base of an operand, stored in struct line.opbase */
+	HEX,	/* operand written with a '$' prefix */
+	DEC,	/* operand written as bare decimal digits */
+	BIN	/* operand written with a '%' prefix */
+};
+
+
+char lexeme[0x100];	/* text of the token lex() is currently building */
+uint8_t lex_type;	/* enum type of that token, or 0xFF while unclassified */
+struct line tokline[0x1000];	/* per-line records, indexed by line number inside lex() */
+uint16_t linenum = 10;	/* line number lex() uses when the input carries none; lex() advances it */
+
+static const char *mne_t[90] = {	/* mnemonic names; lex() stores the index of the matching entry in struct line.mne */
+	[ 0] = "CPS",
+	[ 1] = "ADC",
+	[ 2] = "AAB",
+	[ 3] = "PHP",
+	[ 4] = "CPB",
+	[ 5] = "PHB",
+	[ 6] = "DEC",
+	[ 7] = "JMP",
+	[ 8] = "SBC",
+	[ 9] = "SAB",
+	[10] = "ENT",
+	[11] = "CPY",
+	[12] = "PLB",
+	[13] = "INC",
+	[14] = "JSR",
+	[15] = "JSL",
+	[16] = "AND",
+	[17] = "ABA",
+	[18] = "PLP",
+	[19] = "CPX",
+	[20] = "PHY",
+	[21] = "BPO",
+	[22] = "ORA",
+	[23] = "OAB",
+	[24] = "STT",
+	[25] = "PLY",
+	[26] = "BNG",
+	[27] = "XOR",
+	[28] = "XAB",
+	[29] = "PHA",
+	[30] = "PHX",
+	[31] = "BCS",
+	[32] = "LSL",
+	[33] = "LLB",
+	[34] = "CLC",
+	[35] = "PLX",
+	[36] = "BCC",
+	[37] = "LSR",
+	[38] = "LRB",
+	[39] = "PLA",
+	[40] = "TAB",
+	[41] = "BEQ",
+	[42] = "ROL",
+	[43] = "RLB",
+	[44] = "SEC",
+	[45] = "TBA",
+	[46] = "BNE",
+	[47] = "ROR",
+	[48] = "RRB",
+	[49] = "DEY",
+	[50] = "TAY",
+	[51] = "BVS",
+	[52] = "MUL",
+	[53] = "MAB",
+	[54] = "CLI",
+	[55] = "TYA",
+	[56] = "BVC",
+	[57] = "DIV",
+	[58] = "DAB",
+	[59] = "INY",
+	[60] = "TAX",
+	[61] = "RTS",
+	[62] = "RTL",
+	[63] = "CMP",
+	[64] = "CAB",
+	[65] = "SEI",
+	[66] = "LDX",
+	[67] = "TXA",
+	[68] = "RTI",
+	[69] = "LDA",
+	[70] = "DEX",
+	[71] = "CLV",
+	[72] = "TYX",
+	[73] = "STA",
+	[74] = "TSX",
+	[75] = "LDB",
+	[76] = "INX",
+	[77] = "WAI",
+	[78] = "TXY",
+	[79] = "STB",
+	[80] = "TXS",
+	[81] = "LDY",
+	[82] = "BRK",
+	[83] = "NOP",
+	[84] = "STY",
+	[85] = "DEB",
+	[86] = "ASR",
+	[87] = "ARB",
+	[88] = "STX",
+	[89] = "INB"
+};
+
+static const char *dir_t[5] = {	/* directive names lex() matches case-insensitively after a '.'; ordered like enum dir */
+	[0] = "org",
+	[1] = "byte",
+	[2] = "word",
+	[3] = "dword",
+	[4] = "qword"
+};
+
+int isalpha(int c) {	/* 1 for ASCII 'A'-'Z' or 'a'-'z', otherwise 0 */
+	return (((unsigned)c | 0x20u) - 'a') <= 25u;	/* OR-ing 0x20 folds upper case onto lower case */
+}
+
+int isdigit(int c) {	/* 1 for '0'-'9', otherwise 0 */
+	return ((unsigned)c - '0') <= 9u;	/* unsigned wrap sends anything below '0' far out of range */
+}
+
+int isxdigit(int c) {	/* 1 for '0'-'9', 'A'-'F' or 'a'-'f', otherwise 0 */
+	return ((((unsigned)c | 0x20u) - 'a') <= 5u) || isdigit(c);
+}
+
+int islower(int c) {	/* 1 for 'a'-'z', otherwise 0 */
+	return ((unsigned)c - 'a') <= 25u;
+}
+
+int isupper(int c) {	/* 1 for 'A'-'Z', otherwise 0 */
+	return ((unsigned)c - 'A') <= 25u;
+}
+
+int toupper(int c) {	/* upper-case form of an ASCII lower-case letter; any other value is returned unchanged */
+	int lower = islower(c);
+	/* masking with 0x5F clears bit 5, mapping 'a'-'z' onto 'A'-'Z' */
+	return lower ? (c & 0x5F) : c;
+}
+
+uint64_t tohex(char *str) {	/* Value of the leading run of hex digits in str (either case); 0 when there is none. Overflow wraps modulo 2^64. */
+	const char *p = str;	/* pointer walk: an 8-bit index wrapped to 0 on 256+ digit input and never terminated */
+	uint8_t digit;
+	uint64_t value = 0;
+	for (; *p != '\0';) {
+		if (*p >= '0' && *p <= '9') {
+			digit = *p - '0';
+		} else if (*p >= 'A' && *p <= 'F') {	/* letters past 'F' end the number instead of becoming digits 16..35 */
+			digit = *p - ('A' - 10);
+		} else if (*p >= 'a' && *p <= 'f') {
+			digit = *p - ('a' - 10);
+		} else {
+			break;
+		}
+		value = (value << 4) + digit;
+		++p;
+	}
+	return value;
+}
+
+uint64_t todec(char *str) {	/* Value of the leading run of decimal digits in str; 0 when there is none. Overflow wraps modulo 2^64. */
+	const char *p = str;	/* pointer walk: an 8-bit index wrapped to 0 on 256+ digit input and never terminated */
+	uint8_t digit;
+	uint64_t value = 0;
+	for (; *p != '\0';) {
+		if (*p >= '0' && *p <= '9') {
+			digit = *p - '0';
+		} else {
+			break;
+		}
+		/* value * 10, written as 8*value + 2*value */
+		value = (value << 3) + (value << 1) + digit;
+		++p;
+	}
+	return value;
+}
+
+uint64_t tobin(char *str) {	/* Value of the leading run of '0'/'1' characters in str; 0 when there is none. Overflow wraps modulo 2^64. */
+	const char *p = str;	/* pointer walk: an 8-bit index wrapped to 0 on 256+ digit input and never terminated */
+	uint8_t digit;
+	uint64_t value = 0;
+	for (; *p != '\0';) {
+		if (*p == '0' || *p == '1') {	/* '2'-'9' end the number instead of being added in as bit values 2..9 */
+			digit = *p - '0';
+		} else {
+			break;
+		}
+		value = (value << 1) + digit;
+		++p;
+	}
+	return value;
+}
+
+int str_cmp(const char *s0, const char *s1) {	/* 0 when the strings are equal, else the difference of the first mismatching characters */
+	while (*s0 != '\0' && *s0 == *s1) { s0++; s1++; }
+	return *s0 - *s1;
 }
 
-void tokenize(char *str) {
+int str_casecmp(const char *s0, const char *s1) {	/* str_cmp ignoring ASCII case: 0 when equal, else the difference of the first mismatching characters after toupper() */
+	while (*s0 && *s1 && toupper(*s0) == toupper(*s1)) { s0++; s1++; }
+	return toupper(*s0) - toupper(*s1);
+}
+
+uint8_t is_white(char ch) {	/* 1 for a space or a tab, otherwise 0 */
+	return (ch == '\t' || ch == ' ');
+}
+
+
+
+uint8_t lex(char *str) {	/* Tokenize one input line: optional leading decimal line number, then tokens. Fills tokline[ln], echoes each lexeme through wprintw, and returns the final scan index truncated to 8 bits (0 when the line number does not fit tokline[]). */
 	uint16_t i = 0;
-	uint16_t skip = 0;
+	uint16_t j = 0;
+	lex_type = 0xFF;
+	uint8_t k = 0;
+	int line = 40;	/* screen row where the per-token debug output starts */
+	int16_t ln = -1;
+	int y, x;
+	uint64_t lnval = linenum;	/* no explicit number: fall back to the running linenum */
+	uint8_t islinenum;
+	while (isdigit(str[i])) {
+		i++;
+	}
+	islinenum = (i != 0);
+	if (i) {
+		lnval = todec(str);	/* parsed in place (todec stops at the first non-digit), so no fixed-size copy can overflow */
+	}
+	if (lnval >= sizeof(tokline) / sizeof(tokline[0])) {
+		return 0;	/* would index past tokline[]; nothing has been drawn yet */
+	}
+	ln = lnval;
+	getyx(scr, y, x);
 	uint8_t done = 0;
-	while (!done) {
-		if (!cmd_buf[i]) {
-			done = 1;
-		} else {
-			switch (cmd_buf[i]) {
-				case '.':
-					if (skip = str_cmp(cmd_buf, i+1, "org", 0) > 0) {
-						f = TOK_ORG;
-						i += skip;
-					} else if (skip = str_cmp(cmd_buf, i+1, "byte", 0) > 0) {
-						f = TOK_BYTE;
-						i+=skip;
-					}
-					break;
+	while (str[i] != '\0') {
+		if (is_white(str[i])) {
+			while (is_white(str[i])) {
+				i++;
 			}
 		}
+		switch (str[i]) {
+			case '.':
+				i++;
+				while (!is_white(str[i]) && str[i] != '\0') {
+					lexeme[j++] = str[i++];
+				}
+				lexeme[j] = '\0';
+				for (k = 0; k < 5; k++) {
+					if (!str_casecmp(lexeme, dir_t[k])) {
+						lex_type = TOK_DIR;
+						break;
+					}
+				}
+				/* no i++ here: the shared advance after the switch already steps past the delimiter; a second one ate the operand's first character */
+				tokline[ln].dir = k;
+				break;
+			case '\"':
+				i++;
+				while (str[i] != '\"' && str[i] != '\0') {
+					lexeme[j] = str[i];
+					/*string[linenum][j] = str[i];*/
+					j++, i++;
+				}
+				i += (str[i] == '\"');	/* step over the closing quote only when there is one */
+				lexeme[j] = '\0';
+				/*string[linenum][j] = '\0';*/
+				lex_type = TOK_STRING;
+				break;
+			case '$':
+				i++;
+				while (isxdigit(str[i]) && str[i] != '\0') {
+					lexeme[j++] = str[i++];
+				}
+				lexeme[j] = '\0';
+				tokline[ln].op = tohex(lexeme);
+				tokline[ln].opbase = HEX;
+				lex_type = TOK_HEX;
+				break;
+			case '%':
+				i++;
+				while (isdigit(str[i]) && str[i] != '\0') {
+					lexeme[j++] = str[i++];
+				}
+				lexeme[j] = '\0';
+				tokline[ln].op = tobin(lexeme);
+				tokline[ln].opbase = BIN;
+				lex_type = TOK_BIN;
+				break;
+			case ';':
+				i++;
+				while (str[i] != '\0') {
+					lexeme[j++] = str[i++];
+					/*comment[linenum][j] = str[i];*/
+					/*j++, i++;*/
+				}
+				lexeme[j] = '\0';
+				/*comment[linenum][j] = '\0';*/
+				lex_type = TOK_COMMENT;
+				break;
+			case '#':
+				lexeme[j] = '#';
+				lexeme[j+1] = '\0';
+				lexeme[j+2] = '\0';
+				tokline[ln].am = IMM;
+				break;
+			case '(':
+				lexeme[j] = '(';
+				lexeme[j+1] = '\0';
+				lexeme[j+2] = '\0';
+				tokline[ln].am = IND;
+				break;
+			case ')':
+				i++;
+				if (str[i] == ',') {
+					i++;
+					while (is_white(str[i])) {
+						lexeme[j++] = str[i++];
+					}
+					if (tokline[ln].am == IND && toupper(str[i]) == 'Y') {
+						lexeme[j++] = 'y';
+						tokline[ln].am = INDY;
+					}
+					lexeme[j] = '\0';
+				} else {
+					lexeme[j] = ')';
+					lexeme[j+1] = '\0';
+					lexeme[j+2] = '\0';
+				}
+				break;
+			case ',':
+				i++;
+				while (is_white(str[i])) {
+					lexeme[j++] = str[i++];
+				}
+				if (tokline[ln].am == IND && toupper(str[i]) == 'X') {
+					tokline[ln].am = INDX;
+					lexeme[j++] = 'x';
+					i++;
+				} else {
+					switch (toupper(str[i])) {
+						case 'X':
+							tokline[ln].am = ZMX;
+							lexeme[j++] = 'x';
+							break;
+						case 'Y':
+							tokline[ln].am = ZMY;
+							lexeme[j++] = 'y';
+							break;
+					}
+				}
+				lexeme[j] = '\0';
+				break;
+			default:
+				if (isalpha(str[i]) || isdigit(str[i])) {
+					while (!is_white(str[i]) && str[i] != '\0' && str[i] != ',' && str[i] != ')') {	/* leave ',' and ')' for their own cases */
+						lexeme[j++] = str[i++];
+					}
+					lexeme[j] = '\0';	/* terminate, or residue of a longer earlier token leaks into the comparisons below */
+					for (k = 0; k < 90; k++) {
+						if (!str_casecmp(lexeme, mne_t[k])) {
+							lex_type = TOK_OPCODE;
+							tokline[ln].mne = k;
+							break;
+						}
+					}
+					for (k = 0; lexeme[k] != '\0';) {
+						if (isdigit(lexeme[k])) {
+							k++;
+						} else {
+							break;
+						}
+					}
+					if (lexeme[k] == '\0') {
+						tokline[ln].op = todec(lexeme);
+						tokline[ln].opbase = DEC;
+						lex_type = TOK_DEC;
+					}
+				}
+				break;
+		}
+		wmove(scr, line++, 0);
+		wclrtoeol(scr);
+		wprintw(scr, "lexeme: %s, lex_type: %u\r", lexeme, lex_type);
+		j = 0, lex_type = 0xFF;
+		/*lex_type = 0xFF;*/
+		if (str[i] != '\0' && str[i] != ')' && str[i] != ',') {	/* shared advance; never step past the terminating NUL */
+			i++;
+		}
+	}
+	line++;
+	uint16_t ix = 0;
+	wmove(scr, line, 0);
+	wclrtoeol(scr);
+	wprintw(scr,   "dir: %u"
+		     ", mne: $%02X"
+		     ", am: %u"
+		     ", opbase: %u"
+		     ", com: $%04X"
+		     ", label: $%04X"
+		     ", op: $%016llX"
+		     ", ln: %i\r"
+		      , tokline[ln].dir
+		      , tokline[ln].mne
+		      , tokline[ln].am
+		      , tokline[ln].opbase
+		      , tokline[ln].com
+		      , tokline[ln].label
+		      , (unsigned long long)tokline[ln].op
+		      , ln);
+	wmove(scr, y, x);
+	wrefresh(scr);
+	if (ln > linenum) {
+		linenum+=(10+(ln & 10));
+	} else if (!islinenum) {
+		linenum += 10;
+	}
+	for (ix = 0; lexeme[ix] != '\0';) {
+		lexeme[ix] = 0;
+		++ix;
 	}
+	return i;
 }
 
-void subasm() {
+uint8_t subasm() {	/* Assembler entry point: runs lex() over cmd_buf and currently always returns 0. */
+	uint16_t i;
 	uint64_t address = 0;
-	tokenize(cmd_buf);
+	lex(cmd_buf);
+	/*if (dir != 0xFF) {
+	}*/
+	return 0;
 }