From b4f547ecb600729e0e1b980c27c154b2a99bbca1 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Fri, 6 Dec 2019 11:21:12 -0500 Subject: Finally got multithreading support working!! I have also added a program that computes the Fibonacci sequence that I wrote in Sux assembly. --- asmmon.c | 21 +- opcode.c | 167 ---------- opcode.h | 1 - sux.c | 895 ++++++++++++++++++++++++++++++++++++++++++-------- test/fib.s | 45 +++ test/test-the-tests.s | 25 +- 6 files changed, 853 insertions(+), 301 deletions(-) create mode 100644 test/fib.s diff --git a/asmmon.c b/asmmon.c index 1a41214..587b1f6 100644 --- a/asmmon.c +++ b/asmmon.c @@ -1,6 +1,8 @@ #include "opcode.h" #include +#define debug 1 + #define SETOP(num, _mne, _IMM, _ZM, _ZMX, _ZMY, _ABS, _IMPL) \ {opcodes[num].mnemonic[3] = '\0'; strncpy(opcodes[num].mnemonic, _mne, 3); \ opcodes[num].imm = _IMM; \ @@ -59,7 +61,7 @@ int asmmon() { SETOP(46, "ROL", 0x71, 0x75, 0x00, 0x00, 0x73, 0x00); SETOP(47, "SSP", 0x00, 0x00, 0x00, 0x00, 0x00, 0x78); SETOP(48, "BNE", 0x00, 0x00, 0x00, 0x00, 0x80, 0x00); - SETOP(49, "ROL", 0x81, 0x85, 0x00, 0x00, 0x83, 0x00); + SETOP(49, "ROR", 0x81, 0x85, 0x00, 0x00, 0x83, 0x00); SETOP(50, "CSP", 0x00, 0x00, 0x00, 0x00, 0x00, 0x88); SETOP(51, "BVS", 0x00, 0x00, 0x00, 0x00, 0x90, 0x00); SETOP(52, "MUL", 0x91, 0x95, 0x00, 0x00, 0x93, 0x00); @@ -150,7 +152,9 @@ int asmmon() { done |= 6; oprand = strtok(oprand, "$"); address = strtoull(oprand, NULL, 16); + #if debug printf("Origin for program code is now at address $%llx.\n", address); + #endif } if (strcasecmp(ins, ".byte") == 0 || strcasecmp(ins, ".word") == 0 || strcasecmp(ins, ".dword") == 0 || strcasecmp(ins, ".qword") == 0) { done |= 6; @@ -181,8 +185,9 @@ int asmmon() { addr[address+7] = value >> 56; address+=8; } - + #if debug printf("The value $%llx was placed at address $%llx.\n", value, address); + #endif } } @@ -206,7 +211,15 @@ int asmmon() { if (mode[0] == '$') { value = strtoull(oprand, NULL, 16); if (value & 0xFFFFFFFF) { - addrmode = 2; + 
char *stf[] = {"BPO", "BNG", "BCS", "BCC", "BEQ", "BNE", "BVS", "BVC"}; + for (int i = 0; i < 8; i++) { + if (strcasecmp(ins, stf[i]) == 0) { + addrmode = 5; + break; + } else { + addrmode = 2; + } + } } else if (value & 0xFFFFFFFF00000000) { addrmode = 5; } @@ -394,6 +407,7 @@ int asmmon() { } break; } + #if debug if (!(done & 6)) printf("instruction: %s, ", ins); #if (!__GLIBC__) || (__TINYC__) @@ -402,6 +416,7 @@ int asmmon() { printf("Postfix: %s, ", (postfix[0] != '\0') ? postfix : "none"); #endif printf("Operand: %s, Address: $%llx\n", (oprand != NULL) ? oprand : "none", address); + #endif } } } diff --git a/opcode.c b/opcode.c index 036a4fb..847ff80 100644 --- a/opcode.c +++ b/opcode.c @@ -9,55 +9,6 @@ void setps(struct sux *cpu, uint8_t thread) { (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); } -void adc(struct sux *cpu, uint64_t adr, uint8_t thread, uint8_t regsize) { - uint64_t value; - value = (uint64_t)addr[adr]; - if (regsize >= 2) { - value += (uint64_t)addr[adr+1] << 8; - } - if (regsize >= 4) { - value += (uint64_t)addr[adr+2] << 16; - value += (uint64_t)addr[adr+3] << 24; - } - if (regsize >= 8) { - value += (uint64_t)addr[adr+4] << 32; - value += (uint64_t)addr[adr+5] << 40; - value += (uint64_t)addr[adr+6] << 48; - value += (uint64_t)addr[adr+7] << 56; - } - uint64_t sum = cpu->a[thread]+value+cpu->c[thread]; - cpu->z[thread] = (sum == 0); - cpu->n[thread] = (sum >> 63); - cpu->v[thread] = !((cpu->a[thread]^value) & 0x8000000000000000) && ((cpu->a[thread]^sum) & 0x8000000000000000); - cpu->c[thread] = (sum < value); - cpu->a[thread] = sum; - setps(cpu, thread); -} - -void sbc(struct sux *cpu, uint64_t adr, uint8_t thread, uint8_t regsize) { - uint64_t value; - value = (uint64_t)addr[adr]; - if (regsize >= 2) - value += (uint64_t)addr[adr+1] << 8; - if (regsize >= 4) { - value += (uint64_t)addr[adr+2] << 16; - value += (uint64_t)addr[adr+3] << 24; - } - if (regsize >= 8) { - value += (uint64_t)addr[adr+4] << 32; - 
value += (uint64_t)addr[adr+5] << 40; - value += (uint64_t)addr[adr+6] << 48; - value += (uint64_t)addr[adr+7] << 56; - } - uint64_t sum = cpu->a[thread]-value-!cpu->c[thread]; - cpu->z[thread] = (sum == 0); - cpu->n[thread] = (sum >> 63); - cpu->v[thread] = ((cpu->a[thread]^value) & 0x8000000000000000) && ((cpu->a[thread]^sum) & 0x8000000000000000); - cpu->c[thread] = (sum > value); - cpu->a[thread] = sum; - setps(cpu, thread); -} - void mul(struct sux *cpu, uint64_t adr, uint8_t thread, uint8_t regsize) { uint64_t value; value = (uint64_t)addr[adr]; @@ -104,15 +55,6 @@ void divd(struct sux *cpu, uint64_t adr, uint8_t thread, uint8_t regsize) { setps(cpu, thread); } -uint64_t and(struct sux *cpu, uint64_t value, uint8_t thread) { - uint64_t sum; - sum &= value; - cpu->z[thread] = (sum == 0); - cpu->n[thread] = (sum >> 63); - setps(cpu, thread); - return sum; -} - void and_addr(struct sux *cpu, uint64_t* const reg, uint64_t adr, uint8_t thread, uint8_t regsize) { uint64_t value; value = (uint64_t)addr[adr]; @@ -134,15 +76,6 @@ void and_addr(struct sux *cpu, uint64_t* const reg, uint64_t adr, uint8_t thread setps(cpu, thread); } -uint64_t or(struct sux *cpu, uint64_t value, uint8_t thread) { - uint64_t sum; - sum |= value; - cpu->z[thread] = (sum == 0); - cpu->n[thread] = (sum >> 63); - setps(cpu, thread); - return sum; -} - void or_addr(struct sux *cpu, uint64_t* const reg, uint64_t adr, uint8_t thread, uint8_t regsize) { uint64_t value; value = (uint64_t)addr[adr]; @@ -164,15 +97,6 @@ void or_addr(struct sux *cpu, uint64_t* const reg, uint64_t adr, uint8_t thread, setps(cpu, thread); } -uint64_t xor(struct sux *cpu, uint64_t value, uint8_t thread) { - uint64_t sum; - sum ^= value; - cpu->z[thread] = (sum == 0); - cpu->n[thread] = (sum >> 63); - setps(cpu, thread); - return sum; -} - void xor_addr(struct sux *cpu, uint64_t* const reg, uint64_t adr, uint8_t thread, uint8_t regsize) { uint64_t value; value = (uint64_t)addr[adr]; @@ -194,50 +118,6 @@ void 
xor_addr(struct sux *cpu, uint64_t* const reg, uint64_t adr, uint8_t thread setps(cpu, thread); } -void rol(struct sux *cpu, uint64_t adr, uint8_t thread) { - uint64_t value = addr[adr]; - uint64_t sum = cpu->a[thread] << value; - sum |= cpu->c[thread]; - cpu->z[thread] = (sum == 0); - cpu->n[thread] = (sum >> 63); - cpu->c[thread] = cpu->a[thread] >> (uint64_t)64-value; - cpu->a[thread] = sum; - setps(cpu, thread); -} - -void ror(struct sux *cpu, uint64_t adr, uint8_t thread) { - uint64_t value = addr[adr]; - uint64_t sum = cpu->a[thread] >> value; - sum |= (uint64_t)cpu->c[thread] << (uint64_t)64-value; - cpu->c[thread] = cpu->a[thread] & 1; - cpu->a[thread] = sum; - setps(cpu, thread); -} - -void lsl(struct sux *cpu, uint64_t adr, uint8_t thread) { - uint64_t value = addr[adr]; - uint64_t sum = (value < 64) ? cpu->a[thread] << value : 0; - cpu->c[thread] = cpu->a[thread] >> 64-value; - cpu->a[thread] = sum; - setps(cpu, thread); -} - -void lsr(struct sux *cpu, uint64_t adr, uint8_t thread) { - uint64_t value = addr[adr]; - uint64_t sum = (value < 64) ? cpu->a[thread] >> value : 0; - cpu->c[thread] = cpu->a[thread] & 1; - cpu->a[thread] = sum; - setps(cpu, thread); -} - -void inc_addr(struct sux *cpu, uint64_t adr, uint8_t thread) { - addr[adr]++; -} - -void dec_addr(struct sux *cpu, uint64_t adr, uint8_t thread) { - addr[adr]--; -} - void stt(struct sux* const cpu, uint8_t value) { uint16_t tv = 0xFF50; /* Thread Vector. 
*/ uint8_t t = addr[value]; @@ -313,52 +193,9 @@ uint8_t pull(struct sux *cpu) { return addr[STK_STADDR+cpu->sp]; } -void cmp_addr(struct sux *cpu, uint64_t reg, uint64_t adr, uint8_t thread, uint8_t regsize) { - uint64_t value; - value = (uint64_t)addr[adr]; - if (regsize >= 2) - value += (uint64_t)addr[adr+1] << 8; - if (regsize >= 4) { - value += (uint64_t)addr[adr+2] << 16; - value += (uint64_t)addr[adr+3] << 24; - } - if (regsize >= 8) { - value += (uint64_t)addr[adr+4] << 32; - value += (uint64_t)addr[adr+5] << 40; - value += (uint64_t)addr[adr+6] << 48; - value += (uint64_t)addr[adr+7] << 56; - } - uint64_t sum = reg-value; - cpu->n[thread] = (sum & 0x8000000000000000) ? 1 : 0; - cpu->z[thread] = (sum == 0) ? 1 : 0; - cpu->c[thread] = (sum > value) ? 1 : 0; - setps(cpu, thread); -} - -void cmp(struct sux *cpu, uint64_t reg1, uint64_t reg2, uint8_t thread) { - uint64_t sum = reg1-reg2; - cpu->n[thread] = (sum & 0x8000000000000000) ? 1 : 0; - cpu->z[thread] = (sum == 0) ? 1 : 0; - cpu->c[thread] = (sum > reg2) ? 1 : 0; - setps(cpu, thread); -} - -/* Branch if Flag Set. */ -void bfs(struct sux *cpu, uint8_t flag, uint64_t adr, uint8_t thread) { - if (flag) - cpu->pc[thread] = adr; -} - -/* Branch if Flag Clear. 
*/ -void bfc(struct sux *cpu, uint8_t flag, uint64_t adr, uint8_t thread) { - if (!flag) - cpu->pc[thread] = adr; -} - uint64_t immaddr(struct sux *cpu, uint8_t thread, uint8_t size) { uint64_t adr = cpu->pc[thread]; cpu->pc[thread]+=size; - clk++; return adr; } @@ -372,7 +209,6 @@ uint64_t absaddr(struct sux *cpu, uint8_t thread) { | (uint64_t)addr[cpu->pc[thread]+6] << 48 | (uint64_t)addr[cpu->pc[thread]+7] << 56; cpu->pc[thread]+=8; - clk++; return adr; } @@ -382,7 +218,6 @@ uint32_t zeromtx(struct sux *cpu, uint8_t thread) { | (uint32_t)addr[cpu->pc[thread]+2] << 16 | (uint32_t)addr[cpu->pc[thread]+3] << 24; cpu->pc[thread]+=4; - clk++; return adr; } @@ -393,7 +228,6 @@ uint32_t zeromx(struct sux *cpu, uint8_t thread) { | (uint32_t)addr[cpu->pc[thread]+3] << 24; adr += cpu->x[thread]; cpu->pc[thread]+=4; - clk++; return adr; } @@ -404,6 +238,5 @@ uint32_t zeromy(struct sux *cpu, uint8_t thread) { | (uint32_t)addr[cpu->pc[thread]+3] << 24; adr += cpu->y[thread]; cpu->pc[thread]+=4; - clk++; return adr; } diff --git a/opcode.h b/opcode.h index 5203fa9..1f84763 100644 --- a/opcode.h +++ b/opcode.h @@ -96,7 +96,6 @@ struct sux; uint8_t *addr; /* Address Space. */ -uint64_t clk; /* Clock cycles. */ uint8_t ibcount; /* Number of bytes taken up by instruction. */ struct sux { diff --git a/sux.c b/sux.c index b791022..2f1461a 100644 --- a/sux.c +++ b/sux.c @@ -4,28 +4,47 @@ #include #define bench 0 #if bench -#include +#include #endif #define THREADS 1 +#define BENCH_INST 100000000*THREADS +uint64_t clk[THREADS]; /* Per Thread Clock cycles. */ +uint64_t tclk; /* Total Clock cycles. 
*/ uint64_t inst[THREADS]; uint64_t inss; +uint8_t threads_done = 0; uint8_t lines[THREADS]; -struct sux *cpu; +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +pthread_barrier_t bar; +struct suxthr { + struct sux sx; + uint8_t th; +}; #if bench double ipc; -time_t str, en; +struct timeval str[THREADS], en[THREADS]; #endif void *run(void *args) { - uint8_t thread = *((uint8_t *) args); + struct suxthr *thr = (void *)args; + struct sux *cpu = &thr->sx; + uint8_t thread = thr->th; uint64_t address; uint8_t prefix = 0; uint8_t opcode = 0; uint8_t end = 0; + uint64_t sum = 0; + uint64_t value = 0; + uint64_t iclk = 0; + uint64_t ins = 0; + /*pthread_mutex_lock(&mutex);*/ +#if bench + gettimeofday(&str[thread], 0); +#endif while (!end) { prefix = addr[cpu->pc[thread]]; - if ((prefix & 0x07) == 0x07) cpu->pc[thread]++; else @@ -33,12 +52,11 @@ void *run(void *args) { opcode = addr[cpu->pc[thread]]; #if !bench - printf("\033[%uH\033[2K" - "pc: 0x%08llx, a: 0x%016llx, x: 0x%016llx, y: 0x%016llx" - ", sp: 0x%04lx, ps: 0x%016llx, opcode: 0x%02x, thread: %u, inst: %s\n" - , lines[thread] + printf("\033[%uH", lines[thread]); + printf("pc: 0x%08llx, a: 0x%016llx, x: 0x%016llx, y: 0x%016llx" + ", sp: 0x%04lx, ps: 0x%016llx, prefix: 0x%02x, opcode: 0x%02x, thread: %u, inst: %s \r" , cpu->pc[thread], cpu->a[thread], cpu->x[thread], cpu->y[thread] - , cpu->sp, cpu->ps, opcode, thread, opname[opcode]); + , cpu->sp, cpu->ps, prefix, opcode, thread, opname[opcode]); fflush(stdout); lines[thread]++; if (lines[thread] > 6*(thread+1)) @@ -50,7 +68,7 @@ void *run(void *args) { uint8_t tmp; address = cpu->pc[thread]; cpu->pc[thread]++; - clk++; + iclk++; switch(opcode) { case CPS: /* Clear Processor Status. */ for (uint8_t i = 0; i < 8; i++) { @@ -63,9 +81,58 @@ void *run(void *args) { } cpu->ps &= 0; break; - case ADC: adc(cpu, immaddr(cpu, thread, regsize), thread, regsize); break; /* ADC Immediate. 
*/ - case 0x03: adc(cpu, absaddr(cpu, thread), thread, regsize); break; /* ADC Absolute. */ - case 0x05: adc(cpu, zeromtx(cpu, thread), thread, regsize); break; /* ADC Zero Matrix. */ + case ADC: /* ADC Immediate. */ + case 0x03: /* ADC Absolute. */ + case 0x05: /* ADC Zero Matrix. */ + if (opcode == ADC) { + address = cpu->pc[thread]; + cpu->pc[thread]++; + } + if (opcode == 0x03) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x05) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = addr[address]; + if (regsize >= 2) { + value += (uint64_t)addr[address+1] << 8; + } + if (regsize >= 4) { + value += (uint64_t)addr[address+2] << 16; + value += (uint64_t)addr[address+3] << 24; + } + if (regsize >= 8) { + value += (uint64_t)addr[address+4] << 32; + value += (uint64_t)addr[address+5] << 40; + value += (uint64_t)addr[address+6] << 48; + value += (uint64_t)addr[address+7] << 56; + } + sum = cpu->a[thread]+value+cpu->c[thread]; + cpu->a[thread] = sum; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + cpu->v[thread] = !((cpu->a[thread]^value) & 0x8000000000000000) && ((cpu->a[thread]^sum) & 0x8000000000000000); + cpu->c[thread] = (sum < value); + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->v[thread]) ? (cpu->ps |= (V << 8*thread)) : (cpu->ps &= ~(V << 8*thread)); + (cpu->c[thread]) ? 
(cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); + break; case PHP: /* PusH Processor status to stack. */ tmp = addr[cpu->pc[thread]++]; if (tmp > 7) @@ -103,12 +170,70 @@ void *run(void *args) { push(cpu, cpu->x[thread] & 0xFF); break; case JMP: /* JMP Absolute. */ - address = absaddr(cpu, thread); + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; cpu->pc[thread] = address; break; - case SBC: sbc(cpu, immaddr(cpu, thread, regsize), thread, regsize); break; /* SBC Immediate. */ - case 0x13: sbc(cpu, absaddr(cpu, thread), thread, regsize); break; /* SBC Absolute. */ - case 0x15: sbc(cpu, zeromtx(cpu, thread), thread, regsize); break; /* SBC Zero Matrix. */ + case SBC: /* SBC Immediate. */ + case 0x13: /* SBC Absolute. */ + case 0x15: /* SBC Zero Matrix. 
*/ + if (opcode == SBC) { + address = cpu->pc[thread]; + cpu->pc[thread]++; + } + if (opcode == 0x13) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x15) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = addr[address]; + if (regsize >= 2) { + value += (uint64_t)addr[address+1] << 8; + } + if (regsize >= 4) { + value += (uint64_t)addr[address+2] << 16; + value += (uint64_t)addr[address+3] << 24; + } + if (regsize >= 8) { + value += (uint64_t)addr[address+4] << 32; + value += (uint64_t)addr[address+5] << 40; + value += (uint64_t)addr[address+6] << 48; + value += (uint64_t)addr[address+7] << 56; + } + sum = cpu->a[thread]-value-!cpu->c[thread]; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + cpu->v[thread] = ((cpu->a[thread]^value) & 0x8000000000000000) && ((cpu->a[thread]^sum) & 0x8000000000000000); + cpu->c[thread] = (sum > value); + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->v[thread]) ? (cpu->ps |= (V << 8*thread)) : (cpu->ps &= ~(V << 8*thread)); + (cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); + cpu->a[thread] = sum; + break; case PLP: /* PuLl Processor status from stack. */ tmp = addr[cpu->pc[thread]++]; if (tmp > 7) @@ -167,96 +292,442 @@ void *run(void *args) { break; case AND: and_addr(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* AND Immediate. 
*/ case ANY: and_addr(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* ANY Immediate. */ - case AAY: cpu->a[thread] = and(cpu, cpu->y[thread], thread); break; + case AAY: + case AAX: + if (opcode == AAY) + cpu->a[thread] &= cpu->y[thread]; + if (opcode == AAX) + cpu->a[thread] &= cpu->x[thread]; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + break; case ANX: and_addr(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* ANX Immediate. */ - case AAX: cpu->a[thread] = and(cpu, cpu->x[thread], thread); break; case STT: stt(cpu, immaddr(cpu, thread, 1)); break; /* STart Thread. */ case 0x29: and_addr(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break; /* AND Absolute. */ case 0x2B: and_addr(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break; /* AND Zero Matrix. */ - case BPO: bfc(cpu, cpu->n[thread], absaddr(cpu, thread), thread); break; /* Branch if POsitive. */ + case BPO: /* Branch if POsitive. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (cpu->n[thread]) + cpu->pc[thread] = address; + break; case ORA: or_addr(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* ORA Immediate. */ case ORY: or_addr(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* ORY Immediate. 
*/ - case OAY: cpu->a[thread] = or(cpu, cpu->y[thread], thread); break; case ORX: or_addr(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* ORX Immediate. */ - case OAX: cpu->a[thread] = or(cpu, cpu->x[thread], thread); break; + case OAY: + case OAX: + if (opcode == OAY) + cpu->a[thread] |= cpu->y[thread]; + if (opcode == OAX) + cpu->a[thread] |= cpu->x[thread]; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + break; case SEI: /* SEt Interrupt. */ cpu->i[thread] = 1; - setps(cpu, thread); + (cpu->ps |= (I << 8*thread)); break; case 0x39: or_addr(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break; /* ORA Absolute. */ case 0x3B: or_addr(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break; /* ORA Zero Matrix. */ - case BNG: bfs(cpu, cpu->n[thread], absaddr(cpu, thread), thread); break; /* Branch if NeGative. */ + case BNG: /* Branch if NeGative. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (!cpu->n[thread]) + cpu->pc[thread] = address; + break; case XOR: xor_addr(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* XOR Immediate. */ case XRY: xor_addr(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* XRY Immediate. 
*/ - case XAY: cpu->a[thread] = xor(cpu, cpu->y[thread], thread); break; + case XAY: + case XAX: + if (opcode == XAY) + cpu->a[thread] ^= cpu->y[thread]; + if (opcode == XAX) + cpu->a[thread] ^= cpu->x[thread]; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + break; case XRX: xor_addr(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* XRX Immediate. */ - case XAX: cpu->a[thread] = xor(cpu, cpu->x[thread], thread); break; case CLI: /* CLear Interrupt. */ cpu->i[thread] = 0; - setps(cpu, thread); + (cpu->ps &= ~(I << 8*thread)); break; case 0x49: xor_addr(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break; /* XOR Absolute. */ case 0x4B: xor_addr(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break; /* XOR Zero Matrix. */ - case BCS: bfs(cpu, cpu->c[thread], absaddr(cpu, thread), thread); break; /* Branch if Carry Set. */ - case LSL: lsl(cpu, immaddr(cpu, thread, 1), thread); break; /* LSL Immediate. */ + case BCS: /* Branch if Carry Set. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (cpu->c[thread]) + cpu->pc[thread] = address; + break; + case LSL: /* LSL Immediate. */ + case 0x53: /* LSL Absolute. */ + case 0x55: /* LSL Zero Matrix. 
*/ + if (opcode == LSL) { + address = cpu->pc[thread]; + cpu->pc[thread]++; + } + if (opcode == 0x53) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x55) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = addr[address]; + sum = (value < 64) ? cpu->a[thread] << value : 0; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + cpu->c[thread] = cpu->a[thread] >> 64-value; + cpu->a[thread] = sum; + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); + break; case 0x52: and_addr(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break; /* ANY Absolute. */ - case 0x53: lsl(cpu, absaddr(cpu, thread), thread); break; /* LSL Absolute. */ case 0x54: and_addr(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break; /* ANX Absolute. */ - case 0x55: lsl(cpu, zeromtx(cpu, thread), thread); break; /* LSL Zero Matrix. */ case SEC: /* SEt Carry flag.*/ cpu->c[thread] = 1; - setps(cpu, thread); + (cpu->ps |= (C << 8*thread)); + break; + case STA: /* STA Absolute. */ + case STY: /* STY Absolute. */ + case STX: /* STX Absolute. */ + case 0x7B: /* STA Zero Matrix. */ + case 0x7D: /* STY Zero Matrix. */ + case 0x7E: /* STX Zero Matrix. 
*/ + if (opcode == STA || opcode == STY || opcode == STX) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x7B || opcode == 0x7D || opcode == 0x7E) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + if (opcode == STA || opcode == 0x7B) + value = cpu->a[thread]; + if (opcode == STY || opcode == 0x7D) + value = cpu->y[thread]; + if (opcode == STX || opcode == 0x7E) + value = cpu->x[thread]; + + addr[address] = value & 0xFF; + if (regsize >= 2) + addr[address+1] = value >> 8; + if (regsize >= 4) { + addr[address+2] = value >> 16; + addr[address+3] = value >> 24; + } + if (regsize >= 8) { + addr[address+4] = value >> 32; + addr[address+5] = value >> 40; + addr[address+6] = value >> 48; + addr[address+7] = value >> 56; + } + break; + case BCC: /* Branch if Carry Clear. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (!cpu->c[thread]) + cpu->pc[thread] = address; + break; + case LSR: /* LSR Immediate. */ + case 0x63: /* LSR Absolute. */ + case 0x65: /* LSR Zero Matrix. 
*/ + if (opcode == LSR) { + address = cpu->pc[thread]; + cpu->pc[thread]++; + } + if (opcode == 0x63) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x65) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = addr[address]; + sum = (value < 64) ? cpu->a[thread] >> value : 0; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + cpu->c[thread] = cpu->a[thread] & 1; + cpu->a[thread] = sum; + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); break; - case 0x59: ld(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break; /* LDA Absolute. */ - case 0x5A: ld(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break; /* LDY Absolute. */ - case STA: st(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break; /* STA Absolute. */ - case 0x5C: ld(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break; /* LDX Absolute. */ - case STY: st(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break; /* STY Absolute. */ - case STX: st(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break; /* STX Absolute. */ - case BCC: bfc(cpu, cpu->c[thread], absaddr(cpu, thread), thread); break; /* Branch if Carry Clear. */ - case LSR: lsr(cpu, immaddr(cpu, thread, 1), thread); break; /* LSR Immediate. 
*/ case 0x62: or_addr(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break; /* ORY Absolute. */ - case 0x63: lsr(cpu, absaddr(cpu, thread), thread); break; /* LSR Absolute. */ case 0x64: or_addr(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break; /* ORX Absolute. */ - case 0x65: lsr(cpu, zeromtx(cpu, thread), thread); break; /* LSR Zero Matrix. */ case CLC: /* CLear Carry flag. */ cpu->c[thread] = 0; - setps(cpu, thread); + (cpu->ps &= ~(C << 8*thread)); + break; + case 0x59: /* LDA Absolute. */ + case 0x5A: /* LDY Absolute. */ + case 0x5C: /* LDX Absolute. */ + case LDA: /* LDA Immediate. */ + case LDY: /* LDY Immediate. */ + case LDX: /* LDX Immediate. */ + case 0x79: /* LDA Zero Matrix. */ + case 0x7A: /* LDY Zero Matrix. */ + case 0x7C: /* LDX Zero Matrix. */ + if (opcode == LDA || opcode == LDY || opcode == LDX) { + address = cpu->pc[thread]; + cpu->pc[thread]+=regsize; + } + if (opcode == 0x59 || opcode == 0x5A || opcode == 0x5C) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x79 || opcode == 0x7A || opcode == 0x7C) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = (uint64_t)addr[address]; + if (regsize >= 2) + value += (uint64_t)addr[address+1] << 8; + if (regsize >= 4) { + value += (uint64_t)addr[address+2] << 16; + value += (uint64_t)addr[address+3] << 24; + } + if (regsize >= 8) { + value += (uint64_t)addr[address+4] << 32; + value += (uint64_t)addr[address+5] << 40; + value += (uint64_t)addr[address+6] << 48; + value += 
(uint64_t)addr[address+7] << 56; + } + if (opcode == LDA || opcode == 0x59 || opcode == 0x79) + cpu->a[thread] = value; + if (opcode == LDY || opcode == 0x5A || opcode == 0x7A) + cpu->y[thread] = value; + if (opcode == LDX || opcode == 0x5C || opcode == 0x7C) + cpu->x[thread] = value; + cpu->z[thread] = (value == 0); + cpu->n[thread] = (value >> 63); + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + break; + case BEQ: /* Branch if EQual. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (cpu->z[thread]) + cpu->pc[thread] = address; + break; + case ROL: /* ROL Immediate. */ + case 0x73: /* ROL Absolute. */ + case 0x75: /* ROL Zero Matrix. 
*/ + if (opcode == ROL) { + address = cpu->pc[thread]; + cpu->pc[thread]++; + } + if (opcode == 0x73) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x75) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = addr[address]; + sum = cpu->a[thread] << value; + sum |= cpu->c[thread]; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + cpu->c[thread] = cpu->a[thread] >> (uint64_t)64-value; + cpu->a[thread] = sum; + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); break; - case LDA: ld(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* LDA Immediate. */ - case LDY: ld(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* LDY Immediate. */ - case LDX: ld(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* LDX Immediate. */ - case BEQ: bfs(cpu, cpu->z[thread], absaddr(cpu, thread), thread); break; /* Branch if EQual. */ - case ROL: rol(cpu, immaddr(cpu, thread, 1), thread); break; /* ROL Immediate. */ case 0x72: xor_addr(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break; /* XRY Absolute. */ - case 0x73: rol(cpu, absaddr(cpu, thread), thread); break; /* ROL Absolute. */ case 0x74: xor_addr(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break; /* XRX Absolute. 
*/ - case 0x75: rol(cpu, zeromtx(cpu, thread), thread); break; /* ROL Zero Matrix. */ case SSP: /* Set Stack Protection flag. */ cpu->s[thread] = 1; - setps(cpu, thread); + (cpu->ps |= (S << 8*thread)); + break; + case BNE: /* Branch if Not Equal. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (!cpu->z[thread]) + cpu->pc[thread] = address; + break; + case ROR: /* ROR Immediate. */ + case 0x83: /* ROR Absolute. */ + case 0x85: /* ROR Zero Matrix. */ + if (opcode == ROR) { + address = cpu->pc[thread]; + cpu->pc[thread]++; + } + if (opcode == 0x83) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0x85) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = addr[address]; + sum = cpu->a[thread] >> value; + sum |= (uint64_t)cpu->c[thread] << (uint64_t)64-value; + cpu->z[thread] = (sum == 0); + cpu->n[thread] = (sum >> 63); + cpu->c[thread] = cpu->a[thread] & 1; + cpu->a[thread] = sum; + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->c[thread]) ? 
(cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); break; - case 0x79: ld(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break; /* LDA Zero Matrix. */ - case 0x7A: ld(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break; /* LDY Zero Matrix. */ - case 0x7B: st(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break; /* STA Zero Matrix. */ - case 0x7C: ld(cpu, &cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break; /* LDX Zero Matrix. */ - case 0x7D: st(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break; /* STY Zero Matrix. */ - case 0x7E: st(cpu, &cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break; /* STX Zero Matrix. */ - case BNE: bfc(cpu, cpu->z[thread], absaddr(cpu, thread), thread); break; /* Branch if Not Equal. */ - case ROR: ror(cpu, immaddr(cpu, thread, 1), thread); break; /* ROR Immediate. */ case 0x82: and_addr(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break; /* ANY Zero Matrix. */ - case 0x83: ror(cpu, absaddr(cpu, thread), thread); break; /* ROR Absolute. */ case 0x84: and_addr(cpu, &cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break; /* ANX Zero Matrix. */ - case 0x85: ror(cpu, zeromtx(cpu, thread), thread); break; /* ROR Zero Matrix. */ case CSP: /* Clear Stack Protection flag. */ cpu->s[thread] = 0; - setps(cpu, thread); + (cpu->ps &= ~(S << 8*thread)); break; case 0x89: ld(cpu, &cpu->a[thread], zeromx(cpu, thread), thread, regsize); break; /* LDA Zero Matrix, Indexed with X. */ case 0x8A: ld(cpu, &cpu->y[thread], zeromx(cpu, thread), thread, regsize); break; /* LDY Zero Matrix, Indexed with X. */ case 0x8B: st(cpu, &cpu->a[thread], zeromx(cpu, thread), thread, regsize); break; /* STA Zero Matrix, Indexed with X. */ case 0x8D: st(cpu, &cpu->y[thread], zeromx(cpu, thread), thread, regsize); break; /* STY Zero Matrix, Indexed with X. 
*/ - case BVS: bfs(cpu, cpu->v[thread], absaddr(cpu, thread), thread); break; /* Branch if oVerflow Set. */ + case BVS: /* Branch if oVerflow Set. */ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (cpu->v[thread]) + cpu->pc[thread] = address; + break; case MUL: mul(cpu, immaddr(cpu, thread, regsize), thread, regsize); break; /* MUL Immediate. */ case 0x92: or_addr(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break; /* ORY Zero Matrix. */ case 0x93: mul(cpu, absaddr(cpu, thread), thread, regsize); break; /* MUL Absolute. */ @@ -264,13 +735,26 @@ void *run(void *args) { case 0x95: mul(cpu, zeromtx(cpu, thread), thread, regsize); break; /* MUL Zero Matrix. */ case SEV: /* SEt oVerflow flag. */ cpu->v[thread] = 1; - setps(cpu, thread); + (cpu->ps |= (V << 8*thread)); break; case 0x99: ld(cpu, &cpu->a[thread], zeromy(cpu, thread), thread, regsize); break; /* LDA Zero Matrix, Indexed with Y. */ case 0x9B: ld(cpu, &cpu->x[thread], zeromy(cpu, thread), thread, regsize); break; /* STA Zero Matrix, Indexed with Y. */ case 0x9C: st(cpu, &cpu->a[thread], zeromy(cpu, thread), thread, regsize); break; /* LDX Zero Matrix, Indexed with Y. */ case 0x9E: st(cpu, &cpu->x[thread], zeromy(cpu, thread), thread, regsize); break; /* STX Zero Matrix, Indexed with Y. */ - case BVC: bfc(cpu, cpu->z[thread], absaddr(cpu, thread), thread); break; /* Branch if oVerflow Clear. */ + case BVC: /* Branch if oVerflow Clear. 
*/ + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + if (!cpu->v[thread]) + cpu->pc[thread] = address; + break; case DIV: divd(cpu, immaddr(cpu, thread, regsize), thread, regsize); break; /* DIV Immediate. */ case 0xA2: xor_addr(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break; /* XRY Zero Matrix. */ case 0xA3: divd(cpu, absaddr(cpu, thread), thread, regsize); break; /* DIV Absolute. */ @@ -278,7 +762,7 @@ void *run(void *args) { case 0xA5: divd(cpu, zeromtx(cpu, thread), thread, regsize); break; /* DIV Zero Matrix. */ case CLV: /* CLear oVerflow flag. */ cpu->v[thread] = 0; - setps(cpu, thread); + (cpu->ps &= ~(V << 8*thread)); break; case RTS: /* ReTurn from Subroutine. */ cpu->pc[thread] = (uint64_t)pull(cpu); @@ -286,11 +770,79 @@ void *run(void *args) { cpu->pc[thread] += ((uint64_t)pull(cpu) << 16); cpu->pc[thread] += ((uint64_t)pull(cpu) << 24) + 1; break; - case CMP: cmp_addr(cpu, cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* CMP Immediate. */ - case CPY: cmp_addr(cpu, cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* CPY Immediate. */ - case CAY: cmp(cpu, cpu->a[thread], cpu->y[thread], thread); break; - case CPX: cmp_addr(cpu, cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break; /* CPX Immediate. */ - case CAX: cmp(cpu, cpu->a[thread], cpu->x[thread], thread); break; + case CMP: /* CMP Immediate. */ + case CPY: /* CPY Immediate. */ + case CPX: /* CPX Immediate. */ + case 0xE2: /* CPY Absolute. */ + case 0xE4: /* CPX Absolute. */ + case 0xE5: /* CMP Absolute. */ + case 0xF2: /* CPY Zero Matrix. */ + case 0xF4: /* CPX Zero Matrix. 
*/ + case 0xF5: /* CMP Zero Matrix. */ + if (opcode == CMP || opcode == CPY || opcode == CPX) + address = immaddr(cpu, thread, regsize); + if (opcode == 0xE5 || opcode == 0xE2 || opcode == 0xE4) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0xF5 || opcode == 0xF2 || opcode == 0xF4) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + value = (uint64_t)addr[address]; + if (regsize >= 2) + value += (uint64_t)addr[address+1] << 8; + if (regsize >= 4) { + value += (uint64_t)addr[address+2] << 16; + value += (uint64_t)addr[address+3] << 24; + } + if (regsize >= 8) { + value += (uint64_t)addr[address+4] << 32; + value += (uint64_t)addr[address+5] << 40; + value += (uint64_t)addr[address+6] << 48; + value += (uint64_t)addr[address+7] << 56; + } + if (opcode == CMP || opcode == 0xE5 || opcode == 0xF5) + sum = cpu->a[thread]-value; + if (opcode == CPY || opcode == 0xE2 || opcode == 0xF2) + sum = cpu->y[thread]-value; + if (opcode == CPX || opcode == 0xE4 || opcode == 0xF4) + sum = cpu->x[thread]-value; + cpu->n[thread] = (sum & 0x8000000000000000) ? 1 : 0; + cpu->z[thread] = (sum == 0) ? 1 : 0; + cpu->c[thread] = (sum > value) ? 1 : 0; + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->c[thread]) ? 
(cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); + break; + case CAY: + case CAX: + if (opcode == CAY) + sum = cpu->a[thread]-cpu->y[thread]; + if (opcode == CAX) + sum = cpu->a[thread]-cpu->x[thread]; + cpu->n[thread] = (sum & 0x8000000000000000) ? 1 : 0; + cpu->z[thread] = (sum == 0) ? 1 : 0; + if (opcode == CAY) + cpu->c[thread] = (sum > cpu->y[thread]) ? 1 : 0; + if (opcode == CAX) + cpu->c[thread] = (sum > cpu->x[thread]) ? 1 : 0; + (cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread)); + (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); + (cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread)); + break; case ENT: ent(cpu, immaddr(cpu, thread, 1)); break; /* ENd Thread. */ case RTI: /* ReTurn from Interupt routine. */ cpu->ps = ((uint64_t)pull(cpu) << 8*thread); @@ -327,7 +879,12 @@ void *run(void *args) { (cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread)); break; case 0xD0: /* JMP Zero Matrix. */ - address = zeromtx(cpu, thread); + address = (uint32_t)addr[cpu->pc[thread]] + |(uint32_t)addr[cpu->pc[thread]+1] << 8 + |(uint32_t)addr[cpu->pc[thread]+2] << 16 + |(uint32_t)addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; cpu->pc[thread] = address; break; @@ -374,11 +931,30 @@ void *run(void *args) { push(cpu, (uint64_t)cpu->pc[thread] & 0xFF); cpu->pc[thread] = address; break; - case 0xE1: inc_addr(cpu, absaddr(cpu, thread), thread); break; /* INC Absolute. */ - case 0xE2: cmp_addr(cpu, cpu->y[thread], absaddr(cpu, thread), thread, regsize); break; /* CPY Absolute. */ - case 0xE3: inc_addr(cpu, zeromtx(cpu, thread), thread); break; /* INC Zero Matrix. */ - case 0xE4: cmp_addr(cpu, cpu->x[thread], absaddr(cpu, thread), thread, regsize); break; /* CPX Absolute. */ - case 0xE5: cmp_addr(cpu, cpu->a[thread], absaddr(cpu, thread), thread, regsize); break; /* CMP Absolute. */ + case 0xE1: /* INC Absolute. 
*/ + case 0xE3: /* INC Zero Matrix. */ + if (opcode == 0xE1) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0xE3) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + addr[address]++; + break; case NOP: break; /* No OPeration. */ case RTL: /* ReTurn from subroutine Long. */ cpu->pc[thread] = (uint64_t)pull(cpu); @@ -390,11 +966,30 @@ void *run(void *args) { cpu->pc[thread] += ((uint64_t)pull(cpu) << 48); cpu->pc[thread] += ((uint64_t)pull(cpu) << 56) + 1; break; - case 0xF1: dec_addr(cpu, absaddr(cpu, thread), thread); break; /* DEC Absolute. */ - case 0xF2: cmp_addr(cpu, cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break; /* CPY Zero Matrix. */ - case 0xF3: dec_addr(cpu, zeromtx(cpu, thread), thread); break; /* DEC Zero Matrix. */ - case 0xF4: cmp_addr(cpu, cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break; /* CPX Zero Matrix. */ - case 0xF5: cmp_addr(cpu, cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break; /* CMP Zero Matrix. */ + case 0xF1: /* DEC Absolute. */ + case 0xF3: /* DEC Zero Matrix. 
*/ + if (opcode == 0xE1) { + address = (uint64_t)addr[cpu->pc[thread]] + | (uint64_t)addr[cpu->pc[thread]+1] << 8 + | (uint64_t)addr[cpu->pc[thread]+2] << 16 + | (uint64_t)addr[cpu->pc[thread]+3] << 24 + | (uint64_t)addr[cpu->pc[thread]+4] << 32 + | (uint64_t)addr[cpu->pc[thread]+5] << 40 + | (uint64_t)addr[cpu->pc[thread]+6] << 48 + | (uint64_t)addr[cpu->pc[thread]+7] << 56; + cpu->pc[thread]+=8; + iclk++; + } + if (opcode == 0xE3) { + address = addr[cpu->pc[thread]] + | addr[cpu->pc[thread]+1] << 8 + | addr[cpu->pc[thread]+2] << 16 + | addr[cpu->pc[thread]+3] << 24; + cpu->pc[thread]+=4; + iclk++; + } + addr[address]--; + break; case BRK: /* BReaK. */ push(cpu, (uint64_t)cpu->pc[thread]-1 >> 56); push(cpu, (uint64_t)cpu->pc[thread]-1 >> 48); @@ -423,23 +1018,36 @@ void *run(void *args) { } break; } - inst[thread]++; - for (uint8_t i = 0; i < THREADS; i++) { - if (!i) - inss = inst[i]; - else - inss += inst[i]; - } - if (inss >= 100000000/THREADS) { + ins++; + /*if (ins % 50000000 == 0) { + int r = pthread_barrier_wait(&bar); + if (r != 0 && r != PTHREAD_BARRIER_SERIAL_THREAD) { + printf("oof, throds not sinking.\n"); + exit(-1); + } + }*/ + #if !bench + printf("\033[%uHInstructions executed: %llu, Clock cycles: %llu\n", (6*thread)+1, ins, iclk); + fflush(stdout); + #endif + if (ins >= BENCH_INST) { end = 1; + pthread_mutex_lock(&mutex); + threads_done++; + inst[thread] = ins; + clk[thread] = iclk; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +#if bench + gettimeofday(&en[thread], 0); +#endif } } /*return 1;*/ } int main(int argc, char **argv) { - cpu = malloc(sizeof(struct sux)); - cpu->sp = 0xFFFF; + struct suxthr thr[THREADS]; ibcount = 0; addr = malloc(0x04000000); inss = 0; @@ -447,55 +1055,86 @@ int main(int argc, char **argv) { if (asmmon() == 2) return 0; - for (int i = 0; i < THREADS; i++) { - cpu->a[i] = 0; - cpu->x[i] = 0; - cpu->y[i] = 0; - cpu->pc[i] = (uint64_t)addr[0xFFC0] - | (uint64_t)addr[0xFFC1] << 8 - | (uint64_t)addr[0xFFC2] 
<< 16 - | (uint64_t)addr[0xFFC3] << 24 - | (uint64_t)addr[0xFFC4] << 32 - | (uint64_t)addr[0xFFC5] << 40 - | (uint64_t)addr[0xFFC6] << 48 - | (uint64_t)addr[0xFFC7] << 56; + thr[i].sx.sp = 0xFFFF; + if (i) { + thr[i].sx.a[i] = 0; + thr[i].sx.x[i] = 0; + thr[i].sx.y[i] = 0; + thr[i].sx.pc[i] = (uint64_t)addr[0xFF50] + | (uint64_t)addr[0xFF51] << 8 + | (uint64_t)addr[0xFF52] << 16 + | (uint64_t)addr[0xFF53] << 24 + | (uint64_t)addr[0xFF54] << 32 + | (uint64_t)addr[0xFF55] << 40 + | (uint64_t)addr[0xFF56] << 48 + | (uint64_t)addr[0xFF57] << 56; + } else { + thr[i].sx.a[i] = 0; + thr[i].sx.x[i] = 0; + thr[i].sx.y[i] = 0; + thr[i].sx.pc[i] = (uint64_t)addr[0xFFC0] + | (uint64_t)addr[0xFFC1] << 8 + | (uint64_t)addr[0xFFC2] << 16 + | (uint64_t)addr[0xFFC3] << 24 + | (uint64_t)addr[0xFFC4] << 32 + | (uint64_t)addr[0xFFC5] << 40 + | (uint64_t)addr[0xFFC6] << 48 + | (uint64_t)addr[0xFFC7] << 56; + } + thr[i].th = i; } for (int i = 0; i < THREADS; i++) { lines[i] = (6*i)+2; inst[i] = 0; } pthread_t therads[THREADS]; + pthread_barrier_init(&bar, NULL, THREADS); int result; - uint8_t throds[THREADS]; - printf("\033[2J"); -#if bench - str=clock(); -#endif + puts("\033[2J\033[H"); for (int i = 0; i < THREADS; i++) { - throds[i] = i; - result = pthread_create(&therads[i], NULL, run, &throds[i]); + result = pthread_create(&therads[i], NULL, run, &thr[i]); assert(!result); } - for (int i = 0; i < THREADS; i++) { - result = pthread_join(therads[i], NULL); - assert(!result); + pthread_mutex_lock(&mutex); + while (threads_done < THREADS) { + pthread_cond_wait(&cond, &mutex); } -#if !bench - printf("\033[HInstructions executed: %llu, Clock cycles: %llu\n", inss, clk); - fflush(stdout); -#endif + pthread_mutex_unlock(&mutex); + pthread_barrier_destroy(&bar); #if bench - en=clock(); - double tm = (en-str)/THREADS; - double clkspd = ((tm/CLOCKS_PER_SEC)*1000000)/clk; - double mhz = 1000000.0/clkspd/1000000; - double ips = (double)inss/(double)tm; - ipc=(double)inss/(double)clk; - 
printf("\033[2J"); - printf("Instructions executed: %llu, Instructions per Second in MIPS: %f, Clock cycles: %llu, Clock Speed in MHz: %f, tm: %f\n", inss, ips, clk, mhz, tm/CLOCKS_PER_SEC); + if (threads_done == THREADS) { + double tm_sec, tm_usec, tm[THREADS], ttm; + double clkspd; + double mhz; + double ips[THREADS]; + double ipst; + for (int i = 0; i < THREADS; i++) { + tm_sec = (en[i].tv_sec - str[i].tv_sec); + tm_usec = (en[i].tv_usec-str[i].tv_usec); + tm[i] = (tm_sec*1000000)+(tm_usec); + ips[i] = inst[i]/tm[i]; + if (i) { + inss += inst[i]; + ttm += tm[i]; + ipst += ips[i]; + tclk += clk[i]; + } else { + inss = inst[i]; + ttm = tm[i]; + ipst = ips[i]; + tclk = clk[i]; + } + clkspd = (tm[i]/1000000)*1000000/clk[i]; + mhz = 1000000.0/clkspd/1000000; + printf("Instructions executed for thread %i: %llu, Instructions per Second for thread %i in MIPS: %f, tm: %f\n", i, inst[i], i, ips[i], tm[i]/1000000); + } + clkspd = (ttm/1000000)*1000000/tclk; + mhz = 1000000.0/clkspd/1000000; + /*printf("\033[2J");*/ + printf("Total Instructions executed: %llu, Total Instructions per Second in MIPS: %f, Clock cycles: %llu, Clock Speed in MHz: %f, tm: %f\n", inss, ipst, tclk, mhz, ttm/1000000); + } #endif - free(cpu); free(addr); return 0; } diff --git a/test/fib.s b/test/fib.s new file mode 100644 index 0000000..0d3ee03 --- /dev/null +++ b/test/fib.s @@ -0,0 +1,45 @@ +; Name: fib.s +; Description: Computes the Fibonacci sequence. +; +; Written in Sux Assembly +; by mr b0nk 500 + + +cps ; Clear the Processor Status register. +lda #$0 ; Clear the accumulator. 
+ldy #$1 ; y=1. +sty.q $2008 ; Store y into memory. +ldx #$0 ; x=0. +ldx.q $2000 ; Output the value of x. +adc.q $2008 ; Add x with y. +sta.q $2010 ; z=x+y +ldy.q $2008 +sty.q $2000 ; x=y. +sta.q $2008 ; y=z. +lda.q $2000 +bcs $8001 ; Start all over again, if the carry flag was set. +jmp $800D ; Otherwise, keep looping. + +; Set up the thread vectors. +.org $FF50 +.qword $8000 +; Execute the program. +done + diff --git a/test/test-the-tests.s b/test/test-the-tests.s index 47f7576..8471fab 100644 --- a/test/test-the-tests.s +++ b/test/test-the-tests.s @@ -1,5 +1,26 @@ +.org $0000 cps -inc -jmp $1 +lda #$01 +lsl #$1 +bcs $13 +jmp $3 +lda.q #$8000000000000000 +lsr #$1 +bcs $1 +jmp $1D +.org $8000 +cps +lda #$01 +lsl #$1 +bcs $8013 +jmp $8003 +lda.q #$8000000000000000 +lsr #$1 +bcs $8001 +jmp $801D + +.org $FF50 +.qword $8000 +.org $0 done -- cgit v1.2.3-13-gbd6f