#include "opcode.h"
#include <assert.h>
#include <string.h>
#include <pthread.h>
#define bench 0
#if bench
#include <sys/time.h>
#endif
/* Number of emulated hardware threads; sizes every per-thread array below. */
#define THREADS 1
/*
 * Instruction count at which each run() loop stops.
 * Fully parenthesized so the macro behaves as one operand inside any
 * expression, and computed as unsigned long long so larger THREADS values
 * cannot overflow int arithmetic.
 */
#define BENCH_INST (100000000ULL*(THREADS))
/* Per-thread clock-cycle totals; run() stores its cycle counter here when it finishes. */
uint64_t clk[THREADS];
/* Sum of clk[] over all threads; only computed by the bench report in main(). */
uint64_t tclk;
/* Per-thread executed-instruction totals; written by run() when it finishes. */
uint64_t inst[THREADS];
/* Sum of inst[] over all threads; only computed by the bench report in main(). */
uint64_t inss;
/* Number of run() loops that have finished; incremented under `mutex`. */
uint8_t threads_done = 0;
/* Per-thread row number substituted into the "\033[%uH" escape of the debug trace. */
uint8_t lines[THREADS];
/* Protects threads_done, inst[] and clk[] while a thread reports completion. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/* Signalled by run() each time a thread finishes; main() waits on it. */
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* Initialized and destroyed in main(); nothing in the visible code waits on it. */
pthread_barrier_t bar;
/* Argument handed to each run() thread: a CPU state plus the thread's index. */
struct suxthr {
struct sux sx; /* CPU state used by this thread */
uint8_t th; /* index of this thread into the per-thread arrays */
};
#if bench
/* Not referenced anywhere in the visible code. */
double ipc;
/* Wall-clock timestamps taken by run() at start (str) and end (en) of each thread. */
struct timeval str[THREADS], en[THREADS];
#endif
void *run(void *args) {
struct suxthr *thr = (void *)args;
struct sux *cpu = &thr->sx;
uint8_t thread = thr->th;
uint64_t address;
uint8_t prefix = 0;
uint8_t opcode = 0;
uint8_t end = 0;
uint64_t sum = 0;
uint64_t value = 0;
uint64_t iclk = 0;
uint64_t ins = 0;
#if bench
gettimeofday(&str[thread], 0);
#endif
while (!end) {
prefix = addr[cpu->pc[thread]];
if ((prefix & 0x07) == 0x07)
cpu->pc[thread]++;
else
prefix = 0;
opcode = addr[cpu->pc[thread]];
#if !bench
printf("\033[%uH", lines[thread]);
printf("pc: 0x%08llx, a: 0x%016llx, x: 0x%016llx, y: 0x%016llx"
", sp: 0x%04lx, ps: 0x%016llx, prefix: 0x%02x, opcode: 0x%02x, thread: %u, inst: %s \r"
, cpu->pc[thread], cpu->a[thread], cpu->x[thread], cpu->y[thread]
, cpu->sp, cpu->ps, prefix, opcode, thread, opname[opcode]);
fflush(stdout);
lines[thread]++;
if (lines[thread] > 6*(thread+1))
lines[thread] = (6*thread)+2;
#endif
uint8_t rs = (prefix & 0x30) >> 4;
uint8_t regsize = (1 << rs);
uint8_t tmp;
address = cpu->pc[thread];
cpu->pc[thread]++;
iclk++;
switch(opcode) {
case CPS:
for (uint8_t i = 0; i < 8; i++) {
cpu->c[i] = 0;
cpu->z[i] = 0;
cpu->i[i] = 0;
cpu->s[i] = 0;
cpu->v[i] = 0;
cpu->n[i] = 0;
}
cpu->ps &= 0;
break;
case ADC:
case 0x03:
case 0x05:
if (opcode == ADC) {
address = cpu->pc[thread];
cpu->pc[thread]++;
}
if (opcode == 0x03) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x05) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = addr[address];
if (regsize >= 2) {
value += (uint64_t)addr[address+1] << 8;
}
if (regsize >= 4) {
value += (uint64_t)addr[address+2] << 16;
value += (uint64_t)addr[address+3] << 24;
}
if (regsize >= 8) {
value += (uint64_t)addr[address+4] << 32;
value += (uint64_t)addr[address+5] << 40;
value += (uint64_t)addr[address+6] << 48;
value += (uint64_t)addr[address+7] << 56;
}
sum = cpu->a[thread]+value+cpu->c[thread];
cpu->a[thread] = sum;
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
cpu->v[thread] = !((cpu->a[thread]^value) & 0x8000000000000000) && ((cpu->a[thread]^sum) & 0x8000000000000000);
cpu->c[thread] = (sum < value);
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->v[thread]) ? (cpu->ps |= (V << 8*thread)) : (cpu->ps &= ~(V << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case PHP:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (int8_t i = 8*tmp; i > 0; i-=8) {
push(cpu, cpu->ps >> i);
}
push(cpu, cpu->ps & 0xFF);
break;
case PHA:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (int8_t i = 8*tmp; i > 0; i-=8) {
push(cpu, cpu->a[thread] >> i);
}
push(cpu, cpu->a[thread] & 0xFF);
break;
case PHY:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (int8_t i = 8*tmp; i > 0; i-=8) {
push(cpu, cpu->y[thread] >> i);
}
push(cpu, cpu->y[thread] & 0xFF);
break;
case PHX:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (int8_t i = 8*tmp; i > 0; i-=8) {
push(cpu, cpu->x[thread] >> i);
}
push(cpu, cpu->x[thread] & 0xFF);
break;
case JMP:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
cpu->pc[thread] = address;
break;
case SBC:
case 0x13:
case 0x15:
if (opcode == SBC) {
address = cpu->pc[thread];
cpu->pc[thread]++;
}
if (opcode == 0x13) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x15) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = addr[address];
if (regsize >= 2) {
value += (uint64_t)addr[address+1] << 8;
}
if (regsize >= 4) {
value += (uint64_t)addr[address+2] << 16;
value += (uint64_t)addr[address+3] << 24;
}
if (regsize >= 8) {
value += (uint64_t)addr[address+4] << 32;
value += (uint64_t)addr[address+5] << 40;
value += (uint64_t)addr[address+6] << 48;
value += (uint64_t)addr[address+7] << 56;
}
sum = cpu->a[thread]-value-!cpu->c[thread];
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
cpu->v[thread] = ((cpu->a[thread]^value) & 0x8000000000000000) && ((cpu->a[thread]^sum) & 0x8000000000000000);
cpu->c[thread] = (sum > value);
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->v[thread]) ? (cpu->ps |= (V << 8*thread)) : (cpu->ps &= ~(V << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
cpu->a[thread] = sum;
break;
case PLP:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (uint8_t i = 0; i <= 8*tmp; i+=8) {
if (!i)
cpu->ps = (uint64_t)pull(cpu);
else
cpu->ps += (uint64_t)pull(cpu) << i;
}
break;
case PLA:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (uint8_t i = 0; i <= 8*tmp; i+=8) {
if (!i)
cpu->a[thread] = (uint64_t)pull(cpu);
else
cpu->a[thread] += (uint64_t)pull(cpu) << i;
}
break;
case PLY:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (uint8_t i = 0; i <= 8*tmp; i+=8) {
if (!i)
cpu->y[thread] = (uint64_t)pull(cpu);
else
cpu->y[thread] += (uint64_t)pull(cpu) << i;
}
break;
case PLX:
tmp = addr[cpu->pc[thread]++];
if (tmp > 7)
tmp = 7;
for (uint8_t i = 0; i <= 8*tmp; i+=8) {
if (!i)
cpu->x[thread] = (uint64_t)pull(cpu);
else
cpu->x[thread] += (uint64_t)pull(cpu) << i;
}
break;
case JSR:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
push(cpu, (uint64_t)cpu->pc[thread] >> 24);
push(cpu, (uint64_t)cpu->pc[thread] >> 16);
push(cpu, (uint64_t)cpu->pc[thread] >> 8);
push(cpu, (uint64_t)cpu->pc[thread] & 0xFF);
cpu->pc[thread] = address;
break;
case AND: and_addr(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case ANY: and_addr(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case AAY:
case AAX:
if (opcode == AAY)
cpu->a[thread] &= cpu->y[thread];
if (opcode == AAX)
cpu->a[thread] &= cpu->x[thread];
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
break;
case ANX: and_addr(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case STT: stt(cpu, immaddr(cpu, thread, 1)); break;
case 0x29: and_addr(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break;
case 0x2B: and_addr(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break;
case BPO:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (cpu->n[thread])
cpu->pc[thread] = address;
break;
case ORA: or_addr(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case ORY: or_addr(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case ORX: or_addr(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case OAY:
case OAX:
if (opcode == OAY)
cpu->a[thread] |= cpu->y[thread];
if (opcode == OAX)
cpu->a[thread] |= cpu->x[thread];
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
break;
case SEI:
cpu->i[thread] = 1;
(cpu->ps |= (I << 8*thread));
break;
case 0x39: or_addr(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break;
case 0x3B: or_addr(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break;
case BNG:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (!cpu->n[thread])
cpu->pc[thread] = address;
break;
case XOR: xor_addr(cpu, &cpu->a[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case XRY: xor_addr(cpu, &cpu->y[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case XAY:
case XAX:
if (opcode == XAY)
cpu->a[thread] ^= cpu->y[thread];
if (opcode == XAX)
cpu->a[thread] ^= cpu->x[thread];
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
break;
case XRX: xor_addr(cpu, &cpu->x[thread], immaddr(cpu, thread, regsize), thread, regsize); break;
case CLI:
cpu->i[thread] = 0;
(cpu->ps &= ~(I << 8*thread));
break;
case 0x49: xor_addr(cpu, &cpu->a[thread], absaddr(cpu, thread), thread, regsize); break;
case 0x4B: xor_addr(cpu, &cpu->a[thread], zeromtx(cpu, thread), thread, regsize); break;
case BCS:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (cpu->c[thread])
cpu->pc[thread] = address;
break;
case LSL:
case 0x53:
case 0x55:
if (opcode == LSL) {
address = cpu->pc[thread];
cpu->pc[thread]++;
}
if (opcode == 0x53) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x55) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = addr[address];
sum = (value < 64) ? cpu->a[thread] << value : 0;
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
cpu->c[thread] = cpu->a[thread] >> 64-value;
cpu->a[thread] = sum;
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case 0x52: and_addr(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break;
case 0x54: and_addr(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break;
case SEC:
cpu->c[thread] = 1;
(cpu->ps |= (C << 8*thread));
break;
case STA:
case STY:
case STX:
case 0x7B:
case 0x7D:
case 0x7E:
if (opcode == STA || opcode == STY || opcode == STX) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x7B || opcode == 0x7D || opcode == 0x7E) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
if (opcode == STA || opcode == 0x7B)
value = cpu->a[thread];
if (opcode == STY || opcode == 0x7D)
value = cpu->y[thread];
if (opcode == STX || opcode == 0x7E)
value = cpu->x[thread];
addr[address] = value & 0xFF;
if (regsize >= 2)
addr[address+1] = value >> 8;
if (regsize >= 4) {
addr[address+2] = value >> 16;
addr[address+3] = value >> 24;
}
if (regsize >= 8) {
addr[address+4] = value >> 32;
addr[address+5] = value >> 40;
addr[address+6] = value >> 48;
addr[address+7] = value >> 56;
}
break;
case BCC:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (!cpu->c[thread])
cpu->pc[thread] = address;
break;
case LSR:
case 0x63:
case 0x65:
if (opcode == LSR) {
address = cpu->pc[thread];
cpu->pc[thread]++;
}
if (opcode == 0x63) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x65) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = addr[address];
sum = (value < 64) ? cpu->a[thread] >> value : 0;
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
cpu->c[thread] = cpu->a[thread] & 1;
cpu->a[thread] = sum;
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case 0x62: or_addr(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break;
case 0x64: or_addr(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break;
case CLC:
cpu->c[thread] = 0;
(cpu->ps &= ~(C << 8*thread));
break;
case 0x59:
case 0x5A:
case 0x5C:
case LDA:
case LDY:
case LDX:
case 0x79:
case 0x7A:
case 0x7C:
if (opcode == LDA || opcode == LDY || opcode == LDX) {
address = cpu->pc[thread];
cpu->pc[thread]+=regsize;
}
if (opcode == 0x59 || opcode == 0x5A || opcode == 0x5C) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x79 || opcode == 0x7A || opcode == 0x7C) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = (uint64_t)addr[address];
if (regsize >= 2)
value += (uint64_t)addr[address+1] << 8;
if (regsize >= 4) {
value += (uint64_t)addr[address+2] << 16;
value += (uint64_t)addr[address+3] << 24;
}
if (regsize >= 8) {
value += (uint64_t)addr[address+4] << 32;
value += (uint64_t)addr[address+5] << 40;
value += (uint64_t)addr[address+6] << 48;
value += (uint64_t)addr[address+7] << 56;
}
if (opcode == LDA || opcode == 0x59 || opcode == 0x79)
cpu->a[thread] = value;
if (opcode == LDY || opcode == 0x5A || opcode == 0x7A)
cpu->y[thread] = value;
if (opcode == LDX || opcode == 0x5C || opcode == 0x7C)
cpu->x[thread] = value;
cpu->z[thread] = (value == 0);
cpu->n[thread] = (value >> 63);
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
break;
case BEQ:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (cpu->z[thread])
cpu->pc[thread] = address;
break;
case ROL:
case 0x73:
case 0x75:
if (opcode == ROL) {
address = cpu->pc[thread];
cpu->pc[thread]++;
}
if (opcode == 0x73) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x75) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = addr[address];
sum = cpu->a[thread] << value;
sum |= cpu->c[thread];
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
cpu->c[thread] = cpu->a[thread] >> (uint64_t)64-value;
cpu->a[thread] = sum;
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case 0x72: xor_addr(cpu, &cpu->y[thread], absaddr(cpu, thread), thread, regsize); break;
case 0x74: xor_addr(cpu, &cpu->x[thread], absaddr(cpu, thread), thread, regsize); break;
case SSP:
cpu->s[thread] = 1;
(cpu->ps |= (S << 8*thread));
break;
case BNE:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (!cpu->z[thread])
cpu->pc[thread] = address;
break;
case ROR:
case 0x83:
case 0x85:
if (opcode == ROR) {
address = cpu->pc[thread];
cpu->pc[thread]++;
}
if (opcode == 0x83) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0x85) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = addr[address];
sum = cpu->a[thread] >> value;
sum |= (uint64_t)cpu->c[thread] << (uint64_t)64-value;
cpu->z[thread] = (sum == 0);
cpu->n[thread] = (sum >> 63);
cpu->c[thread] = cpu->a[thread] & 1;
cpu->a[thread] = sum;
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case 0x82: and_addr(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break;
case 0x84: and_addr(cpu, &cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break;
case CSP:
cpu->s[thread] = 0;
(cpu->ps &= ~(S << 8*thread));
break;
case 0x89: ld(cpu, &cpu->a[thread], zeromx(cpu, thread), thread, regsize); break;
case 0x8A: ld(cpu, &cpu->y[thread], zeromx(cpu, thread), thread, regsize); break;
case 0x8B: st(cpu, &cpu->a[thread], zeromx(cpu, thread), thread, regsize); break;
case 0x8D: st(cpu, &cpu->y[thread], zeromx(cpu, thread), thread, regsize); break;
case BVS:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (cpu->v[thread])
cpu->pc[thread] = address;
break;
case MUL: mul(cpu, immaddr(cpu, thread, regsize), thread, regsize); break;
case 0x92: or_addr(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break;
case 0x93: mul(cpu, absaddr(cpu, thread), thread, regsize); break;
case 0x94: or_addr(cpu, &cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break;
case 0x95: mul(cpu, zeromtx(cpu, thread), thread, regsize); break;
case SEV:
cpu->v[thread] = 1;
(cpu->ps |= (V << 8*thread));
break;
case 0x99: ld(cpu, &cpu->a[thread], zeromy(cpu, thread), thread, regsize); break;
case 0x9B: ld(cpu, &cpu->x[thread], zeromy(cpu, thread), thread, regsize); break;
case 0x9C: st(cpu, &cpu->a[thread], zeromy(cpu, thread), thread, regsize); break;
case 0x9E: st(cpu, &cpu->x[thread], zeromy(cpu, thread), thread, regsize); break;
case BVC:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
if (!cpu->v[thread])
cpu->pc[thread] = address;
break;
case DIV: divd(cpu, immaddr(cpu, thread, regsize), thread, regsize); break;
case 0xA2: xor_addr(cpu, &cpu->y[thread], zeromtx(cpu, thread), thread, regsize); break;
case 0xA3: divd(cpu, absaddr(cpu, thread), thread, regsize); break;
case 0xA4: xor_addr(cpu, &cpu->x[thread], zeromtx(cpu, thread), thread, regsize); break;
case 0xA5: divd(cpu, zeromtx(cpu, thread), thread, regsize); break;
case CLV:
cpu->v[thread] = 0;
(cpu->ps &= ~(V << 8*thread));
break;
case RTS:
cpu->pc[thread] = (uint64_t)pull(cpu);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 8);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 16);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 24) + 1;
break;
case CMP:
case CPY:
case CPX:
case 0xE2:
case 0xE4:
case 0xE5:
case 0xF2:
case 0xF4:
case 0xF5:
if (opcode == CMP || opcode == CPY || opcode == CPX)
address = immaddr(cpu, thread, regsize);
if (opcode == 0xE5 || opcode == 0xE2 || opcode == 0xE4) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0xF5 || opcode == 0xF2 || opcode == 0xF4) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
value = (uint64_t)addr[address];
if (regsize >= 2)
value += (uint64_t)addr[address+1] << 8;
if (regsize >= 4) {
value += (uint64_t)addr[address+2] << 16;
value += (uint64_t)addr[address+3] << 24;
}
if (regsize >= 8) {
value += (uint64_t)addr[address+4] << 32;
value += (uint64_t)addr[address+5] << 40;
value += (uint64_t)addr[address+6] << 48;
value += (uint64_t)addr[address+7] << 56;
}
if (opcode == CMP || opcode == 0xE5 || opcode == 0xF5)
sum = cpu->a[thread]-value;
if (opcode == CPY || opcode == 0xE2 || opcode == 0xF2)
sum = cpu->y[thread]-value;
if (opcode == CPX || opcode == 0xE4 || opcode == 0xF4)
sum = cpu->x[thread]-value;
cpu->n[thread] = (sum & 0x8000000000000000) ? 1 : 0;
cpu->z[thread] = (sum == 0) ? 1 : 0;
cpu->c[thread] = (sum > value) ? 1 : 0;
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case CAY:
case CAX:
if (opcode == CAY)
sum = cpu->a[thread]-cpu->y[thread];
if (opcode == CAX)
sum = cpu->a[thread]-cpu->x[thread];
cpu->n[thread] = (sum & 0x8000000000000000) ? 1 : 0;
cpu->z[thread] = (sum == 0) ? 1 : 0;
if (opcode == CAY)
cpu->c[thread] = (sum > cpu->y[thread]) ? 1 : 0;
if (opcode == CAX)
cpu->c[thread] = (sum > cpu->x[thread]) ? 1 : 0;
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
(cpu->c[thread]) ? (cpu->ps |= (C << 8*thread)) : (cpu->ps &= ~(C << 8*thread));
break;
case ENT: ent(cpu, immaddr(cpu, thread, 1)); break;
case RTI:
cpu->ps = ((uint64_t)pull(cpu) << 8*thread);
cpu->pc[thread] = (uint64_t)pull(cpu);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 8);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 16);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 24);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 32);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 40);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 48);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 56);
break;
case INC:
case INY:
case IAY:
case INX:
case IAX:
if (opcode == INC || opcode == IAY || opcode == IAX) {
cpu->a[thread]+=1;
cpu->z[thread] = (cpu->a[thread] == 0);
cpu->n[thread] = (cpu->a[thread] >> 63);
}
if (opcode == INY || opcode == IAY) {
cpu->y[thread]+=1;
cpu->z[thread] = (cpu->y[thread] == 0);
cpu->n[thread] = (cpu->y[thread] >> 63);
}
if (opcode == INX || opcode == IAX) {
cpu->x[thread]+=1;
cpu->z[thread] = (cpu->x[thread] == 0);
cpu->n[thread] = (cpu->x[thread] >> 63);
}
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
break;
case 0xD0:
address = (uint32_t)addr[cpu->pc[thread]]
|(uint32_t)addr[cpu->pc[thread]+1] << 8
|(uint32_t)addr[cpu->pc[thread]+2] << 16
|(uint32_t)addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
cpu->pc[thread] = address;
break;
case DEC:
case DEY:
case DAY:
case DEX:
case DAX:
if (opcode == DEC || opcode == DAY || opcode == DAX) {
cpu->a[thread]-=1;
cpu->z[thread] = (cpu->a[thread] == 0);
cpu->n[thread] = (cpu->a[thread] >> 63);
}
if (opcode == DEY || opcode == DAY) {
cpu->y[thread]-=1;
cpu->z[thread] = (cpu->y[thread] == 0);
cpu->n[thread] = (cpu->y[thread] >> 63);
}
if (opcode == DEX || opcode == DAX) {
cpu->x[thread]-=1;
cpu->z[thread] = (cpu->x[thread] == 0);
cpu->n[thread] = (cpu->x[thread] >> 63);
}
(cpu->z[thread]) ? (cpu->ps |= (Z << 8*thread)) : (cpu->ps &= ~(Z << 8*thread));
(cpu->n[thread]) ? (cpu->ps |= (N << 8*thread)) : (cpu->ps &= ~(N << 8*thread));
break;
case JSL:
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
push(cpu, (uint64_t)cpu->pc[thread] >> 56);
push(cpu, (uint64_t)cpu->pc[thread] >> 48);
push(cpu, (uint64_t)cpu->pc[thread] >> 40);
push(cpu, (uint64_t)cpu->pc[thread] >> 32);
push(cpu, (uint64_t)cpu->pc[thread] >> 24);
push(cpu, (uint64_t)cpu->pc[thread] >> 16);
push(cpu, (uint64_t)cpu->pc[thread] >> 8);
push(cpu, (uint64_t)cpu->pc[thread] & 0xFF);
cpu->pc[thread] = address;
break;
case 0xE1:
case 0xE3:
if (opcode == 0xE1) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0xE3) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
addr[address]++;
break;
case NOP: break;
case RTL:
cpu->pc[thread] = (uint64_t)pull(cpu);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 8);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 16);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 24);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 32);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 40);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 48);
cpu->pc[thread] += ((uint64_t)pull(cpu) << 56) + 1;
break;
case 0xF1:
case 0xF3:
if (opcode == 0xE1) {
address = (uint64_t)addr[cpu->pc[thread]]
| (uint64_t)addr[cpu->pc[thread]+1] << 8
| (uint64_t)addr[cpu->pc[thread]+2] << 16
| (uint64_t)addr[cpu->pc[thread]+3] << 24
| (uint64_t)addr[cpu->pc[thread]+4] << 32
| (uint64_t)addr[cpu->pc[thread]+5] << 40
| (uint64_t)addr[cpu->pc[thread]+6] << 48
| (uint64_t)addr[cpu->pc[thread]+7] << 56;
cpu->pc[thread]+=8;
iclk++;
}
if (opcode == 0xE3) {
address = addr[cpu->pc[thread]]
| addr[cpu->pc[thread]+1] << 8
| addr[cpu->pc[thread]+2] << 16
| addr[cpu->pc[thread]+3] << 24;
cpu->pc[thread]+=4;
iclk++;
}
addr[address]--;
break;
case BRK:
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 56);
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 48);
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 40);
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 32);
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 24);
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 16);
push(cpu, (uint64_t)cpu->pc[thread]-1 >> 8);
push(cpu, (uint64_t)cpu->pc[thread]-1 & 0xFF);
push(cpu, (uint64_t)cpu->ps >> 8*thread);
cpu->i[thread] = 1;
setps(cpu, thread);
cpu->pc[thread] = (uint64_t)addr[0xFFE0]
| (uint64_t)addr[0xFFE1] << 8
| (uint64_t)addr[0xFFE2] << 16
| (uint64_t)addr[0xFFE3] << 24
| (uint64_t)addr[0xFFE4] << 32
| (uint64_t)addr[0xFFE5] << 40
| (uint64_t)addr[0xFFE6] << 48
| (uint64_t)addr[0xFFE7] << 56;
default:
if(opcode != BRK) {
printf("Cool, you inputed a non existent opcode, which means\n"
"that you have now wasted clock cycles.\n"
"Good job! *clap*\n");
}
break;
}
ins++;
#if !bench
printf("\033[%uHInstructions executed: %llu, Clock cycles: %llu\n", (6*thread)+1, ins, iclk);
fflush(stdout);
#endif
if (ins >= BENCH_INST) {
end = 1;
pthread_mutex_lock(&mutex);
threads_done++;
inst[thread] = ins;
clk[thread] = iclk;
pthread_cond_signal(&cond);
pthread_mutex_unlock(&mutex);
#if bench
gettimeofday(&en[thread], 0);
#endif
}
}
}
/*
 * Program entry point.
 *
 * Allocates the 64 MiB emulated memory behind `addr`, runs asmmon(), then
 * starts THREADS emulator threads in run(). Thread 0 starts at the 8-byte
 * little-endian vector stored at 0xFFC0, every other thread at the vector
 * stored at 0xFF50. When built with `bench` set, timing statistics are printed
 * after all threads have finished.
 *
 * Returns 0 on normal completion (including when asmmon() returns 2) and 1 if
 * the emulated memory cannot be allocated.
 */
int main(int argc, char **argv) {
	(void)argc;
	(void)argv;
	struct suxthr thr[THREADS];
	/* Start every register, flag and status word from a known zero state. */
	memset(thr, 0, sizeof thr);
	ibcount = 0;
	/* Zero-filled, so memory the loaded program never wrote reads as 0. */
	addr = calloc(0x04000000, 1);
	if (addr == NULL) {
		fputs("Failed to allocate emulated memory.\n", stderr);
		return 1;
	}
	inss = 0;
	if (asmmon() == 2) {
		free(addr);
		return 0;
	}
	for (int i = 0; i < THREADS; i++) {
		/* Thread 0 uses the vector at 0xFFC0, the others the one at 0xFF50. */
		uint64_t vector = i ? 0xFF50 : 0xFFC0;
		uint64_t entry = 0;
		for (int b = 0; b < 8; b++)
			entry |= (uint64_t)addr[vector+b] << (8*b);
		thr[i].sx.sp = 0xFFFF;
		thr[i].sx.a[i] = 0;
		thr[i].sx.x[i] = 0;
		thr[i].sx.y[i] = 0;
		thr[i].sx.pc[i] = entry;
		thr[i].th = i;
		lines[i] = (6*i)+2;
		inst[i] = 0;
	}
	pthread_t workers[THREADS];
	pthread_barrier_init(&bar, NULL, THREADS);
	int result;
	puts("\033[2J\033[H");
	for (int i = 0; i < THREADS; i++) {
		result = pthread_create(&workers[i], NULL, run, &thr[i]);
		assert(!result);
	}
	pthread_mutex_lock(&mutex);
	while (threads_done < THREADS) {
		pthread_cond_wait(&cond, &mutex);
	}
	pthread_mutex_unlock(&mutex);
	/* Join before touching anything the threads use: their stack-allocated
	 * state in thr[], the emulated memory and the bench timestamps. */
	for (int i = 0; i < THREADS; i++) {
		pthread_join(workers[i], NULL);
	}
	pthread_barrier_destroy(&bar);
#if bench
	if (threads_done == THREADS) {
		double tm_sec, tm_usec, tm[THREADS], ttm = 0;
		double clkspd;
		double mhz;
		double ips[THREADS];
		double ipst = 0;
		inss = 0;
		tclk = 0;
		for (int i = 0; i < THREADS; i++) {
			/* Elapsed wall-clock time of thread i, in microseconds. */
			tm_sec = (en[i].tv_sec - str[i].tv_sec);
			tm_usec = (en[i].tv_usec-str[i].tv_usec);
			tm[i] = (tm_sec*1000000)+(tm_usec);
			/* Instructions per microsecond, i.e. MIPS. */
			ips[i] = inst[i]/tm[i];
			inss += inst[i];
			ttm += tm[i];
			ipst += ips[i];
			tclk += clk[i];
			printf("Instructions executed for thread %i: %llu, Instructions per Second for thread %i in MIPS: %f, tm: %f\n", i, (unsigned long long)inst[i], i, ips[i], tm[i]/1000000);
		}
		clkspd = (ttm/1000000)*1000000/tclk;
		mhz = 1000000.0/clkspd/1000000;
		printf("Total Instructions executed: %llu, Total Instructions per Second in MIPS: %f, Clock cycles: %llu, Clock Speed in MHz: %f, tm: %f\n", (unsigned long long)inss, ipst, (unsigned long long)tclk, mhz, ttm/1000000);
	}
#endif
	free(addr);
	return 0;
}