/*
 * Instruction-execution helpers operating on `struct sux` CPU state:
 * operand address decoding, ALU operations, stack push/pull and
 * memory load/store.  Types such as `struct sux` and `union reg`, the
 * `addr` memory array, the `optype` table and the opcode/flag constants
 * are expected to come from "opcode.h".
 */
#include "opcode.h"
/*
 * NOTE(review): the names of the three system headers below were missing
 * from the text this file was restored from.  They are inferred from the
 * pthread_* and WINDOW declarations further down and from the `bench`
 * guard -- confirm against the build.
 */
#include <pthread.h>

#if bench
#include <sys/time.h>
#endif

#include <curses.h>

#define THREADS 1
/* Parenthesized so the shift cannot be split by surrounding operators. */
#define BENCH_INST (100000000 << (THREADS-1))
#define CTRL_ADDR 0x100
#define TX_ADDR 0x101
#define RX_ADDR 0x102
#define STEP_ADDR 0x110
#define CURSES_BACKSPACE 0x7F

extern uint8_t kbd_rdy;
extern WINDOW *scr;

#if debug
extern uint8_t subdbg;
#endif

extern uint8_t step;
extern uint8_t esc;

/*
 * Flag helpers.  Both macros expand to expressions that use the names
 * `cpu` and `thread`, so they only work where those are in scope.
 * setflag() sets `bit` in the status byte when `flag` is non-zero and
 * clears it otherwise; getflag() yields the masked (not normalized) bit.
 */
#define setflag(flag, bit) ((flag) ? (cpu->ps.u8[thread] |= (bit)) : (cpu->ps.u8[thread] &= ~(bit)))
#define getflag(bit) (cpu->ps.u8[thread] & (bit))

extern pthread_mutex_t mutex;
extern pthread_mutex_t main_mutex;
extern pthread_cond_t cond;
extern pthread_cond_t main_cond;

#if debug
extern void disasm(struct sux *cpu, uint64_t *operands, uint8_t lines, uint8_t opcode, uint8_t prefix, uint8_t thread);
#endif

extern void io(uint64_t address, uint8_t rw);

/* Left shift that yields 0 instead of undefined behaviour for counts >= 64. */
static inline uint64_t sux_shl64(uint64_t x, uint64_t n) {
	return (n < 64) ? x << n : 0;
}

/* Right shift that yields 0 instead of undefined behaviour for counts >= 64. */
static inline uint64_t sux_shr64(uint64_t x, uint64_t n) {
	return (n < 64) ? x >> n : 0;
}

/*
 * Operand width in bytes selected by the upper nibble of `prefix`.
 * Widths other than 2, 4 and 8 are treated as a single byte.
 */
static inline uint8_t sux_opsize(uint8_t prefix) {
	switch (1 << (prefix >> 4)) {
		case 8: return 8;
		case 4: return 4;
		case 2: return 2;
		default: return 1;
	}
}

/*
 * Number of bytes pushed/pulled for a requested count: at least one, at
 * most eight.  The clamp happens before narrowing, so counts above 256
 * are not truncated modulo 256.  Returns the index of the last byte.
 */
static inline uint8_t sux_stack_last(uint64_t value) {
	if (value == 0) {
		return 0;
	}
	return (value > 8) ? 7 : (uint8_t)(value-1);
}

/*
 * Decode the operand of `opcode` according to optype[opcode], advance
 * cpu->pc[thread] past the operand bytes and return the effective address.
 *
 * cpu      CPU state.
 * tmpaddr  Receives the raw (pre-index) zero-matrix/indirect operand,
 *          only when built with debug && !bench.
 * opcode   Opcode being decoded.
 * prefix   Prefix byte: bits 2-3 select the operand length of memory
 *          operands, the upper nibble the width of immediates.
 * thread   Index of the hardware thread.
 *
 * Returns 0 for implied operands, the address of the immediate for IMM,
 * and the computed address otherwise.
 */
static inline uint64_t get_addr(struct sux *cpu, uint64_t *tmpaddr, uint8_t opcode, uint8_t prefix, uint8_t thread) {
	/* Operand byte counts indexed by (prefix & 0x0C) >> 2. */
	static const uint8_t zm_len[4]  = {1, 3, 6, 4};
	static const uint8_t abs_len[4] = {2, 5, 7, 8};
	union reg address;
	union reg value;
	uint64_t reg = 0;
	uint8_t mode = (prefix & 0x0C) >> 2;
	uint8_t len = 0;
	uint8_t i;
	address.u64 = 0;
	value.u64 = 0;
	switch (optype[opcode]) {
		case IMPL:
			break;
		case IMM:
			address.u64 = cpu->pc[thread];
			switch (opcode) {
				/* These opcodes always take a one byte immediate. */
				case PHB:
				case PHP:
				case PHA:
				case PHY:
				case PHX:
				case PLB:
				case PLP:
				case PLA:
				case PLY:
				case PLX:
				case STT:
				case LSL:
				case LSR:
				case ROL:
				case ROR:
				case ASR:
				case ENT:
					++cpu->pc[thread];
					break;
				/* TXS leaves the program counter alone here. */
				case TXS:
					break;
				default:
					cpu->pc[thread] += (1 << (prefix >> 4));
					break;
			}
			break;
		case ZM:
		case ZMX:
		case ZMY:
		case IND:
		case INDX:
		case INDY:
			len = zm_len[mode];
			for (i = 0; i < len; i++) {
				address.u8[i] = addr[cpu->pc[thread]+i];
			}
			cpu->pc[thread] += len;
			#if debug && !bench
			*tmpaddr = address.u64;
			#endif
			#if getclk
			iclk++;
			#endif
			switch (optype[opcode]) {
				case ZMX:
					address.u64 += cpu->x[thread];
					#if getclk
					iclk++;
					#endif
					break;
				case ZMY:
					address.u64 += cpu->y[thread];
					#if getclk
					iclk++;
					#endif
					break;
				case INDX:
					/* X is added before the pointer is fetched. */
					address.u64 += cpu->x[thread];
					#if getclk
					iclk++;
					#endif
					/* Falls Through. */
				case INDY:
					if (optype[opcode] == INDX) {
						reg = 0; /* Reached by fall through: nothing to add afterwards. */
					} else {
						reg = cpu->y[thread]; /* Y is added after the pointer is fetched. */
						#if getclk
						iclk++;
						#endif
					}
					/* Falls Through. */
				case IND:
					/* Fetch the eight byte pointer stored at the operand address. */
					for (i = 0; i < 8; i++) {
						value.u8[i] = addr[address.u64+i];
					}
					#if getclk
					iclk++;
					#endif
					value.u64 += reg;
					address.u64 = value.u64;
					break;
				default:
					break;
			}
			break;
		case ABS:
			len = abs_len[mode];
			for (i = 0; i < len; i++) {
				address.u8[i] = addr[cpu->pc[thread]+i];
			}
			#if getclk
			/* The two longest operand forms cost one extra clock. */
			if (mode >= 2) {
				iclk++;
			}
			#endif
			cpu->pc[thread] += len;
			#if getclk
			iclk++;
			#endif
			break;
	}
	return address.u64;
}

/*
 * Add `value` and the carry flag to the accumulator; updates Z, N, V, C.
 * V is set when both operands have the same sign and the result's sign
 * differs.  C is the carry out of bit 63, including the case where the
 * carry-in alone causes the wrap.
 */
static inline void adc(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint8_t carry_in = (getflag(C) != 0);
	uint64_t sum = a+value+carry_in;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(!((a^value) >> 63) && ((a^sum) >> 63), V);
	setflag((sum < value) || (carry_in && sum == value), C);
	cpu->a[thread] = sum;
}

/*
 * Subtract `value` and the inverted carry flag from the accumulator;
 * updates Z, N, V, C.  C is set when no borrow occurs, matching cmp(),
 * which sets C for reg >= value.
 */
static inline void sbc(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint8_t borrow_in = !getflag(C);
	uint64_t sum = a-value-borrow_in;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(((a^value) >> 63) && ((a^sum) >> 63), V);
	setflag((a > value) || (a == value && !borrow_in), C);
	cpu->a[thread] = sum;
}

/*
 * Register to register transfers.  Z and N are updated from the value
 * written for every transfer into A, B, X or Y (including TSX).
 * TXS and unrecognised opcodes leave the flags untouched; previously the
 * flags were derived from an uninitialized variable in those cases.
 */
static inline void transfer(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t prefix, uint8_t thread) {
	uint64_t reg = 0;
	uint8_t update_flags = 1;
	switch (opcode) {
		case TBA: reg = cpu->a[thread] = cpu->b[thread]; break;
		case TXA: reg = cpu->a[thread] = cpu->x[thread]; break;
		case TYA: reg = cpu->a[thread] = cpu->y[thread]; break;
		case TAB: reg = cpu->b[thread] = cpu->a[thread]; break;
		case TAY: reg = cpu->y[thread] = cpu->a[thread]; break;
		case TXY: reg = cpu->y[thread] = cpu->x[thread]; break;
		case TAX: reg = cpu->x[thread] = cpu->a[thread]; break;
		case TYX: reg = cpu->x[thread] = cpu->y[thread]; break;
		case TSX:
			/* Combine stack bank and stack pointer instead of overwriting one with the other. */
			cpu->x[thread] = ((uint64_t)cpu->stk_st[thread] << 16) | (cpu->sp[thread] & 0xFFFF);
			reg = cpu->x[thread];
			break;
		case TXS:
			cpu->sp[thread] = cpu->x[thread];
			if (prefix == 0x13 && (value == thread+1 || value > 8)) {
				cpu->stk_st[thread] = value & 0xFF;
				cpu->stk_st[thread] += value << 16;
				cpu->pc[thread] += 2;
			}
			update_flags = 0;
			break;
		default:
			update_flags = 0;
			break;
	}
	if (update_flags) {
		setflag(reg == 0, Z);
		setflag(reg >> 63, N);
	}
}

/*
 * Push the low `value` bytes of `reg` (clamped to 1..8) onto the stack,
 * highest byte first, decrementing the stack pointer after each byte.
 */
static inline void push(struct sux *cpu, uint64_t value, uint64_t reg, uint8_t thread) {
	union {
		uint64_t reg;
		uint8_t byte[8];
	} r;
	int i;
	r.reg = reg;
	for (i = sux_stack_last(value); i >= 0; i--) {
		addr[(cpu->stk_st[thread] << 16)+cpu->sp[thread]] = r.byte[i];
		cpu->sp[thread]--;
	}
}

/*
 * Pull `value` bytes (clamped to 1..8) from the stack, lowest byte first,
 * incrementing the stack pointer before each byte.  Returns the bytes
 * assembled into a 64-bit value with the remaining bytes zero.
 */
static inline uint64_t pull(struct sux *cpu, uint64_t value, uint8_t thread) {
	union {
		uint64_t reg;
		uint8_t byte[8];
	} r;
	uint8_t last = sux_stack_last(value);
	uint8_t i;
	r.reg = 0;
	for (i = 0; i <= last; i++) {
		cpu->sp[thread]++;
		r.byte[i] = addr[(cpu->stk_st[thread] << 16)+cpu->sp[thread]];
	}
	return r.reg;
}

/* Bitwise AND of the accumulator with `value`; updates Z and N. */
static inline void and(struct sux *cpu, uint64_t value, uint8_t thread) {
	cpu->a[thread] &= value;
	setflag(cpu->a[thread] == 0, Z);
	setflag(cpu->a[thread] >> 63, N);
}

/* Bitwise OR of the accumulator with `value`; updates Z and N. */
static inline void or(struct sux *cpu, uint64_t value, uint8_t thread) {
	cpu->a[thread] |= value;
	setflag(cpu->a[thread] == 0, Z);
	setflag(cpu->a[thread] >> 63, N);
}

/* Bitwise XOR of the accumulator with `value`; updates Z and N. */
static inline void xor(struct sux *cpu, uint64_t value, uint8_t thread) {
	cpu->a[thread] ^= value;
	setflag(cpu->a[thread] == 0, Z);
	setflag(cpu->a[thread] >> 63, N);
}

/*
 * Logical shift left of the accumulator by `value` bits.
 * C is set when any bit was shifted out.  A count of 0 or above 64 now
 * clears C instead of performing an out-of-range shift.
 */
static inline void lsl(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint64_t sum = sux_shl64(a, value);
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(sux_shr64(a, 64-value), C);
	cpu->a[thread] = sum;
}

/*
 * Logical shift right of the accumulator by `value` bits.
 * C receives bit 0 of the original accumulator regardless of the count.
 * NOTE(review): for counts other than 1 this is not the last bit shifted
 * out -- confirm whether that is intended.
 */
static inline void lsr(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint64_t sum = sux_shr64(a, value);
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(a & 1, C);
	cpu->a[thread] = sum;
}

/*
 * Arithmetic shift right of the accumulator by `value` bits.  All
 * vacated high bits are filled with the sign bit (previously only bit 63
 * was restored), and counts >= 64 yield all sign bits.
 * C receives bit 0 of the original accumulator.
 */
static inline void asr(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint64_t sign_fill = (a >> 63) ? ~(uint64_t)0 : 0;
	uint64_t sum;
	if (value == 0) {
		sum = a;
	} else if (value < 64) {
		sum = (a >> value) | (sign_fill << (64-value));
	} else {
		sum = sign_fill;
	}
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(a & 1, C);
	cpu->a[thread] = sum;
}

/*
 * Shift the accumulator left by `value` bits and OR the current carry
 * flag into the result; C is then set when any bit was shifted out.
 * Out-of-range shift counts contribute 0 instead of being undefined.
 */
static inline void rol(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint64_t sum = sux_shl64(a, value) | getflag(C);
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(sux_shr64(a, 64-value), C);
	cpu->a[thread] = sum;
}

/*
 * Shift the accumulator right by `value` bits and OR the current carry
 * flag in at bit position 64-value; C then receives bit 0 of the
 * original accumulator.  Out-of-range shift counts contribute 0 instead
 * of being undefined.
 */
static inline void ror(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint64_t sum = sux_shr64(a, value);
	sum |= sux_shl64((uint64_t)getflag(C), 64-value);
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(a & 1, C);
	cpu->a[thread] = sum;
}

/*
 * Multiply the accumulator by `value`; updates Z, N and V.
 * V is evaluated against the accumulator's value from before the
 * multiplication; using the already overwritten register made the test
 * always false.
 */
static inline void mul(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t a = cpu->a[thread];
	uint64_t sum = a*value;
	cpu->a[thread] = sum;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(!((a^value) >> 63) && ((a^sum) >> 63), V);
}

/*
 * Divide the accumulator.  For DAB the divisor is register B and the
 * remainder goes to X; otherwise the divisor is `value` and the remainder
 * goes to B.  The quotient is stored in A and drives Z and N.
 *
 * Division by zero no longer invokes undefined behaviour: the quotient
 * becomes all ones and the remainder the dividend.
 * NOTE(review): that result is a chosen convention, not taken from a
 * specification -- confirm the intended behaviour.
 */
static inline void divd(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t thread) {
	uint64_t dividend = cpu->a[thread];
	/* Quotient and remainder must use the same divisor. */
	uint64_t divisor = (opcode == DAB) ? cpu->b[thread] : value;
	uint64_t quotient;
	uint64_t remainder;
	if (divisor == 0) {
		quotient = ~(uint64_t)0;
		remainder = dividend;
	} else {
		quotient = dividend / divisor;
		remainder = dividend % divisor;
	}
	if (opcode != DAB) {
		cpu->b[thread] = remainder;
	} else {
		cpu->x[thread] = remainder;
	}
	cpu->a[thread] = quotient;
	setflag(quotient == 0, Z);
	setflag(quotient >> 63, N);
}

/*
 * Compare `reg` with `value` by subtracting; only the flags are changed.
 * C is set when reg >= value, Z when they are equal.
 */
static inline void cmp(struct sux *cpu, uint64_t value, uint64_t reg, uint8_t thread) {
	uint64_t diff = reg-value;
	setflag(diff >> 63, N);
	setflag(((reg^value) >> 63) && ((reg^diff) >> 63), V);
	setflag(diff == 0, Z);
	setflag(reg >= value, C);
}

/* Increment (inc != 0) or decrement a register value; updates Z and N and returns the result. */
static inline uint64_t idr(struct sux *cpu, uint64_t reg, uint8_t inc, uint8_t thread) {
	reg = inc ? reg+1 : reg-1;
	setflag(reg == 0, Z);
	setflag(reg >> 63, N);
	return reg;
}

/*
 * Increment (inc != 0) or decrement the memory operand at `address`,
 * whose width comes from the upper nibble of `prefix`.
 * Z and N describe the value as stored, i.e. truncated to the operand
 * width, with N taken from the operand's top bit.
 * io() is invoked after the first byte is written and before the rest.
 */
static inline void idm(struct sux *cpu, uint64_t address, uint8_t prefix, uint8_t inc, uint8_t thread) {
	uint8_t size = sux_opsize(prefix);
	union reg value;
	uint8_t i;
	value.u64 = 0;
	for (i = 0; i < size; i++) {
		value.u8[i] = addr[address+i];
	}
	if (inc) {
		value.u64++;
	} else {
		value.u64--;
	}
	/* Drop carries/borrows beyond the operand so the flags match memory. */
	if (size < 8) {
		value.u64 &= ((uint64_t)1 << (size*8))-1;
	}
	setflag(value.u64 == 0, Z);
	setflag(value.u64 >> (size*8-1), N);
	addr[address] = value.u8[0];
	io(address, 0);
	for (i = 1; i < size; i++) {
		addr[address+i] = value.u8[i];
	}
}

/*
 * Load an operand of the width selected by `prefix` from `address` into
 * the low bytes of `reg`, keeping the remaining bytes of `reg`.
 * io() is invoked with rw = 1 before memory is read.  Z and N are set
 * from the full 64-bit result, which is returned.
 */
static inline uint64_t load(struct sux *cpu, uint64_t address, uint64_t reg, uint8_t prefix, uint8_t thread) {
	uint8_t size = sux_opsize(prefix);
	union reg value;
	uint8_t i;
	io(address, 1);
	value.u64 = reg;
	for (i = 0; i < size; i++) {
		value.u8[i] = addr[address+i];
	}
	setflag(value.u64 == 0, Z);
	setflag(value.u64 >> 63, N);
	return value.u64;
}

/*
 * Store the low bytes of `reg`, as many as `prefix` selects, at `address`.
 * When enabled at build time, io() is invoked with rw = 0 after the first
 * byte is written and before the remaining bytes.
 */
static inline void store(struct sux *cpu, uint64_t address, uint64_t reg, uint8_t prefix, uint8_t thread) {
	uint8_t size = sux_opsize(prefix);
	union reg value;
	uint8_t i;
	value.u64 = reg;
	addr[address] = value.u8[0];
	#if (IO || debug) && !branch
		#if keypoll
		pthread_mutex_lock(&mutex);
		#endif
		io(address, 0);
		#if keypoll
		pthread_mutex_unlock(&mutex);
		#endif
	#endif
	for (i = 1; i < size; i++) {
		addr[address+i] = value.u8[i];
	}
}