#include "opcode.h"
/*
 * NOTE(review): the names of the three system headers below were missing
 * from the text this file was recovered from. They are reconstructed from
 * the identifiers used here (pthread_mutex_t/pthread_cond_t, WINDOW) and
 * from the `bench` guard -- confirm against the build.
 */
#include <pthread.h>
#if bench
#include <sys/time.h>
#endif
#include <curses.h>

#define THREADS 1
/* Parenthesized so the shift cannot bind to neighbouring operators. */
#define BENCH_INST (100000000 << (THREADS-1))

#define CTRL_ADDR 0x100
#define TX_ADDR 0x101
#define RX_ADDR 0x102
#define STEP_ADDR 0x110
#define CURSES_BACKSPACE 0x7F

extern uint8_t kbd_rdy;
extern WINDOW *scr;

#if debug
extern uint8_t subdbg;
#endif

/*
 * Flag helpers. Both expect `cpu` and `thread` to be in scope at the point
 * of use. setflag() sets `bit` in the thread's status byte when `flag` is
 * non-zero and clears it otherwise; getflag() yields the masked bit.
 */
#define setflag(flag, bit) ((flag) ? (cpu->ps.u8[thread] |= (bit)) : (cpu->ps.u8[thread] &= ~(bit)))
#define getflag(bit) (cpu->ps.u8[thread] & (bit))

extern pthread_mutex_t mutex;
extern pthread_mutex_t main_mutex;
extern pthread_cond_t cond;
extern pthread_cond_t main_cond;

#if debug
extern void disasm(struct sux *cpu, uint64_t *operands, uint8_t lines, uint8_t opcode, uint8_t prefix, uint8_t thread);
#endif

extern void io(uint64_t address, uint8_t *esc);

/*
 * Returns the bits of `reg` that a left shift by `count` moves past bit 63.
 * A count of 0 moves nothing out; a count of 64 or more moves every bit out.
 * Written this way so no shift count of 64 or more is ever evaluated.
 */
static inline uint64_t shifted_out_left(uint64_t reg, uint64_t count) {
	if (count == 0) {
		return 0;
	}
	if (count >= 64) {
		return reg;
	}
	return reg >> (64-count);
}

/*
 * Decodes the operand of `opcode` according to optype[opcode], advances
 * cpu->pc[thread] past the operand bytes, and returns the resulting address.
 *
 * cpu     - processor state; pc, x and y of `thread` are read, pc is written.
 * tmpaddr - receives the raw operand of the zero-matrix/indirect modes, but
 *           only when built with debug && !bench.
 * prefix  - bits 2-3 select the operand width of the addressed modes,
 *           bits 4 and up select the width of an immediate.
 *
 * For IMM the returned address is the program counter itself (the operand
 * lives in the instruction stream). For IMPL, and for TXS, 0 is returned.
 */
static inline uint64_t get_addr(struct sux *cpu, uint64_t *tmpaddr, uint8_t opcode, uint8_t prefix, uint8_t thread) {
	union reg address;
	union reg value;
	uint64_t reg = 0;
	uint8_t tmp = 0;
	address.u64 = 0;
	value.u64 = 0;
	switch (optype[opcode]) {
		case IMPL:
			break;
		case IMM:
			switch (opcode) {
				/* These opcodes always take a single operand byte. */
				case PHB:
				case PHP:
				case PHA:
				case PHY:
				case PHX:
				case PLB:
				case PLP:
				case PLA:
				case PLY:
				case PLX:
				case STT:
				case LSL:
				case LSR:
				case ROL:
				case ROR:
				case ASR:
				case ENT:
					address.u64 = cpu->pc[thread];
					++cpu->pc[thread];
					break;
				default:
					address.u64 = cpu->pc[thread];
					cpu->pc[thread]+=(1 << (prefix >> 4));
					/* Falls Through. */
				case TXS:
					/* TXS leaves the address at 0 and does not move pc here. */
					break;
			}
			break;
		case ZM:
		case ZMX:
		case ZMY:
		case IND:
		case INDX:
		case INDY:
			tmp = 0;
			address.u8[0] = addr[cpu->pc[thread]];
			/*
			 * Unrolled operand fetch. The case order is deliberate:
			 * selector 0 reads 1 byte, 1 reads 3, 3 reads 4, 2 reads 6.
			 */
			switch ((prefix & 0x0C) >> 2) {
				case 2:
					address.u8[5] = addr[cpu->pc[thread]+5];++tmp;
					address.u8[4] = addr[cpu->pc[thread]+4];++tmp;
					/* Falls Through. */
				case 3:
					address.u8[3] = addr[cpu->pc[thread]+3];++tmp;
					/* Falls Through. */
				case 1:
					address.u8[2] = addr[cpu->pc[thread]+2];++tmp;
					address.u8[1] = addr[cpu->pc[thread]+1];++tmp;
					/* Falls Through. */
				case 0:
					++tmp;
			}
			cpu->pc[thread]+=tmp;
			#if debug && !bench
			*tmpaddr = address.u64;
			#endif
			#if getclk
			iclk++;
			#endif
			switch (optype[opcode]) {
				case ZMX:
					address.u64 += cpu->x[thread];
					#if getclk
					iclk++;
					#endif
					break;
				case ZMY:
					address.u64 += cpu->y[thread];
					#if getclk
					iclk++;
					#endif
					break;
				case INDX:
					/* X is added before the pointer is dereferenced. */
					address.u64 += cpu->x[thread];
					#if getclk
					iclk++;
					#endif
					/* Falls Through. */
				case INDY:
					/* Did we fall through? */
					if (optype[opcode] == INDX) {
						reg = 0; /* Yes, so set reg back to zero. */
					} else {
						reg = cpu->y[thread]; /* No, so set reg to Y. */
						#if getclk
						iclk++;
						#endif
					}
					/* Falls Through. */
				case IND:
					/* Read the 8 byte pointer stored at the operand address. */
					value.u8[0] = addr[address.u64  ];
					value.u8[1] = addr[address.u64+1];
					value.u8[2] = addr[address.u64+2];
					value.u8[3] = addr[address.u64+3];
					value.u8[4] = addr[address.u64+4];
					value.u8[5] = addr[address.u64+5];
					value.u8[6] = addr[address.u64+6];
					value.u8[7] = addr[address.u64+7];
					#if getclk
					iclk++;
					#endif
					/* Y (or 0 for IND/INDX) is added after the dereference. */
					value.u64 += reg;
					address.u64 = value.u64;
					break;
				default:
					break;
			}
			break;
		case ABS:
			tmp = 0;
			address.u8[0] = addr[cpu->pc[thread]];++tmp;
			/*
			 * Unrolled operand fetch: selector 0 reads 2 bytes,
			 * 1 reads 5, 2 reads 7, 3 reads 8.
			 */
			switch ((prefix & 0x0C) >> 2) {
				case 3:
					address.u8[7] = addr[cpu->pc[thread]+7];++tmp;
					/* Falls Through. */
				case 2:
					address.u8[6] = addr[cpu->pc[thread]+6];++tmp;
					address.u8[5] = addr[cpu->pc[thread]+5];++tmp;
					#if getclk
					iclk++;
					#endif
					/* Falls Through. */
				case 1:
					address.u8[4] = addr[cpu->pc[thread]+4];++tmp;
					address.u8[3] = addr[cpu->pc[thread]+3];++tmp;
					address.u8[2] = addr[cpu->pc[thread]+2];++tmp;
					/* Falls Through. */
				case 0:
					address.u8[1] = addr[cpu->pc[thread]+1];++tmp;
			}
			cpu->pc[thread]+=tmp;
			#if getclk
			iclk++;
			#endif
			break;
	}
	return address.u64;
}

/*
 * A = A + value + C. Updates Z, N, V and C.
 * C is set when the 64 bit addition wraps, including the case where only
 * the incoming carry causes the wrap. V is set on signed overflow, i.e.
 * when both addends share a sign that the result does not.
 */
inline void adc(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t carry_in = getflag(C);
	uint64_t sum = acc+value+carry_in;
	setflag(sum == 0, Z);
	setflag((sum >> 63), N);
	setflag(((acc^sum) & (value^sum)) >> 63, V);
	setflag((sum < value) || (carry_in && sum == value), C);
	cpu->a[thread] = sum;
}

/*
 * A = A - value - !C. Updates Z, N, V and C.
 * C follows the same convention as the borrow-in and as cmp(): it is set
 * when no borrow was needed and cleared when one was.
 */
inline void sbc(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t borrow_in = !getflag(C);
	uint64_t sum = acc-value-borrow_in;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(((acc^value) >> 63) && ((acc^sum) >> 63), V);
	setflag((acc > value) || (acc == value && !borrow_in), C);
	cpu->a[thread] = sum;
}

/*
 * Register to register transfers, selected by `opcode`.
 * Every transfer except TXS updates Z and N from the destination register.
 * TXS copies X into the stack pointer and, for prefix 0x13 with a matching
 * `value`, also reloads stk_st and skips two more bytes of the instruction
 * stream; it leaves the flags alone. Unknown opcodes do nothing.
 */
inline void transfer(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t prefix, uint8_t thread) {
	uint64_t reg;
	switch (opcode) {
		case TBA: cpu->a[thread] = cpu->b[thread]; reg = cpu->a[thread]; break;
		case TXA: cpu->a[thread] = cpu->x[thread]; reg = cpu->a[thread]; break;
		case TYA: cpu->a[thread] = cpu->y[thread]; reg = cpu->a[thread]; break;
		case TAB: cpu->b[thread] = cpu->a[thread]; reg = cpu->b[thread]; break;
		case TAY: cpu->y[thread] = cpu->a[thread]; reg = cpu->y[thread]; break;
		case TXY: cpu->y[thread] = cpu->x[thread]; reg = cpu->y[thread]; break;
		case TAX: cpu->x[thread] = cpu->a[thread]; reg = cpu->x[thread]; break;
		case TYX: cpu->x[thread] = cpu->y[thread]; reg = cpu->x[thread]; break;
		case TSX:
			/* Low 16 bits come from SP, the bits above from stk_st. */
			cpu->x[thread] = (cpu->sp[thread] & 0xFFFF) | ((uint64_t)cpu->stk_st[thread] << 16);
			reg = cpu->x[thread];
			break;
		case TXS:
			cpu->sp[thread] = cpu->x[thread];
			if (prefix == 0x13 && (value == thread+1 || value > 8)) {
				cpu->stk_st[thread] = value & 0xFF;
				cpu->stk_st[thread] += value << 16;
				cpu->pc[thread]+=2;
			}
			/* No general register was written, so there is nothing to flag. */
			return;
		default:
			return;
	}
	setflag(reg == 0, Z);
	setflag(reg >> 63, N);
}

/*
 * Pushes the low `value` bytes (clamped to 1..8) of the register selected
 * by `opcode` onto the thread's stack, highest byte first, decrementing
 * the stack pointer after each byte. An unknown opcode pushes zeroes.
 * Byte selection goes through a union, so byte[0] being the least
 * significant byte assumes a little-endian host.
 */
inline void push(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t thread) {
	union {
		uint64_t reg;
		uint8_t byte[8];
	} r;
	r.reg = 0;
	/* Clamp in 64 bits first so large counts cannot wrap to a small one. */
	uint64_t size = (value > 0) ? value-1 : 0;
	int top = (size <= 7) ? (int)size : 7;
	switch (opcode) {
		case PHA: r.reg = cpu->a[thread]; break;
		case PHB: r.reg = cpu->b[thread]; break;
		case PHX: r.reg = cpu->x[thread]; break;
		case PHY: r.reg = cpu->y[thread]; break;
		case PHP: r.reg = cpu->ps.u64; break;
		default: break;
	}
	for (int i = top; i >= 0; i--) {
		addr[(cpu->stk_st[thread] << 16)+cpu->sp[thread]] = r.byte[i];
		cpu->sp[thread]--;
	}
}

/*
 * Pulls `value` bytes (clamped to 1..8) from the thread's stack, lowest
 * byte first, incrementing the stack pointer before each read, and stores
 * the zero-extended result in the register selected by `opcode`.
 * The stack pointer moves even when the opcode is unknown.
 */
inline void pull(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t thread) {
	union {
		uint64_t reg;
		uint8_t byte[8];
	} r;
	r.reg = 0;
	/* Clamp in 64 bits first so large counts cannot wrap to a small one. */
	uint64_t size = (value > 0) ? value-1 : 0;
	int top = (size <= 7) ? (int)size : 7;
	for (int i = 0; i <= top; i++) {
		cpu->sp[thread]++;
		r.byte[i] = addr[(cpu->stk_st[thread] << 16)+cpu->sp[thread]];
	}
	switch (opcode) {
		case PLA: cpu->a[thread] = r.reg; break;
		case PLB: cpu->b[thread] = r.reg; break;
		case PLX: cpu->x[thread] = r.reg; break;
		case PLY: cpu->y[thread] = r.reg; break;
		case PLP: cpu->ps.u64 = r.reg; break;
		default: break;
	}
}

/* A = A & value. Updates Z and N. */
inline void and(struct sux *cpu, uint64_t value, uint8_t thread) {
	cpu->a[thread] &= value;
	setflag(cpu->a[thread] == 0, Z);
	setflag(cpu->a[thread] >> 63, N);
}

/* A = A | value. Updates Z and N. */
inline void or(struct sux *cpu, uint64_t value, uint8_t thread) {
	cpu->a[thread] |= value;
	setflag(cpu->a[thread] == 0, Z);
	setflag(cpu->a[thread] >> 63, N);
}

/* A = A ^ value. Updates Z and N. */
inline void xor(struct sux *cpu, uint64_t value, uint8_t thread) {
	cpu->a[thread] ^= value;
	setflag(cpu->a[thread] == 0, Z);
	setflag(cpu->a[thread] >> 63, N);
}

/*
 * A = A << value (0 when value >= 64). Updates Z, N and C.
 * C is set when any 1 bit was shifted out of the top.
 */
inline void lsl(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t sum = (value < 64) ? acc << value : 0;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(shifted_out_left(acc, value), C);
	cpu->a[thread] = sum;
}

/*
 * A = A >> value (0 when value >= 64), filling with zeroes.
 * Updates Z, N and C; C is taken from bit 0 of the old accumulator.
 */
inline void lsr(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t sum = (value < 64) ? cpu->a[thread] >> value : 0;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(cpu->a[thread] & 1, C);
	cpu->a[thread] = sum;
}

/*
 * A = A >> value, filling every vacated bit with the old sign bit, so a
 * negative accumulator shifted by 64 or more becomes all ones and a
 * non-negative one becomes 0.
 * Updates Z, N and C; C is taken from bit 0 of the old accumulator.
 */
inline void asr(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t sum = (value < 64) ? acc >> value : 0;
	if (acc >> 63) {
		/* Mask of the `value` topmost bits. */
		sum |= (value < 64) ? ~(~(uint64_t)0 >> value) : ~(uint64_t)0;
	}
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(acc & 1, C);
	cpu->a[thread] = sum;
}

/*
 * A = (A << value) | C. Updates Z, N and C.
 * C is set when any 1 bit was shifted out of the top. Counts of 0 and of
 * 64 or more are handled without evaluating an out-of-range shift.
 */
inline void rol(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t sum = (value < 64) ? acc << value : 0;
	sum |= getflag(C);
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(shifted_out_left(acc, value), C);
	cpu->a[thread] = sum;
}

/*
 * A = (A >> value) | (C << (64 - value)). Updates Z, N and C; C is taken
 * from bit 0 of the old accumulator. The carry is only merged in when its
 * target position exists (value 1..64), which keeps every shift in range.
 */
inline void ror(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t sum = (value < 64) ? acc >> value : 0;
	if (value >= 1 && value <= 64) {
		sum |= (uint64_t)getflag(C) << (64-value);
	}
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(acc & 1, C);
	cpu->a[thread] = sum;
}

/*
 * A = A * value (low 64 bits). Updates Z, N and V.
 * V compares the signs of the operands and of the product, so it must be
 * evaluated against the accumulator as it was before the multiplication.
 */
inline void mul(struct sux *cpu, uint64_t value, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t sum = acc*value;
	cpu->a[thread] = sum;
	setflag(sum == 0, Z);
	setflag(sum >> 63, N);
	setflag(!((acc^value) >> 63) && ((acc^sum) >> 63), V);
}

/*
 * A = A / value. Updates Z and N.
 * For DAB the remainder of A / B goes to X; for every other opcode the
 * remainder of A / value goes to B.
 * A zero divisor would be undefined behaviour on the host, so in that case
 * the registers and flags are left untouched.
 */
inline void divd(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t thread) {
	uint64_t acc = cpu->a[thread];
	uint64_t rem_divisor = (opcode != DAB) ? value : cpu->b[thread];
	if (value == 0 || rem_divisor == 0) {
		return;
	}
	uint64_t sum = acc/value;
	if (opcode != DAB) {
		cpu->b[thread] = acc % rem_divisor;
	} else {
		cpu->x[thread] = acc % rem_divisor;
	}
	cpu->a[thread] = sum;
	setflag(sum == 0, Z);
	setflag((sum >> 63), N);
}

/*
 * Compares the register selected by `opcode` with `value` by computing
 * reg - value and updating N, V, Z and C without storing the difference.
 * C is set when reg >= value. Unknown opcodes leave the flags untouched.
 */
inline void cmp(struct sux *cpu, uint64_t value, uint8_t opcode, uint8_t thread) {
	uint64_t reg;
	switch (opcode) {
		case CPB:
		case CPB_AB:
		case CPB_Z:
		case CPB_IN:
		case CPB_IX:
		case CPB_IY:
			reg = cpu->b[thread];
			break;
		case CMP:
		case CAB:
		case CMP_AB:
		case CMP_Z:
		case CMP_IN:
		case CMP_IX:
		case CMP_IY:
			reg = cpu->a[thread];
			break;
		case CPY:
		case CPY_AB:
		case CPY_Z:
		case CPY_IN:
			reg = cpu->y[thread];
			break;
		case CPX:
		case CPX_AB:
		case CPX_Z:
		case CPX_IN:
			reg = cpu->x[thread];
			break;
		default:
			return;
	}
	uint64_t sum = reg-value;
	setflag(sum >> 63, N);
	setflag(((reg^value) >> 63) && ((reg^sum) >> 63), V);
	setflag(sum == 0, Z);
	setflag(reg >= value, C);
}

/*
 * Increments the register selected by `opcode` and updates Z and N from
 * the new value. Unknown opcodes do nothing.
 */
inline void incr(struct sux *cpu, uint8_t opcode, uint8_t thread) {
	uint64_t reg;
	switch (opcode) {
		case INC: cpu->a[thread]+=1; reg = cpu->a[thread]; break;
		case INB: cpu->b[thread]+=1; reg = cpu->b[thread]; break;
		case INY: cpu->y[thread]+=1; reg = cpu->y[thread]; break;
		case INX: cpu->x[thread]+=1; reg = cpu->x[thread]; break;
		default: return;
	}
	setflag(reg == 0, Z);
	setflag(reg >> 63, N);
}

/*
 * Decrements the register selected by `opcode` and updates Z and N from
 * the new value. Unknown opcodes do nothing.
 */
inline void decr(struct sux *cpu, uint8_t opcode, uint8_t thread) {
	uint64_t reg;
	switch (opcode) {
		case DEC: cpu->a[thread]-=1; reg = cpu->a[thread]; break;
		case DEB: cpu->b[thread]-=1; reg = cpu->b[thread]; break;
		case DEY: cpu->y[thread]-=1; reg = cpu->y[thread]; break;
		case DEX: cpu->x[thread]-=1; reg = cpu->x[thread]; break;
		default: return;
	}
	setflag(reg == 0, Z);
	setflag(reg >> 63, N);
}

/* Increments the byte at `address`; Z and N reflect that single byte. */
inline void incm(struct sux *cpu, uint64_t address, uint8_t thread) {
	addr[address]++;
	setflag(addr[address] == 0, Z);
	setflag(addr[address] >> 7, N);
}

/* Decrements the byte at `address`; Z and N reflect that single byte. */
inline void decm(struct sux *cpu, uint64_t address, uint8_t thread) {
	addr[address]--;
	setflag(addr[address] == 0, Z);
	setflag(addr[address] >> 7, N);
}

/*
 * Loads 1, 2, 4 or 8 bytes (1 << (prefix >> 4)) starting at `address`,
 * zero-extends them, stores the result in the register selected by
 * `opcode` and updates Z and N from the 64 bit value.
 * A read of CTRL_ADDR first calls io() so the device can refresh it.
 */
inline void load(struct sux *cpu, uint64_t address, uint8_t *esc, uint8_t opcode, uint8_t prefix, uint8_t thread) {
	if (address == CTRL_ADDR) {
		io(address, esc);
	}
	union reg value;
	value.u64 = 0;
	value.u8[0] = addr[address];
	/* Wider loads fall through to pick up every lower byte as well. */
	switch (1 << (prefix >> 4)) {
		case 8:
			value.u8[7] = addr[address+7];
			value.u8[6] = addr[address+6];
			value.u8[5] = addr[address+5];
			value.u8[4] = addr[address+4];
			/* Falls Through. */
		case 4:
			value.u8[3] = addr[address+3];
			value.u8[2] = addr[address+2];
			/* Falls Through. */
		case 2:
			value.u8[1] = addr[address+1];
			break;
		default:
			break;
	}
	switch (opcode) {
		case LDB:
		case LDB_AB:
		case LDB_Z:
		case LDB_ZX:
		case LDB_ZY:
		case LDB_IN:
		case LDB_IX:
		case LDB_IY:
			cpu->b[thread] = value.u64;
			break;
		case LDA:
		case LDA_AB:
		case LDA_Z:
		case LDA_ZX:
		case LDA_ZY:
		case LDA_IN:
		case LDA_IX:
		case LDA_IY:
			cpu->a[thread] = value.u64;
			break;
		case LDY:
		case LDY_AB:
		case LDY_Z:
		case LDY_ZX:
		case LDY_IN:
			cpu->y[thread] = value.u64;
			break;
		case LDX:
		case LDX_AB:
		case LDX_Z:
		case LDX_ZY:
		case LDX_IN:
			cpu->x[thread] = value.u64;
			break;
		default:
			break;
	}
	setflag(value.u64 == 0, Z);
	setflag(value.u64 >> 63, N);
}

/*
 * Stores the low 1, 2, 4 or 8 bytes (1 << (prefix >> 4)) of the register
 * selected by `opcode` at `address`. An unknown opcode stores zeroes.
 * The first byte is written before io() is called for TX_ADDR, so the
 * device sees the new value; the remaining bytes are written afterwards.
 */
inline void store(struct sux *cpu, uint64_t address, uint8_t *esc, uint8_t opcode, uint8_t prefix, uint8_t thread) {
	union reg value;
	value.u64 = 0;
	switch (opcode) {
		case STB:
		case STB_Z:
		case STB_ZX:
		case STB_ZY:
		case STB_IN:
		case STB_IX:
		case STB_IY:
			value.u64 = cpu->b[thread];
			break;
		case STA:
		case STA_Z:
		case STA_ZX:
		case STA_ZY:
		case STA_IN:
		case STA_IX:
		case STA_IY:
			value.u64 = cpu->a[thread];
			break;
		case STY:
		case STY_Z:
		case STY_ZX:
		case STY_IN:
			value.u64 = cpu->y[thread];
			break;
		case STX:
		case STX_Z:
		case STX_ZY:
		case STX_IN:
			value.u64 = cpu->x[thread];
			break;
		default:
			break;
	}
	addr[address] = value.u8[0];
	/*
	 * NOTE(review): the other conditionals in this file test `bench`;
	 * confirm that `branch` is the intended macro here.
	 */
	#if (IO || debug) && !branch
		#if keypoll
		pthread_mutex_lock(&mutex);
		#endif
		/* TX_ADDR differs from CTRL_ADDR, so one comparison is enough. */
		if (address == TX_ADDR) {
			io(address, esc);
		}
		#if keypoll
		pthread_mutex_unlock(&mutex);
		#endif
	#endif
	/* Wider stores fall through to write every lower byte as well. */
	switch (1 << (prefix >> 4)) {
		case 8:
			addr[address+7] = value.u8[7];
			addr[address+6] = value.u8[6];
			addr[address+5] = value.u8[5];
			addr[address+4] = value.u8[4];
			/* Falls Through. */
		case 4:
			addr[address+3] = value.u8[3];
			addr[address+2] = value.u8[2];
			/* Falls Through. */
		case 2:
			addr[address+1] = value.u8[1];
			break;
		default:
			break;
	}
}