From 55487832c614edb61ebf22d0c153944ba864feaa Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Thu, 2 Feb 2023 11:30:25 -0400 Subject: sdk: Add `mtx` We're slowly getting there. --- src/Dolphin/mtx/mtxvec.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/Dolphin/mtx/mtxvec.c (limited to 'src/Dolphin/mtx/mtxvec.c') diff --git a/src/Dolphin/mtx/mtxvec.c b/src/Dolphin/mtx/mtxvec.c new file mode 100644 index 0000000..bb2e1d4 --- /dev/null +++ b/src/Dolphin/mtx/mtxvec.c @@ -0,0 +1,95 @@ +#include + +asm void PSMTXMultVec(const register Mtx m, const register Vec *src, register Vec *dst) +{ + nofralloc + psq_l f0,0x0(src),0,0 + psq_l f2,0x0(m),0,0 + psq_l f1,0x8(src),0x1,0 + + ps_mul f4, f2, f0 + psq_l f3,0x8(m),0,0 + ps_madd f5, f3, f1, f4 + psq_l f8,0x10(m),0,0 + ps_sum0 f6, f5, f6, f5 + psq_l f9,0x18(m),0,0 + ps_mul f10, f8, f0 + psq_st f6,0x0(dst),0x1,0 + ps_madd f11, f9, f1, f10 + psq_l f2,0x20(m),0,0 + ps_sum0 f12, f11, f12, f11 + psq_l f3,0x28(m),0,0 + ps_mul f4, f2, f0 + psq_st f12,0x4(dst),0x1,0 + ps_madd f5, f3, f1, f4 + ps_sum0 f6, f5, f6, f5 + psq_st f6,0x8(dst),0x1,0 + blr +} + +asm void PSMTXMultVecSR(const register Mtx m, const register Vec *src, register Vec *dst) +{ + nofralloc + psq_l f0,0x0(m),0,0 + psq_l f6,0x0(src),0,0 + psq_l f2,0x10(m),0,0 + ps_mul f8, f0, f6 + psq_l f4,0x20(m),0,0 + ps_mul f10, f2, f6 + psq_l f7,0x8(src),0x1,0 + ps_mul f12, f4, f6 + psq_l f3,0x18(m),0,0 + ps_sum0 f8, f8, f8, f8 + psq_l f5,0x28(m),0,0 + ps_sum0 f10, f10, f10, f10 + psq_l f1,0x8(m),0,0 + ps_sum0 f12, f12, f12, f12 + ps_madd f9, f1, f7, f8 + psq_st f9,0x0(dst),0x1,0 + ps_madd f11, f3, f7, f10 + psq_st f11,0x4(dst),0x1,0 + ps_madd f13, f5, f7, f12 + psq_st f13,0x8(dst),0x1,0 + blr +} + +asm void PSMTXMultVecArraySR(const register Mtx m, const register Vec *src, register Vec *dst, register u32 n) +{ + nofralloc + psq_l f13,0x0(m),0,0 + psq_l f12,0x10(m),0,0 + subi n, n, 0x1 + psq_l f11,0x8(m),0x1,0 + ps_merge00 f0, f13, f12 + subi r5, dst, 0x4 + psq_l f10,0x18(m),0x1,0 + ps_merge11 f1, f13, f12 + mtctr n + psq_l f3,0x20(m),0,0 + ps_merge00 f2, f11, f10 + psq_l f4,0x28(m),0x1,0 + psq_l f6,0x0(src),0,0 + psq_lu f7,0x8(src),0x1,0 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + ps_madds1 f8, f1, f6, f8 + ps_madd f10, f4, f7, f9 + + loop: + psq_lu f6,0x4(src),0,0 + ps_madds0 f12, f2, f7, f8 + psq_lu f7,0x8(src),0x1,0 + ps_sum0 f13, f10, f9, f9 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + psq_stu f12,0x4(dst),0,0 + ps_madds1 f8, f1, f6, f8 + psq_stu f13,0x8(dst),0x1,0 + ps_madd f10, f4, f7, f9 + bdnz+ loop + ps_madds0 f12, f2, f7, f8 + ps_sum0 f13, f10, f9, f9 + psq_stu f12,0x4(dst),0,0 + psq_stu f13,0x8(dst),0x1,0 + blr +} -- cgit v1.2.3-13-gbd6f