summaryrefslogtreecommitdiff
path: root/src/Dolphin/mtx/mtxvec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/Dolphin/mtx/mtxvec.c')
-rw-r--r--src/Dolphin/mtx/mtxvec.c95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/Dolphin/mtx/mtxvec.c b/src/Dolphin/mtx/mtxvec.c
new file mode 100644
index 0000000..bb2e1d4
--- /dev/null
+++ b/src/Dolphin/mtx/mtxvec.c
@@ -0,0 +1,95 @@
+#include <mtx.h>
+
+asm void PSMTXMultVec(const register Mtx m, const register Vec *src, register Vec *dst)
+{ // Paired-single routine: stores three floats to dst; each is the sum of four products of one 16-byte row of m with (src[0], src[1], src[2], 1.0).
+ nofralloc // CodeWarrior directive: no compiler-generated stack frame; the routine returns via its own blr
+ psq_l f0,0x0(src),0,0 // f0 = (src[0], src[1]); NOTE(review): presumably Vec is {x, y, z} floats at 0/4/8 (declared in mtx.h, not visible here)
+ psq_l f2,0x0(m),0,0 // f2 = first two floats of row 0; NOTE(review): assumes Mtx rows are 4 floats at 0x00/0x10/0x20 - confirm
+ psq_l f1,0x8(src),0x1,0 // single load (W=1): f1 = (src[2], 1.0); all of src is now in registers, before any store to dst
+
+ ps_mul f4, f2, f0 // f4 = (row0[0]*x, row0[1]*y)
+ psq_l f3,0x8(m),0,0 // f3 = last two floats of row 0
+ ps_madd f5, f3, f1, f4 // f5 = (row0[2]*z + row0[0]*x, row0[3]*1.0 + row0[1]*y)
+ psq_l f8,0x10(m),0,0 // f8 = first two floats of row 1
+ ps_sum0 f6, f5, f6, f5 // f6.ps0 = f5.ps0 + f5.ps1 (row 0 result); f6.ps1 keeps its old value and is never stored
+ psq_l f9,0x18(m),0,0 // f9 = last two floats of row 1
+ ps_mul f10, f8, f0 // f10 = (row1[0]*x, row1[1]*y)
+ psq_st f6,0x0(dst),0x1,0 // store ps0 only (W=1) to dst + 0
+ ps_madd f11, f9, f1, f10 // f11 = (row1[2]*z + row1[0]*x, row1[3]*1.0 + row1[1]*y)
+ psq_l f2,0x20(m),0,0 // f2 reused: first two floats of row 2
+ ps_sum0 f12, f11, f12, f11 // f12.ps0 = f11.ps0 + f11.ps1 (row 1 result)
+ psq_l f3,0x28(m),0,0 // f3 reused: last two floats of row 2
+ ps_mul f4, f2, f0 // f4 = (row2[0]*x, row2[1]*y)
+ psq_st f12,0x4(dst),0x1,0 // store ps0 only to dst + 4
+ ps_madd f5, f3, f1, f4 // f5 = (row2[2]*z + row2[0]*x, row2[3]*1.0 + row2[1]*y)
+ ps_sum0 f6, f5, f6, f5 // f6.ps0 = f5.ps0 + f5.ps1 (row 2 result)
+ psq_st f6,0x8(dst),0x1,0 // store ps0 only to dst + 8
+ blr // return to caller
+}
+
+asm void PSMTXMultVecSR(const register Mtx m, const register Vec *src, register Vec *dst)
+{ // Paired-single routine: stores three floats to dst; each is the sum of the first three floats of one 16-byte row of m times (src[0], src[1], src[2]). The fourth float of each row never reaches a stored lane.
+ nofralloc // CodeWarrior directive: no compiler-generated stack frame; the routine returns via its own blr
+ psq_l f0,0x0(m),0,0 // f0 = first two floats of row 0; NOTE(review): assumes Mtx rows are 4 floats at 0x00/0x10/0x20 - confirm
+ psq_l f6,0x0(src),0,0 // f6 = (src[0], src[1]); NOTE(review): presumably Vec is {x, y, z} floats at 0/4/8 (declared in mtx.h, not visible here)
+ psq_l f2,0x10(m),0,0 // f2 = first two floats of row 1
+ ps_mul f8, f0, f6 // f8 = (row0[0]*x, row0[1]*y)
+ psq_l f4,0x20(m),0,0 // f4 = first two floats of row 2
+ ps_mul f10, f2, f6 // f10 = (row1[0]*x, row1[1]*y)
+ psq_l f7,0x8(src),0x1,0 // single load (W=1): f7 = (src[2], 1.0); all of src is now in registers, before any store to dst
+ ps_mul f12, f4, f6 // f12 = (row2[0]*x, row2[1]*y)
+ psq_l f3,0x18(m),0,0 // f3 = last two floats of row 1
+ ps_sum0 f8, f8, f8, f8 // f8.ps0 = row0[0]*x + row0[1]*y
+ psq_l f5,0x28(m),0,0 // f5 = last two floats of row 2
+ ps_sum0 f10, f10, f10, f10 // f10.ps0 = row1[0]*x + row1[1]*y
+ psq_l f1,0x8(m),0,0 // f1 = last two floats of row 0
+ ps_sum0 f12, f12, f12, f12 // f12.ps0 = row2[0]*x + row2[1]*y
+ ps_madd f9, f1, f7, f8 // f9.ps0 = row0[2]*z + f8.ps0; ps1 picks up row0[3]*1.0 but is never stored
+ psq_st f9,0x0(dst),0x1,0 // store ps0 only (W=1) to dst + 0
+ ps_madd f11, f3, f7, f10 // f11.ps0 = row1[2]*z + f10.ps0
+ psq_st f11,0x4(dst),0x1,0 // store ps0 only to dst + 4
+ ps_madd f13, f5, f7, f12 // f13.ps0 = row2[2]*z + f12.ps0
+ psq_st f13,0x8(dst),0x1,0 // store ps0 only to dst + 8
+ blr // return to caller
+}
+
+asm void PSMTXMultVecArraySR(const register Mtx m, const register Vec *src, register Vec *dst, register u32 n)
+{ // Software-pipelined array form: for consecutive 12-byte vectors at src, stores 12-byte results at dst using only the first three floats of each 16-byte row of m. Each loop pass finishes one vector while starting the next.
+ nofralloc // CodeWarrior directive: no compiler-generated stack frame; the routine returns via its own blr
+ psq_l f13,0x0(m),0,0 // f13 = (row0[0], row0[1]); NOTE(review): assumes Mtx rows are 4 floats at 0x00/0x10/0x20 - confirm
+ psq_l f12,0x10(m),0,0 // f12 = (row1[0], row1[1])
+ subi n, n, 0x1 // loop count is n - 1: the first vector is started before the loop and the last is finished after it
+ psq_l f11,0x8(m),0x1,0 // single load (W=1): f11 = (row0[2], 1.0)
+ ps_merge00 f0, f13, f12 // f0 = (row0[0], row1[0]) - coefficients applied to x
+ subi r5, dst, 0x4 // pre-bias the output pointer by -4 for the update-form stores; NOTE(review): writes r5 literally, presumably the register holding dst (third argument) - confirm
+ psq_l f10,0x18(m),0x1,0 // single load: f10 = (row1[2], 1.0)
+ ps_merge11 f1, f13, f12 // f1 = (row0[1], row1[1]) - coefficients applied to y
+ mtctr n // CTR = n - 1; NOTE(review): the loop is bottom-tested with bdnz, so n of 0 or 1 appears to wrap CTR instead of skipping the loop - callers presumably must pass n >= 2; confirm
+ psq_l f3,0x20(m),0,0 // f3 = (row2[0], row2[1])
+ ps_merge00 f2, f11, f10 // f2 = (row0[2], row1[2]) - coefficients applied to z
+ psq_l f4,0x28(m),0x1,0 // single load: f4 = (row2[2], 1.0)
+ psq_l f6,0x0(src),0,0 // f6 = (x, y) of the first vector; NOTE(review): presumably Vec is {x, y, z} floats at 0/4/8 - confirm
+ psq_lu f7,0x8(src),0x1,0 // f7 = (z, 1.0) of the first vector; update form leaves src pointing at that z
+ ps_muls0 f8, f0, f6 // f8 = (row0[0]*x, row1[0]*x)
+ ps_mul f9, f3, f6 // f9 = (row2[0]*x, row2[1]*y)
+ ps_madds1 f8, f1, f6, f8 // f8 += (row0[1]*y, row1[1]*y)
+ ps_madd f10, f4, f7, f9 // f10.ps0 = row2[2]*z + row2[0]*x; ps1 is scratch (1.0 + row2[1]*y)
+
+ loop: // each pass: finish and store the vector in flight, load and start the next one
+ psq_lu f6,0x4(src),0,0 // src += 4 (now at the next vector), f6 = its (x, y)
+ ps_madds0 f12, f2, f7, f8 // f12 = f8 + (row0[2]*z, row1[2]*z): first two outputs of the vector in flight
+ psq_lu f7,0x8(src),0x1,0 // src += 8, f7 = (z, 1.0) of the next vector
+ ps_sum0 f13, f10, f9, f9 // f13.ps0 = f10.ps0 + f9.ps1: third output of the vector in flight
+ ps_muls0 f8, f0, f6 // begin next vector: f8 = (row0[0]*x, row1[0]*x)
+ ps_mul f9, f3, f6 // f9 = (row2[0]*x, row2[1]*y)
+ psq_stu f12,0x4(dst),0,0 // dst += 4, store both lanes of f12 (first two outputs)
+ ps_madds1 f8, f1, f6, f8 // f8 += (row0[1]*y, row1[1]*y)
+ psq_stu f13,0x8(dst),0x1,0 // dst += 8, store ps0 of f13 (third output)
+ ps_madd f10, f4, f7, f9 // f10.ps0 = row2[2]*z + row2[0]*x for the next vector
+ bdnz+ loop // decrement CTR and branch while it is non-zero ('+' hints the branch as taken)
+ ps_madds0 f12, f2, f7, f8 // epilogue: first two outputs of the final vector
+ ps_sum0 f13, f10, f9, f9 // epilogue: third output of the final vector
+ psq_stu f12,0x4(dst),0,0 // dst += 4, store the final vector's first two outputs
+ psq_stu f13,0x8(dst),0x1,0 // dst += 8, store the final vector's third output
+ blr // return to caller
+}