summaryrefslogtreecommitdiff
path: root/src/Dolphin
diff options
context:
space:
mode:
authormrb0nk500 <b0nk@b0nk.xyz>2023-02-02 11:30:25 -0400
committermrb0nk500 <b0nk@b0nk.xyz>2023-02-02 11:30:25 -0400
commit55487832c614edb61ebf22d0c153944ba864feaa (patch)
tree335e05d8c9a4ab4b4154b26bdd078ef66ccb7254 /src/Dolphin
parentac1bd37d19f946142d03992a35b4d5d353e003ff (diff)
sdk: Add `mtx`
We're slowly getting there.
Diffstat (limited to 'src/Dolphin')
-rw-r--r--src/Dolphin/mtx/mtx.c648
-rw-r--r--src/Dolphin/mtx/mtx44.c98
-rw-r--r--src/Dolphin/mtx/mtxvec.c95
-rw-r--r--src/Dolphin/mtx/vec.c146
4 files changed, 987 insertions, 0 deletions
diff --git a/src/Dolphin/mtx/mtx.c b/src/Dolphin/mtx/mtx.c
new file mode 100644
index 0000000..4a229bd
--- /dev/null
+++ b/src/Dolphin/mtx/mtx.c
@@ -0,0 +1,648 @@
+// This file was taken from the Pikmin 2 decompilation project.
+// https://github.com/projectPiki/pikmin2/blob/main/src/Dolphin/mtx/mtx.c
+#include "types.h"
+#include "fdlibm.h"
+#include "Dolphin/mtx.h"
+static f32 Unit01[] = { 0.0f, 1.0f };
+
+void PSMTXIdentity
+(
+ register Mtx m
+)
+{
+ register f32 zero_c = 0.0f;
+ register f32 one_c = 1.0f;
+ register f32 c_01;
+ register f32 c_10;
+
+ asm
+ {
+ psq_st zero_c, 8(m), 0, 0
+ ps_merge01 c_01, zero_c, one_c
+ psq_st zero_c, 24(m), 0, 0
+ ps_merge10 c_10, one_c, zero_c
+ psq_st zero_c, 32(m), 0, 0
+ psq_st c_01, 16(m), 0, 0
+ psq_st c_10, 0(m), 0, 0
+ psq_st c_10, 40(m), 0, 0
+ }
+}
+
+asm void PSMTXCopy
+(
+ const register Mtx src,
+ register Mtx dst
+)
+{
+ nofralloc
+
+ psq_l fp0, 0(src), 0, 0
+ psq_st fp0, 0(dst), 0, 0
+ psq_l fp1, 8(src), 0, 0
+ psq_st fp1, 8(dst), 0, 0
+ psq_l fp2, 16(src), 0, 0
+ psq_st fp2, 16(dst), 0, 0
+ psq_l fp3, 24(src), 0, 0
+ psq_st fp3, 24(dst), 0, 0
+ psq_l fp4, 32(src), 0, 0
+ psq_st fp4, 32(dst), 0, 0
+ psq_l fp5, 40(src), 0, 0
+ psq_st fp5, 40(dst), 0, 0
+
+ blr
+}
+
+asm void PSMTXConcat
+(
+ const register Mtx mA, // r3
+ const register Mtx mB, // r4
+ register Mtx mAB // r5
+)
+{
+ nofralloc
+
+#define FP0 fp0
+#define FP1 fp1
+#define FP2 fp2
+#define FP3 fp3
+#define FP4 fp4
+#define FP5 fp5
+#define FP6 fp6
+#define FP7 fp7
+#define FP8 fp8
+#define FP9 fp9
+#define FP10 fp10
+#define FP11 fp11
+#define FP12 fp12
+#define FP13 fp13
+#define FP14 fp14
+#define FP15 fp15
+#define FP31 fp31
+ stwu r1, -64(r1)
+ psq_l FP0, 0(mA), 0, 0
+ stfd fp14, 8(r1)
+ psq_l FP6, 0(mB), 0, 0
+ addis r6, 0, Unit01@ha
+ psq_l FP7, 8(mB), 0, 0
+ stfd fp15, 16(r1)
+ addi r6, r6, Unit01@l
+ stfd fp31, 40(r1)
+ psq_l FP8, 16(mB), 0, 0
+ ps_muls0 FP12, FP6, FP0
+ psq_l FP2, 16(mA), 0, 0
+ ps_muls0 FP13, FP7, FP0
+ psq_l FP31, 0(r6), 0, 0
+ ps_muls0 FP14, FP6, FP2
+ psq_l FP9, 24(mB), 0, 0
+ ps_muls0 FP15, FP7, FP2
+ psq_l FP1, 8(mA), 0, 0
+ ps_madds1 FP12, FP8, FP0, FP12
+ psq_l FP3, 24(mA), 0, 0
+ ps_madds1 FP14, FP8, FP2, FP14
+ psq_l FP10, 32(mB), 0, 0
+ ps_madds1 FP13, FP9, FP0, FP13
+ psq_l FP11, 40(mB), 0, 0
+ ps_madds1 FP15, FP9, FP2, FP15
+ psq_l FP4, 32(mA), 0, 0
+ psq_l FP5, 40(mA), 0, 0
+ ps_madds0 FP12, FP10, FP1, FP12
+ ps_madds0 FP13, FP11, FP1, FP13
+ ps_madds0 FP14, FP10, FP3, FP14
+ ps_madds0 FP15, FP11, FP3, FP15
+ psq_st FP12, 0(mAB), 0, 0
+
+ ps_muls0 FP2, FP6, FP4
+ ps_madds1 FP13, FP31, FP1, FP13
+ ps_muls0 FP0, FP7, FP4
+ psq_st FP14, 16(mAB), 0, 0
+ ps_madds1 FP15, FP31, FP3, FP15
+
+ psq_st FP13, 8(mAB), 0, 0
+
+ ps_madds1 FP2, FP8, FP4, FP2
+ ps_madds1 FP0, FP9, FP4, FP0
+ ps_madds0 FP2, FP10, FP5, FP2
+ lfd fp14, 8(r1)
+ psq_st FP15, 24(mAB), 0, 0
+ ps_madds0 FP0, FP11, FP5, FP0
+ psq_st FP2, 32(mAB), 0, 0
+ ps_madds1 FP0, FP31, FP5, FP0
+ lfd fp15, 16(r1)
+ psq_st FP0, 40(mAB), 0, 0
+
+ lfd fp31, 40(r1)
+ addi r1, r1, 64
+
+ blr
+
+#undef FP0
+#undef FP1
+#undef FP2
+#undef FP3
+#undef FP4
+#undef FP5
+#undef FP6
+#undef FP7
+#undef FP8
+#undef FP9
+#undef FP10
+#undef FP11
+#undef FP12
+#undef FP13
+#undef FP14
+#undef FP15
+#undef FP31
+}
+
+#pragma scheduling off
+void PSMTXTranspose ( const register Mtx src, register Mtx xPose )
+{
+ register f32 c_zero = 0.0f;
+ register f32 row0a, row1a, row0b, row1b;
+ register f32 trns0, trns1, trns2;
+
+ asm
+ {
+ psq_l row0a, 0(src), 0, 0
+ stfs c_zero, 44(xPose)
+ psq_l row1a, 16(src), 0, 0
+ ps_merge00 trns0, row0a, row1a
+ psq_l row0b, 8(src), 1, 0
+ ps_merge11 trns1, row0a, row1a
+ psq_l row1b, 24(src), 1, 0
+ psq_st trns0, 0(xPose), 0, 0
+ psq_l row0a, 32(src), 0, 0
+ ps_merge00 trns2, row0b, row1b
+ psq_st trns1, 16(xPose), 0, 0
+ ps_merge00 trns0, row0a, c_zero
+ psq_st trns2, 32(xPose), 0, 0
+ ps_merge10 trns1, row0a, c_zero
+ psq_st trns0, 8(xPose), 0, 0
+ lfs row0b, 40(src)
+ psq_st trns1, 24(xPose), 0, 0
+ stfs row0b, 40(xPose)
+ }
+}
+
+asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv )
+{
+ nofralloc
+ psq_l fp0, 0(src), 1, 0
+ psq_l fp1, 4(src), 0, 0
+ psq_l fp2, 16(src), 1, 0
+ ps_merge10 fp6, fp1, fp0
+ psq_l fp3, 20(src), 0, 0
+ psq_l fp4, 32(src), 1, 0
+ ps_merge10 fp7, fp3, fp2
+ psq_l fp5, 36(src), 0, 0
+ ps_mul fp11, fp3, fp6
+ ps_mul fp13, fp5, fp7
+ ps_merge10 fp8, fp5, fp4
+ ps_msub fp11, fp1, fp7, fp11
+ ps_mul fp12, fp1, fp8
+ ps_msub fp13, fp3, fp8, fp13
+ ps_mul fp10, fp3, fp4
+ ps_msub fp12, fp5, fp6, fp12
+ ps_mul fp9, fp0, fp5
+ ps_mul fp8, fp1, fp2
+ ps_sub fp6, fp6, fp6
+ ps_msub fp10, fp2, fp5, fp10
+ ps_mul fp7, fp0, fp13
+ ps_msub fp9, fp1, fp4, fp9
+ ps_madd fp7, fp2, fp12, fp7
+ ps_msub fp8, fp0, fp3, fp8
+ ps_madd fp7, fp4, fp11, fp7
+ ps_cmpo0 cr0, fp7, fp6
+ bne _regular
+ addi r3, 0, 0
+ blr
+ _regular:
+ fres fp0, fp7
+ ps_add fp6, fp0, fp0
+ ps_mul fp5, fp0, fp0
+ ps_nmsub fp0, fp7, fp5, fp6
+ lfs fp1, 12(src)
+ ps_muls0 fp13, fp13, fp0
+ lfs fp2, 28(src)
+ ps_muls0 fp12, fp12, fp0
+ lfs fp3, 44(src)
+ ps_muls0 fp11, fp11, fp0
+ ps_merge00 fp5, fp13, fp12
+ ps_muls0 fp10, fp10, fp0
+ ps_merge11 fp4, fp13, fp12
+ ps_muls0 fp9, fp9, fp0
+ psq_st fp5, 0(inv), 0, 0
+ ps_mul fp6, fp13, fp1
+ psq_st fp4, 16(inv), 0, 0
+ ps_muls0 fp8, fp8, fp0
+ ps_madd fp6, fp12, fp2, fp6
+ psq_st fp10, 32(inv), 1, 0
+ ps_nmadd fp6, fp11, fp3, fp6
+ psq_st fp9, 36(inv), 1, 0
+ ps_mul fp7, fp10, fp1
+ ps_merge00 fp5, fp11, fp6
+ psq_st fp8, 40(inv), 1, 0
+ ps_merge11 fp4, fp11, fp6
+ psq_st fp5, 8(inv), 0, 0
+ ps_madd fp7, fp9, fp2, fp7
+ psq_st fp4, 24(inv), 0, 0
+ ps_nmadd fp7, fp8, fp3, fp7
+ addi r3, 0, 1
+ psq_st fp7, 44(inv), 1, 0
+ blr
+}
+
+void PSMTXRotRad ( Mtx m, char axis, f32 rad )
+{
+ f32 sinA, cosA;
+
+ sinA = sinf(rad);
+ cosA = cosf(rad);
+
+ PSMTXRotTrig( m, axis, sinA, cosA );
+}
+
+void PSMTXRotTrig (
+ register Mtx m,
+ register char axis,
+ register f32 sinA,
+ register f32 cosA )
+{
+ register f32 fc0, fc1, nsinA;
+ register f32 fw0, fw1, fw2, fw3;
+
+ asm
+ {
+ frsp sinA, sinA
+ frsp cosA, cosA
+ }
+
+ fc0 = 0.0f;
+ fc1 = 1.0f;
+
+ asm
+ {
+ ori axis, axis, 0x20
+ ps_neg nsinA, sinA
+ cmplwi axis, 'x'
+ beq _case_x
+ cmplwi axis, 'y'
+ beq _case_y
+ cmplwi axis, 'z'
+ beq _case_z
+ b _end
+
+ _case_x:
+ psq_st fc1, 0(m), 1, 0
+ psq_st fc0, 4(m), 0, 0
+ ps_merge00 fw0, sinA, cosA
+ psq_st fc0, 12(m), 0, 0
+ ps_merge00 fw1, cosA, nsinA
+ psq_st fc0, 28(m), 0, 0
+ psq_st fc0, 44(m), 1, 0
+ psq_st fw0, 36(m), 0, 0
+ psq_st fw1, 20(m), 0, 0
+ b _end;
+
+ _case_y:
+ ps_merge00 fw0, cosA, fc0
+ ps_merge00 fw1, fc0, fc1
+ psq_st fc0, 24(m), 0, 0
+ psq_st fw0, 0(m), 0, 0
+ ps_merge00 fw2, nsinA, fc0
+ ps_merge00 fw3, sinA, fc0
+ psq_st fw0, 40(m), 0, 0;
+ psq_st fw1, 16(m), 0, 0;
+ psq_st fw3, 8(m), 0, 0;
+ psq_st fw2, 32(m), 0, 0;
+ b _end;
+
+ _case_z:
+ psq_st fc0, 8(m), 0, 0
+ ps_merge00 fw0, sinA, cosA
+ ps_merge00 fw2, cosA, nsinA
+ psq_st fc0, 24(m), 0, 0
+ psq_st fc0, 32(m), 0, 0
+ ps_merge00 fw1, fc1, fc0
+ psq_st fw0, 16(m), 0, 0
+ psq_st fw2, 0(m), 0, 0
+ psq_st fw1, 40(m), 0, 0
+
+ _end:
+
+ }
+}
+
+static void __PSMTXRotAxisRadInternal(
+ register Mtx m,
+ const register Vec *axis,
+ register f32 sT,
+ register f32 cT )
+{
+ register f32 tT, fc0;
+ register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
+
+ tmp9 = 0.5f;
+ tmp8 = 3.0f;
+
+ asm
+ {
+ frsp cT, cT
+ psq_l tmp0, 0(axis), 0, 0
+ frsp sT, sT
+ lfs tmp1, 8(axis)
+ ps_mul tmp2, tmp0, tmp0
+ fadds tmp7, tmp9, tmp9
+ ps_madd tmp3, tmp1, tmp1, tmp2
+ fsubs fc0, tmp9, tmp9
+ ps_sum0 tmp4, tmp3, tmp1, tmp2
+ fsubs tT, tmp7, cT
+ frsqrte tmp5, tmp4
+ fmuls tmp2, tmp5, tmp5
+ fmuls tmp3, tmp5, tmp9
+ fnmsubs tmp2, tmp2, tmp4, tmp8
+ fmuls tmp5, tmp2, tmp3
+ ps_merge00 cT, cT, cT
+ ps_muls0 tmp0, tmp0, tmp5
+ ps_muls0 tmp1, tmp1, tmp5
+ ps_muls0 tmp4, tmp0, tT
+ ps_muls0 tmp9, tmp0, sT
+ ps_muls0 tmp5, tmp1, tT
+ ps_muls1 tmp3, tmp4, tmp0
+ ps_muls0 tmp2, tmp4, tmp0
+ ps_muls0 tmp4, tmp4, tmp1
+ fnmsubs tmp6, tmp1, sT, tmp3
+ fmadds tmp7, tmp1, sT, tmp3
+ ps_neg tmp0, tmp9
+ ps_sum0 tmp8, tmp4, fc0, tmp9
+ ps_sum0 tmp2, tmp2, tmp6, cT
+ ps_sum1 tmp3, cT, tmp7, tmp3
+ ps_sum0 tmp6, tmp0, fc0 ,tmp4
+ psq_st tmp8, 8(m), 0, 0
+ ps_sum0 tmp0, tmp4, tmp4, tmp0
+ psq_st tmp2, 0(m), 0, 0
+ ps_muls0 tmp5, tmp5, tmp1
+ psq_st tmp3, 16(m), 0, 0
+ ps_sum1 tmp4, tmp9, tmp0, tmp4
+ psq_st tmp6, 24(m), 0, 0
+ ps_sum0 tmp5, tmp5, fc0, cT
+ psq_st tmp4, 32(m), 0, 0
+ psq_st tmp5, 40(m), 0, 0
+ }
+}
+
+// clang-format on
+void PSMTXRotAxisRad(Mtx m, const Vec* axis, f32 rad)
+{
+ f32 sinT, cosT;
+
+ sinT = sinf(rad);
+ cosT = cosf(rad);
+
+ __PSMTXRotAxisRadInternal(m, axis, sinT, cosT);
+}
+
+// clang-format off
+void PSMTXTrans(
+ register Mtx m,
+ register f32 xT,
+ register f32 yT,
+ register f32 zT
+)
+{
+ register f32 c0 = 0.0f;
+ register f32 c1 = 1.0f;
+
+ asm
+ {
+ stfs xT, 12(m)
+ stfs yT, 28(m)
+ psq_st c0, 4(m), 0, 0
+ psq_st c0, 32(m), 0, 0
+ stfs c0, 16(m)
+ stfs c1, 20(m)
+ stfs c0, 24(m)
+ stfs c1, 40(m)
+ stfs zT, 44(m)
+ stfs c1, 0(m)
+ }
+}
+
+asm void PSMTXTransApply(
+ const register Mtx src,
+ register Mtx dst,
+ register f32 xT,
+ register f32 yT,
+ register f32 zT )
+{
+ nofralloc;
+ psq_l fp4, 0(src), 0, 0;
+ frsp xT, xT;
+ psq_l fp5, 8(src), 0, 0;
+ frsp yT, yT;
+ psq_l fp7, 24(src), 0, 0;
+ frsp zT, zT;
+ psq_l fp8, 40(src), 0, 0;
+ psq_st fp4, 0(dst), 0, 0;
+ ps_sum1 fp5, xT, fp5, fp5;
+ psq_l fp6, 16(src), 0, 0;
+ psq_st fp5, 8(dst), 0, 0;
+ ps_sum1 fp7, yT, fp7, fp7;
+ psq_l fp9, 32(src), 0, 0;
+ psq_st fp6, 16(dst), 0, 0;
+ ps_sum1 fp8, zT, fp8, fp8;
+ psq_st fp7, 24(dst), 0, 0;
+ psq_st fp9, 32(dst), 0, 0;
+ psq_st fp8, 40(dst), 0, 0;
+ blr;
+}
+
+void PSMTXScale(
+ register Mtx m,
+ register f32 xS,
+ register f32 yS,
+ register f32 zS
+)
+{
+ register f32 c0 = 0.0f;
+
+ asm
+ {
+ stfs xS, 0(m)
+ psq_st c0, 4(m), 0, 0
+ psq_st c0, 12(m), 0, 0
+ stfs yS, 20(m)
+ psq_st c0, 24(m), 0, 0
+ psq_st c0, 32(m), 0, 0
+ stfs zS, 40(m)
+ stfs c0, 44(m)
+ }
+}
+
+asm void PSMTXScaleApply (
+ const register Mtx src,
+ register Mtx dst,
+ register f32 xS,
+ register f32 yS,
+ register f32 zS )
+{
+ nofralloc;
+ frsp xS, xS;
+ psq_l fp4, 0(src), 0, 0;
+ frsp yS, yS;
+ psq_l fp5, 8(src), 0, 0;
+ frsp zS, zS;
+ ps_muls0 fp4, fp4, xS;
+ psq_l fp6, 16(src), 0, 0;
+ ps_muls0 fp5, fp5, xS;
+ psq_l fp7, 24(src), 0, 0;
+ ps_muls0 fp6, fp6, yS;
+ psq_l fp8, 32(src), 0, 0;
+ psq_st fp4, 0(dst), 0, 0;
+ ps_muls0 fp7, fp7, yS;
+ psq_l fp2, 40(src), 0, 0;
+ psq_st fp5, 8(dst), 0, 0;
+ ps_muls0 fp8, fp8, zS;
+ psq_st fp6, 16(dst), 0, 0;
+ ps_muls0 fp2, fp2, zS;
+ psq_st fp7, 24(dst), 0, 0;
+ psq_st fp8, 32(dst), 0, 0;
+ psq_st fp2, 40(dst), 0, 0;
+ blr;
+}
+
+void PSMTXQuat ( register Mtx m, const register Quaternion *q )
+{
+ register f32 c_zero, c_one, c_two, scale;
+ register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
+ c_one = 1.0f;
+ asm
+ {
+ psq_l tmp0, 0(q), 0, 0
+ psq_l tmp1, 8(q), 0, 0
+ fsubs c_zero, c_one, c_one
+ fadds c_two, c_one, c_one
+ ps_mul tmp2, tmp0, tmp0
+ ps_merge10 tmp5, tmp0, tmp0
+ ps_madd tmp4, tmp1, tmp1, tmp2
+ ps_mul tmp3, tmp1, tmp1
+ ps_sum0 scale, tmp4, tmp4, tmp4
+ ps_muls1 tmp7, tmp5, tmp1
+ fres tmp9, scale
+ ps_sum1 tmp4, tmp3, tmp4, tmp2
+ ps_nmsub scale, scale, tmp9, c_two
+ ps_muls1 tmp6, tmp1, tmp1
+ ps_mul scale, tmp9, scale
+ ps_sum0 tmp2, tmp2, tmp2, tmp2
+ fmuls scale, scale, c_two
+ ps_madd tmp8, tmp0, tmp5, tmp6
+ ps_msub tmp6, tmp0, tmp5, tmp6
+ psq_st c_zero, 12(m), 1, 0
+ ps_nmsub tmp2, tmp2, scale, c_one
+ ps_nmsub tmp4, tmp4, scale, c_one
+ psq_st c_zero, 44(m), 1, 0
+ ps_mul tmp8, tmp8, scale
+ ps_mul tmp6, tmp6, scale
+ psq_st tmp2, 40(m), 1, 0
+ ps_madds0 tmp5, tmp0, tmp1, tmp7
+ ps_merge00 tmp1, tmp8, tmp4
+ ps_nmsub tmp7, tmp7, c_two, tmp5
+ ps_merge10 tmp0, tmp4, tmp6
+ psq_st tmp1, 16(m), 0, 0
+ ps_mul tmp5, tmp5, scale
+ ps_mul tmp7, tmp7, scale
+ psq_st tmp0, 0(m), 0, 0
+ psq_st tmp5, 8(m), 1, 0
+ ps_merge10 tmp3, tmp7, c_zero
+ ps_merge01 tmp9, tmp7, tmp5
+ psq_st tmp3, 24(m), 0, 0
+ psq_st tmp9, 32(m), 0, 0
+ }
+}
+
+// clang-format on
+void C_MTXLookAt(Mtx m, const Vec* camPos, const Vec* camUp, const Vec* target)
+{
+
+ Vec vLook, vRight, vUp;
+
+ vLook.x = camPos->x - target->x;
+ vLook.y = camPos->y - target->y;
+ vLook.z = camPos->z - target->z;
+ PSVECNormalize(&vLook, &vLook);
+
+ PSVECCrossProduct(camUp, &vLook, &vRight);
+ PSVECNormalize(&vRight, &vRight);
+
+ PSVECCrossProduct(&vLook, &vRight, &vUp);
+
+ m[0][0] = vRight.x;
+ m[0][1] = vRight.y;
+ m[0][2] = vRight.z;
+ m[0][3] = -(camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z);
+
+ m[1][0] = vUp.x;
+ m[1][1] = vUp.y;
+ m[1][2] = vUp.z;
+ m[1][3] = -(camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z);
+
+ m[2][0] = vLook.x;
+ m[2][1] = vLook.y;
+ m[2][2] = vLook.z;
+ m[2][3] = -(camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z);
+}
+
+void C_MTXLightFrustum(Mtx m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 scaleS, f32 scaleT, f32 transS, f32 transT)
+{
+
+}
+
+void C_MTXLightPerspective(Mtx m, f32 fovY, f32 aspect, float scaleS, float scaleT, float transS, float transT)
+{
+ f32 angle;
+ f32 cot;
+
+ angle = fovY * 0.5f;
+ angle = MTXDegToRad(angle);
+
+ cot = 1.0f / tanf(angle);
+
+ m[0][0] = (cot / aspect) * scaleS;
+ m[0][1] = 0.0f;
+ m[0][2] = -transS;
+ m[0][3] = 0.0f;
+
+ m[1][0] = 0.0f;
+ m[1][1] = cot * scaleT;
+ m[1][2] = -transT;
+ m[1][3] = 0.0f;
+
+ m[2][0] = 0.0f;
+ m[2][1] = 0.0f;
+ m[2][2] = -1.0f;
+ m[2][3] = 0.0f;
+}
+
+void C_MTXLightOrtho(Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS, float scaleT, float transS, float transT)
+{
+ f32 tmp;
+
+ tmp = 1.0f / (r - l);
+ m[0][0] = (2.0f * tmp * scaleS);
+ m[0][1] = 0.0f;
+ m[0][2] = 0.0f;
+ m[0][3] = ((-(r + l) * tmp) * scaleS) + transS;
+
+ tmp = 1.0f / (t - b);
+ m[1][0] = 0.0f;
+ m[1][1] = (2.0f * tmp) * scaleT;
+ m[1][2] = 0.0f;
+ m[1][3] = ((-(t + b) * tmp) * scaleT) + transT;
+
+ m[2][0] = 0.0f;
+ m[2][1] = 0.0f;
+ m[2][2] = 0.0f;
+ m[2][3] = 1.0f;
+}
diff --git a/src/Dolphin/mtx/mtx44.c b/src/Dolphin/mtx/mtx44.c
new file mode 100644
index 0000000..b974888
--- /dev/null
+++ b/src/Dolphin/mtx/mtx44.c
@@ -0,0 +1,98 @@
+// This file was taken from the Melee decompilation project.
+// https://github.com/doldecomp/melee/blob/master/src/dolphin/mtx/mtx44.c
+#include <mtx.h>
+#include <math.h>
+
+void C_MTXFrustum(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f)
+{
+ f32 temp_f8;
+ f32 temp_f6;
+ f32 temp_f9;
+ f32 temp_f11;
+
+ temp_f11 = 1.0F / (r - l);
+ temp_f8 = 2.0F * n;
+ temp_f9 = 1.0F / (t - b);
+ temp_f6 = 1.0F / (f - n);
+
+ m[0][0] = temp_f8 * temp_f11;
+ m[0][1] = 0.0F;
+ m[0][2] = temp_f11 * (r + l);
+ m[0][3] = 0.0F;
+
+ m[1][0] = 0.0F;
+ m[1][1] = temp_f8 * temp_f9;
+ m[1][2] = temp_f9 * (t + b);
+ m[1][3] = 0.0F;
+
+ m[2][0] = 0.0F;
+ m[2][1] = 0.0F;
+ m[2][2] = -n * temp_f6;
+ m[2][3] = temp_f6 * -(f * n);
+
+ m[3][0] = 0.0F;
+ m[3][1] = 0.0F;
+ m[3][2] = -1.0F;
+ m[3][3] = 0.0F;
+}
+
+void C_MTXPerspective(Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f)
+{
+ f32 temp_f3;
+ f32 temp_f4;
+
+ fovY = 0.5F * fovY;
+
+ temp_f4 = 1.0F / tanf(0.017453292F * (fovY));
+ temp_f3 = 1.0F / (f - n);
+
+ m[0][0] = temp_f4 / aspect;
+ m[0][1] = 0.0F;
+ m[0][2] = 0.0F;
+ m[0][3] = 0.0F;
+
+ m[1][0] = 0.0F;
+ m[1][1] = temp_f4;
+ m[1][2] = 0.0F;
+ m[1][3] = 0.0F;
+
+ m[2][0] = 0.0F;
+ m[2][1] = 0.0F;
+ m[2][2] = -n * temp_f3;
+ m[2][3] = temp_f3 * -(f * n);
+
+ m[3][0] = 0.0F;
+ m[3][1] = 0.0F;
+ m[3][2] = -1.0F;
+ m[3][3] = 0.0F;
+}
+
+void C_MTXOrtho(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f)
+{
+ f32 temp_f8;
+ f32 temp_f10;
+ f32 temp_f4;
+
+ temp_f10 = 1.0F / (r - l);
+ m[0][0] = 2.0F * temp_f10;
+ m[0][1] = 0.0F;
+ m[0][2] = 0.0F;
+ m[0][3] = temp_f10 * -(r + l);
+
+ temp_f8 = 1.0F / (t - b);
+ m[1][0] = 0.0F;
+ m[1][1] = 2.0F * temp_f8;
+ m[1][2] = 0.0F;
+ m[1][3] = temp_f8 * -(t + b);
+
+ temp_f4 = 1.0F / (f - n);
+ m[2][0] = 0.0F;
+ m[2][1] = 0.0F;
+ m[2][2] = -1.0F * temp_f4;
+ m[2][3] = -f * temp_f4;
+
+ m[3][0] = 0.0F;
+ m[3][1] = 0.0F;
+ m[3][2] = 0.0F;
+ m[3][3] = 1.0F;
+}
diff --git a/src/Dolphin/mtx/mtxvec.c b/src/Dolphin/mtx/mtxvec.c
new file mode 100644
index 0000000..bb2e1d4
--- /dev/null
+++ b/src/Dolphin/mtx/mtxvec.c
@@ -0,0 +1,95 @@
+#include <mtx.h>
+
+asm void PSMTXMultVec(const register Mtx m, const register Vec *src, register Vec *dst)
+{
+ nofralloc
+ psq_l f0,0x0(src),0,0
+ psq_l f2,0x0(m),0,0
+ psq_l f1,0x8(src),0x1,0
+
+ ps_mul f4, f2, f0
+ psq_l f3,0x8(m),0,0
+ ps_madd f5, f3, f1, f4
+ psq_l f8,0x10(m),0,0
+ ps_sum0 f6, f5, f6, f5
+ psq_l f9,0x18(m),0,0
+ ps_mul f10, f8, f0
+ psq_st f6,0x0(dst),0x1,0
+ ps_madd f11, f9, f1, f10
+ psq_l f2,0x20(m),0,0
+ ps_sum0 f12, f11, f12, f11
+ psq_l f3,0x28(m),0,0
+ ps_mul f4, f2, f0
+ psq_st f12,0x4(dst),0x1,0
+ ps_madd f5, f3, f1, f4
+ ps_sum0 f6, f5, f6, f5
+ psq_st f6,0x8(dst),0x1,0
+ blr
+}
+
+asm void PSMTXMultVecSR(const register Mtx m, const register Vec *src, register Vec *dst)
+{
+ nofralloc
+ psq_l f0,0x0(m),0,0
+ psq_l f6,0x0(src),0,0
+ psq_l f2,0x10(m),0,0
+ ps_mul f8, f0, f6
+ psq_l f4,0x20(m),0,0
+ ps_mul f10, f2, f6
+ psq_l f7,0x8(src),0x1,0
+ ps_mul f12, f4, f6
+ psq_l f3,0x18(m),0,0
+ ps_sum0 f8, f8, f8, f8
+ psq_l f5,0x28(m),0,0
+ ps_sum0 f10, f10, f10, f10
+ psq_l f1,0x8(m),0,0
+ ps_sum0 f12, f12, f12, f12
+ ps_madd f9, f1, f7, f8
+ psq_st f9,0x0(dst),0x1,0
+ ps_madd f11, f3, f7, f10
+ psq_st f11,0x4(dst),0x1,0
+ ps_madd f13, f5, f7, f12
+ psq_st f13,0x8(dst),0x1,0
+ blr
+}
+
+asm void PSMTXMultVecArraySR(const register Mtx m, const register Vec *src, register Vec *dst, register u32 n)
+{
+ nofralloc
+ psq_l f13,0x0(m),0,0
+ psq_l f12,0x10(m),0,0
+ subi n, n, 0x1
+ psq_l f11,0x8(m),0x1,0
+ ps_merge00 f0, f13, f12
+ subi r5, dst, 0x4
+ psq_l f10,0x18(m),0x1,0
+ ps_merge11 f1, f13, f12
+ mtctr n
+ psq_l f3,0x20(m),0,0
+ ps_merge00 f2, f11, f10
+ psq_l f4,0x28(m),0x1,0
+ psq_l f6,0x0(src),0,0
+ psq_lu f7,0x8(src),0x1,0
+ ps_muls0 f8, f0, f6
+ ps_mul f9, f3, f6
+ ps_madds1 f8, f1, f6, f8
+ ps_madd f10, f4, f7, f9
+
+ loop:
+ psq_lu f6,0x4(src),0,0
+ ps_madds0 f12, f2, f7, f8
+ psq_lu f7,0x8(src),0x1,0
+ ps_sum0 f13, f10, f9, f9
+ ps_muls0 f8, f0, f6
+ ps_mul f9, f3, f6
+ psq_stu f12,0x4(dst),0,0
+ ps_madds1 f8, f1, f6, f8
+ psq_stu f13,0x8(dst),0x1,0
+ ps_madd f10, f4, f7, f9
+ bdnz+ loop
+ ps_madds0 f12, f2, f7, f8
+ ps_sum0 f13, f10, f9, f9
+ psq_stu f12,0x4(dst),0,0
+ psq_stu f13,0x8(dst),0x1,0
+ blr
+}
diff --git a/src/Dolphin/mtx/vec.c b/src/Dolphin/mtx/vec.c
new file mode 100644
index 0000000..af42c0b
--- /dev/null
+++ b/src/Dolphin/mtx/vec.c
@@ -0,0 +1,146 @@
+#include "types.h"
+#include "Dolphin/vec.h"
+
+#define R_RET fp1
+#define FP2 fp2
+#define FP3 fp3
+#define FP4 fp4
+#define FP5 fp5
+#define FP6 fp6
+#define FP7 fp7
+#define FP8 fp8
+#define FP9 fp9
+#define FP10 fp10
+#define FP11 fp11
+#define FP12 fp12
+#define FP13 fp13
+// clang-format off
+asm void PSVECAdd
+(
+ const register Vec *vec1,
+ const register Vec *vec2,
+ register Vec *ret
+)
+{
+ nofralloc;
+ psq_l FP2, 0(vec1), 0, 0;
+ psq_l FP4, 0(vec2), 0, 0;
+ ps_add FP6, FP2, FP4;
+ psq_st FP6, 0(ret), 0, 0;
+ psq_l FP3, 8(vec1), 1, 0;
+ psq_l FP5, 8(vec2), 1, 0;
+ ps_add FP7, FP3, FP5;
+ psq_st FP7, 8(ret), 1, 0;
+ blr
+}
+
+asm void PSVECSubtract
+(
+ const register Vec *vec1,
+ const register Vec *vec2,
+ register Vec *ret
+)
+{
+ nofralloc;
+ psq_l FP2, 0(vec1), 0, 0;
+ psq_l FP4, 0(vec2), 0, 0;
+ ps_sub FP6, FP2, FP4;
+ psq_st FP6, 0(ret), 0, 0;
+ psq_l FP3, 8(vec1), 1, 0;
+ psq_l FP5, 8(vec2), 1, 0;
+ ps_sub FP7, FP3, FP5;
+ psq_st FP7, 8(ret), 1, 0;
+ blr
+}
+
+void PSVECNormalize
+(
+ const register Vec *vec1,
+ register Vec *ret
+)
+{
+ register f32 half = 0.5f;
+ register f32 three = 3.0f;
+ register f32 xx_zz, xx_yy;
+ register f32 square_sum;
+ register f32 ret_sqrt;
+ register f32 n_0, n_1;
+ asm
+ {
+ psq_l FP2, 0(vec1), 0, 0;
+ ps_mul xx_yy, FP2, FP2;
+ psq_l FP3, 8(vec1), 1, 0;
+ ps_madd xx_zz, FP3, FP3, xx_yy;
+ ps_sum0 square_sum, xx_zz, FP3, xx_yy;
+ frsqrte ret_sqrt, square_sum;
+ fmuls n_0, ret_sqrt, ret_sqrt;
+ fmuls n_1, ret_sqrt, half;
+ fnmsubs n_0, n_0, square_sum, three;
+ fmuls ret_sqrt, n_0, n_1;
+ ps_muls0 FP2, FP2, ret_sqrt;
+ psq_st FP2, 0(ret), 0, 0;
+ ps_muls0 FP3, FP3, ret_sqrt;
+ psq_st FP3, 8(ret), 1, 0;
+ }
+}
+
+f32 PSVECMag ( const register Vec *v )
+{
+ register f32 v_xy, v_zz, square_mag;
+ register f32 ret_mag, n_0, n_1;
+ register f32 three, half, zero;
+ half = 0.5f;
+ asm
+ {
+ psq_l v_xy, 0(v), 0, 0
+ ps_mul v_xy, v_xy, v_xy
+ lfs v_zz, 8(v)
+ fsubs zero, half, half
+ ps_madd square_mag, v_zz, v_zz, v_xy
+ ps_sum0 square_mag, square_mag, v_xy, v_xy
+ fcmpu cr0, square_mag, zero
+ beq- __exit
+ frsqrte ret_mag, square_mag
+ }
+ three = 3.0f;
+ asm
+ {
+ fmuls n_0, ret_mag, ret_mag
+ fmuls n_1, ret_mag, half
+ fnmsubs n_0, n_0, square_mag, three
+ fmuls ret_mag, n_0, n_1
+ fmuls square_mag, square_mag, ret_mag
+ __exit:
+ }
+ return square_mag;
+}
+
+asm void PSVECCrossProduct
+(
+ const register Vec *vec1,
+ const register Vec *vec2,
+ register Vec *ret
+)
+{
+ nofralloc;
+ psq_l fp1, 0(vec2), 0, 0
+ lfs fp2, 8(vec1)
+ psq_l fp0, 0(vec1), 0, 0
+ ps_merge10 fp6, fp1, fp1
+ lfs fp3, 8(vec2)
+ ps_mul fp4, fp1, fp2
+ ps_muls0 fp7, fp1, fp0
+ ps_msub fp5, fp0, fp3, fp4
+ ps_msub fp8, fp0, fp6, fp7
+ ps_merge11 fp9, fp5, fp5
+ ps_merge01 fp10, fp5, fp8
+ psq_st fp9, 0(ret), 1, 0
+ ps_neg fp10, fp10
+ psq_st fp10, 4(ret), 0, 0
+ blr;
+}
+
+f32 PSVECDistance(const register Vec *a, const register Vec *b)
+{
+ return 0.0f;
+}