summaryrefslogtreecommitdiff
path: root/src/Dolphin/mtx/vec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/Dolphin/mtx/vec.c')
-rw-r--r--src/Dolphin/mtx/vec.c146
1 files changed, 146 insertions, 0 deletions
diff --git a/src/Dolphin/mtx/vec.c b/src/Dolphin/mtx/vec.c
new file mode 100644
index 0000000..af42c0b
--- /dev/null
+++ b/src/Dolphin/mtx/vec.c
@@ -0,0 +1,146 @@
+#include "types.h"
+#include "Dolphin/vec.h"
+
+#define R_RET fp1 /* alias for fp1; not referenced by any routine visible in this file (presumably named for fp1 being the float return register - confirm) */
+#define FP2 fp2 /* FP2..FP7: upper-case aliases for fp2..fp7, used as scratch registers by PSVECAdd, PSVECSubtract and PSVECNormalize */
+#define FP3 fp3
+#define FP4 fp4
+#define FP5 fp5
+#define FP6 fp6
+#define FP7 fp7
+#define FP8 fp8 /* FP8..FP13: aliases for fp8..fp13; no routine visible in this file uses them through these names */
+#define FP9 fp9
+#define FP10 fp10
+#define FP11 fp11
+#define FP12 fp12
+#define FP13 fp13
+// clang-format off
+asm void PSVECAdd // Component-wise sum: *ret = *vec1 + *vec2 (assumes Vec is three consecutive f32 values - confirm in Dolphin/vec.h).
+(
+ const register Vec *vec1, // first addend; floats are read at byte offsets 0, 4 and 8
+ const register Vec *vec2, // second addend; read at the same offsets
+ register Vec *ret // destination; floats are written at byte offsets 0, 4 and 8
+)
+{
+ nofralloc; // hand-written body with no compiler-generated stack frame; the explicit blr below is the return
+ psq_l FP2, 0(vec1), 0, 0; // FP2 = pair of floats at vec1+0 (W=0 loads two values; the last operand selects GQR0, presumably configured for unscaled floats - confirm)
+ psq_l FP4, 0(vec2), 0, 0; // FP4 = pair of floats at vec2+0
+ ps_add FP6, FP2, FP4; // add both lanes in one instruction
+ psq_st FP6, 0(ret), 0, 0; // store the two sums to ret+0 and ret+4
+ psq_l FP3, 8(vec1), 1, 0; // W=1: load only the float at vec1+8 into lane 0
+ psq_l FP5, 8(vec2), 1, 0; // W=1: load only the float at vec2+8 into lane 0
+ ps_add FP7, FP3, FP5; // lane 0 now holds the third-component sum
+ psq_st FP7, 8(ret), 1, 0; // W=1: store lane 0 only, so exactly 12 bytes of *ret are written
+ blr // return to the caller
+}
+
+asm void PSVECSubtract // Component-wise difference: *ret = *vec1 - *vec2 (assumes Vec is three consecutive f32 values - confirm in Dolphin/vec.h).
+(
+ const register Vec *vec1, // minuend; floats are read at byte offsets 0, 4 and 8
+ const register Vec *vec2, // subtrahend; read at the same offsets
+ register Vec *ret // destination; floats are written at byte offsets 0, 4 and 8
+)
+{
+ nofralloc; // hand-written body with no compiler-generated stack frame; the explicit blr below is the return
+ psq_l FP2, 0(vec1), 0, 0; // FP2 = pair of floats at vec1+0 (W=0 loads two values; GQR0 presumably configured for unscaled floats - confirm)
+ psq_l FP4, 0(vec2), 0, 0; // FP4 = pair of floats at vec2+0
+ ps_sub FP6, FP2, FP4; // FP6 = FP2 - FP4 in both lanes (vec1 minus vec2, not the reverse)
+ psq_st FP6, 0(ret), 0, 0; // store the two differences to ret+0 and ret+4
+ psq_l FP3, 8(vec1), 1, 0; // W=1: load only the float at vec1+8 into lane 0
+ psq_l FP5, 8(vec2), 1, 0; // W=1: load only the float at vec2+8 into lane 0
+ ps_sub FP7, FP3, FP5; // lane 0 = third component of vec1 minus that of vec2
+ psq_st FP7, 8(ret), 1, 0; // W=1: store lane 0 only, so exactly 12 bytes of *ret are written
+ blr // return to the caller
+}
+
+void PSVECNormalize // Scales *vec1 by an approximation of 1/length and writes the result to *ret (assumes Vec is three consecutive f32 values x, y, z - confirm).
+(
+ const register Vec *vec1, // input vector; floats are read at byte offsets 0, 4 and 8
+ register Vec *ret // output vector; floats are written at byte offsets 0, 4 and 8
+)
+{
+ register f32 half = 0.5f; // constant for the Newton-Raphson step below
+ register f32 three = 3.0f; // constant for the Newton-Raphson step below
+ register f32 xx_zz, xx_yy; // paired-single temporaries holding squared components
+ register f32 square_sum; // lane 0 ends up as x*x + y*y + z*z
+ register f32 ret_sqrt; // reciprocal square root: first the estimate, then the refined value
+ register f32 n_0, n_1; // Newton-Raphson intermediates
+ asm
+ {
+ psq_l FP2, 0(vec1), 0, 0; // FP2 = (x, y)
+ ps_mul xx_yy, FP2, FP2; // xx_yy = (x*x, y*y)
+ psq_l FP3, 8(vec1), 1, 0; // FP3 lane 0 = z (W=1 loads a single value)
+ ps_madd xx_zz, FP3, FP3, xx_yy; // lane 0 = z*z + x*x
+ ps_sum0 square_sum, xx_zz, FP3, xx_yy; // lane 0 = xx_zz.ps0 + xx_yy.ps1 = x*x + z*z + y*y
+ frsqrte ret_sqrt, square_sum; // e = hardware estimate of 1/sqrt(square_sum); NOTE(review): there is no zero-length check before this - confirm callers never pass a zero vector
+ fmuls n_0, ret_sqrt, ret_sqrt; // n_0 = e*e
+ fmuls n_1, ret_sqrt, half; // n_1 = 0.5*e
+ fnmsubs n_0, n_0, square_sum, three; // n_0 = 3 - e*e*square_sum
+ fmuls ret_sqrt, n_0, n_1; // one Newton-Raphson refinement: 0.5*e*(3 - s*e*e)
+ ps_muls0 FP2, FP2, ret_sqrt; // scale x and y by lane 0 of ret_sqrt
+ psq_st FP2, 0(ret), 0, 0; // store scaled x, y to ret+0 and ret+4
+ ps_muls0 FP3, FP3, ret_sqrt; // scale z
+ psq_st FP3, 8(ret), 1, 0; // W=1: store only the scaled z to ret+8
+ }
+}
+
+f32 PSVECMag ( const register Vec *v ) // Returns an approximation of the length of *v, or the zero sum unchanged when the squared length compares equal to 0 (assumes Vec is three consecutive f32 values x, y, z - confirm).
+{
+ register f32 v_xy, v_zz, square_mag; // squared-component temporaries; square_mag is also the return value
+ register f32 ret_mag, n_0, n_1; // reciprocal-sqrt estimate and Newton-Raphson intermediates
+ register f32 three, half, zero; // constants; zero is derived as half - half inside the asm
+ half = 0.5f;
+ asm
+ {
+ psq_l v_xy, 0(v), 0, 0 // v_xy = (x, y)
+ ps_mul v_xy, v_xy, v_xy // v_xy = (x*x, y*y)
+ lfs v_zz, 8(v) // v_zz = z
+ fsubs zero, half, half // zero = 0.5 - 0.5, avoiding a separate constant load
+ ps_madd square_mag, v_zz, v_zz, v_xy // lane 0 = z*z + x*x
+ ps_sum0 square_mag, square_mag, v_xy, v_xy // lane 0 = (z*z + x*x) + y*y
+ fcmpu cr0, square_mag, zero // compare the squared length with 0
+ beq- __exit // squared length == 0: skip the reciprocal sqrt and return square_mag as is
+ frsqrte ret_mag, square_mag // e = hardware estimate of 1/sqrt(square_mag)
+ }
+ three = 3.0f; // assigned in C between the two asm blocks; also executed on the zero path's skipped code? No: the branch above jumps past it to __exit
+ asm
+ {
+ fmuls n_0, ret_mag, ret_mag // n_0 = e*e
+ fmuls n_1, ret_mag, half // n_1 = 0.5*e
+ fnmsubs n_0, n_0, square_mag, three // n_0 = 3 - e*e*square_mag
+ fmuls ret_mag, n_0, n_1 // one Newton-Raphson refinement of 1/sqrt(square_mag)
+ fmuls square_mag, square_mag, ret_mag // s * (1/sqrt(s)) = sqrt(s), the magnitude
+ __exit: // branch target for the zero-length case
+ }
+ return square_mag;
+}
+
+asm void PSVECCrossProduct // Cross product: *ret = *vec1 x *vec2. Below, a = *vec1, b = *vec2, pairs are written (lane0, lane1); assumes Vec is three consecutive f32 values x, y, z - confirm.
+(
+ const register Vec *vec1, // left operand a; floats are read at byte offsets 0, 4 and 8
+ const register Vec *vec2, // right operand b; read at the same offsets
+ register Vec *ret // destination; written at offset 0 (single) and offsets 4..11 (pair)
+)
+{
+ nofralloc; // hand-written body with no compiler-generated stack frame; the explicit blr below is the return
+ psq_l fp1, 0(vec2), 0, 0 // fp1 = (b.x, b.y)
+ lfs fp2, 8(vec1) // fp2 = a.z (the lane-wise math below presumably relies on lfs filling both lanes - confirm)
+ psq_l fp0, 0(vec1), 0, 0 // fp0 = (a.x, a.y)
+ ps_merge10 fp6, fp1, fp1 // fp6 = (b.y, b.x), the lanes of fp1 swapped
+ lfs fp3, 8(vec2) // fp3 = b.z
+ ps_mul fp4, fp1, fp2 // fp4 = (b.x*a.z, b.y*a.z)
+ ps_muls0 fp7, fp1, fp0 // fp7 = (b.x*a.x, b.y*a.x), both lanes scaled by lane 0 of fp0
+ ps_msub fp5, fp0, fp3, fp4 // fp5 = fp0*fp3 - fp4 = (a.x*b.z - a.z*b.x, a.y*b.z - a.z*b.y)
+ ps_msub fp8, fp0, fp6, fp7 // fp8 = fp0*fp6 - fp7; only lane 1 (a.y*b.x - a.x*b.y) is used afterwards
+ ps_merge11 fp9, fp5, fp5 // fp9 lane 0 = a.y*b.z - a.z*b.y, the x component of the result
+ ps_merge01 fp10, fp5, fp8 // fp10 = (a.x*b.z - a.z*b.x, a.y*b.x - a.x*b.y), i.e. (-y, -z) of the result
+ psq_st fp9, 0(ret), 1, 0 // W=1: store the x component alone at ret+0
+ ps_neg fp10, fp10 // flip both signs: fp10 = (a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x)
+ psq_st fp10, 4(ret), 0, 0 // store the y and z components as a pair at ret+4 and ret+8
+ blr; // return to the caller
+}
+
+/*
+ * PSVECDistance: Euclidean distance between the points *a and *b.
+ *
+ * Previously a stub that ignored both arguments and always returned 0.0f.
+ * It now computes |a - b| by composing two routines defined earlier in
+ * this file: PSVECSubtract forms the difference vector and PSVECMag takes
+ * its length. The result therefore has PSVECMag's precision (reciprocal
+ * square-root estimate plus one Newton-Raphson step) and takes PSVECMag's
+ * early-exit path, returning 0, when the squared distance is 0.
+ *
+ * a, b: the two points; neither is modified.
+ * Returns the (approximate) distance between them.
+ *
+ * NOTE(review): if this file has to reproduce an existing binary
+ * instruction-for-instruction, a hand-scheduled paired-single version is
+ * needed instead of this composition - confirm against the project's
+ * matching requirements.
+ */
+f32 PSVECDistance(const register Vec *a, const register Vec *b)
+{
+ Vec diff; /* a - b */
+
+ PSVECSubtract(a, b, &diff);
+ return PSVECMag(&diff);
+}