From 55487832c614edb61ebf22d0c153944ba864feaa Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Thu, 2 Feb 2023 11:30:25 -0400 Subject: sdk: Add `mtx` We're slowly getting there. --- src/Dolphin/mtx/vec.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 src/Dolphin/mtx/vec.c (limited to 'src/Dolphin/mtx/vec.c') diff --git a/src/Dolphin/mtx/vec.c b/src/Dolphin/mtx/vec.c new file mode 100644 index 0000000..af42c0b --- /dev/null +++ b/src/Dolphin/mtx/vec.c @@ -0,0 +1,146 @@ +#include "types.h" +#include "Dolphin/vec.h" + +#define R_RET fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 +// clang-format off +asm void PSVECAdd +( + const register Vec *vec1, + const register Vec *vec2, + register Vec *ret +) +{ + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_add FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_add FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +} + +asm void PSVECSubtract +( + const register Vec *vec1, + const register Vec *vec2, + register Vec *ret +) +{ + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_sub FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_sub FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +} + +void PSVECNormalize +( + const register Vec *vec1, + register Vec *ret +) +{ + register f32 half = 0.5f; + register f32 three = 3.0f; + register f32 xx_zz, xx_yy; + register f32 square_sum; + register f32 ret_sqrt; + register f32 n_0, n_1; + asm + { + psq_l FP2, 0(vec1), 0, 0; + ps_mul xx_yy, FP2, FP2; + psq_l FP3, 8(vec1), 1, 0; + ps_madd xx_zz, FP3, FP3, xx_yy; + ps_sum0 square_sum, xx_zz, FP3, xx_yy; + frsqrte ret_sqrt, square_sum; + fmuls n_0, ret_sqrt, ret_sqrt; + fmuls n_1, ret_sqrt, half; + fnmsubs n_0, n_0, square_sum, three; + fmuls ret_sqrt, n_0, n_1; + ps_muls0 FP2, FP2, ret_sqrt; + psq_st FP2, 0(ret), 0, 0; + ps_muls0 FP3, FP3, ret_sqrt; + psq_st FP3, 8(ret), 1, 0; + } +} + +f32 PSVECMag ( const register Vec *v ) +{ + register f32 v_xy, v_zz, square_mag; + register f32 ret_mag, n_0, n_1; + register f32 three, half, zero; + half = 0.5f; + asm + { + psq_l v_xy, 0(v), 0, 0 + ps_mul v_xy, v_xy, v_xy + lfs v_zz, 8(v) + fsubs zero, half, half + ps_madd square_mag, v_zz, v_zz, v_xy + ps_sum0 square_mag, square_mag, v_xy, v_xy + fcmpu cr0, square_mag, zero + beq- __exit + frsqrte ret_mag, square_mag + } + three = 3.0f; + asm + { + fmuls n_0, ret_mag, ret_mag + fmuls n_1, ret_mag, half + fnmsubs n_0, n_0, square_mag, three + fmuls ret_mag, n_0, n_1 + fmuls square_mag, square_mag, ret_mag + __exit: + } + return square_mag; +} + +asm void PSVECCrossProduct +( + const register Vec *vec1, + const register Vec *vec2, + register Vec *ret +) +{ + nofralloc; + psq_l fp1, 0(vec2), 0, 0 + lfs fp2, 8(vec1) + psq_l fp0, 0(vec1), 0, 0 + ps_merge10 fp6, fp1, fp1 + lfs fp3, 8(vec2) + ps_mul fp4, fp1, fp2 + ps_muls0 fp7, fp1, fp0 + ps_msub fp5, fp0, fp3, fp4 + ps_msub fp8, fp0, fp6, fp7 + ps_merge11 fp9, fp5, fp5 + ps_merge01 fp10, fp5, fp8 + psq_st fp9, 0(ret), 1, 0 + ps_neg fp10, fp10 + psq_st fp10, 4(ret), 0, 0 + blr; +} + +f32 PSVECDistance(const register Vec *a, const register Vec *b) +{ + return 0.0f; +} -- cgit v1.2.3-13-gbd6f