#include "types.h"
#include "Dolphin/vec.h"

// Symbolic names for the floating-point registers used by the hand-written
// paired-single routines below.
#define R_RET fp1
#define FP2   fp2
#define FP3   fp3
#define FP4   fp4
#define FP5   fp5
#define FP6   fp6
#define FP7   fp7
#define FP8   fp8
#define FP9   fp9
#define FP10  fp10
#define FP11  fp11
#define FP12  fp12
#define FP13  fp13

// NOTE(review): every psq_l / psq_st in this file uses quantization register
// index 0; the routines presumably rely on GQR0 being configured for plain
// (unquantized) floats -- confirm against the platform start-up code.

// clang-format off

/*
 * PSVECAdd: ret = vec1 + vec2 (component-wise).
 *
 * x/y are processed as one paired-single value, z as a single value (W=1).
 * The x/y result is stored before z is loaded.
 */
asm void PSVECAdd
(
    const register Vec *vec1,
    const register Vec *vec2,
    register Vec *ret
)
{
    nofralloc;
    // x, y
    psq_l   FP2, 0(vec1), 0, 0;
    psq_l   FP4, 0(vec2), 0, 0;
    ps_add  FP6, FP2, FP4;
    psq_st  FP6, 0(ret), 0, 0;
    // z
    psq_l   FP3, 8(vec1), 1, 0;
    psq_l   FP5, 8(vec2), 1, 0;
    ps_add  FP7, FP3, FP5;
    psq_st  FP7, 8(ret), 1, 0;
    blr
}

/*
 * PSVECSubtract: ret = vec1 - vec2 (component-wise).
 *
 * Same layout as PSVECAdd: x/y as a pair, then z as a single value.
 */
asm void PSVECSubtract
(
    const register Vec *vec1,
    const register Vec *vec2,
    register Vec *ret
)
{
    nofralloc;
    // x, y
    psq_l   FP2, 0(vec1), 0, 0;
    psq_l   FP4, 0(vec2), 0, 0;
    ps_sub  FP6, FP2, FP4;
    psq_st  FP6, 0(ret), 0, 0;
    // z
    psq_l   FP3, 8(vec1), 1, 0;
    psq_l   FP5, 8(vec2), 1, 0;
    ps_sub  FP7, FP3, FP5;
    psq_st  FP7, 8(ret), 1, 0;
    blr
}

/*
 * PSVECNormalize: ret = vec1 scaled by 1 / |vec1|.
 *
 * The reciprocal square root comes from frsqrte and is refined with one
 * Newton-Raphson step:  r' = (0.5 * r) * (3 - s * r * r),  s = x*x + y*y + z*z.
 *
 * There is no zero-length check here (unlike PSVECMag / PSVECDistance), so the
 * caller is responsible for not passing a zero vector.
 */
void PSVECNormalize
(
    const register Vec *vec1,
    register Vec *ret
)
{
    register f32 half  = 0.5f;
    register f32 three = 3.0f;
    register f32 xx_zz, xx_yy;
    register f32 square_sum;
    register f32 ret_sqrt;
    register f32 n_0, n_1;

    asm
    {
        // FP2 = (x, y), xx_yy = (x*x, y*y)
        psq_l    FP2, 0(vec1), 0, 0;
        ps_mul   xx_yy, FP2, FP2;
        // FP3.ps0 = z, xx_zz.ps0 = z*z + x*x
        psq_l    FP3, 8(vec1), 1, 0;
        ps_madd  xx_zz, FP3, FP3, xx_yy;
        // square_sum.ps0 = xx_zz.ps0 + xx_yy.ps1 = x*x + y*y + z*z
        ps_sum0  square_sum, xx_zz, FP3, xx_yy;
        // Estimate 1/sqrt(square_sum), then one Newton-Raphson refinement.
        frsqrte  ret_sqrt, square_sum;
        fmuls    n_0, ret_sqrt, ret_sqrt;
        fmuls    n_1, ret_sqrt, half;
        fnmsubs  n_0, n_0, square_sum, three;
        fmuls    ret_sqrt, n_0, n_1;
        // Scale each component by the refined reciprocal length.
        ps_muls0 FP2, FP2, ret_sqrt;
        psq_st   FP2, 0(ret), 0, 0;
        ps_muls0 FP3, FP3, ret_sqrt;
        psq_st   FP3, 8(ret), 1, 0;
    }
}

/*
 * PSVECMag: returns the length of v.
 *
 * Computes s = x*x + y*y + z*z. If s compares equal to zero the function
 * branches straight to the exit and returns s unchanged. Otherwise it returns
 * s * rsqrt(s), with rsqrt(s) taken from frsqrte plus one Newton-Raphson step.
 */
f32 PSVECMag
(
    const register Vec *v
)
{
    register f32 v_xy, v_zz, square_mag;
    register f32 ret_mag, n_0, n_1;
    register f32 three, half, zero;

    half = 0.5f;
    asm
    {
        // v_xy = (x*x, y*y)
        psq_l    v_xy, 0(v), 0, 0
        ps_mul   v_xy, v_xy, v_xy
        lfs      v_zz, 8(v)
        // zero = half - half
        fsubs    zero, half, half
        // square_mag.ps0 = z*z + x*x, then add y*y from v_xy.ps1
        ps_madd  square_mag, v_zz, v_zz, v_xy
        ps_sum0  square_mag, square_mag, v_xy, v_xy
        // Skip the reciprocal square root for a zero-length vector.
        fcmpu    cr0, square_mag, zero
        beq-     __exit
        frsqrte  ret_mag, square_mag
    }
    three = 3.0f;
    asm
    {
        // One Newton-Raphson refinement of the rsqrt estimate.
        fmuls    n_0, ret_mag, ret_mag
        fmuls    n_1, ret_mag, half
        fnmsubs  n_0, n_0, square_mag, three
        fmuls    ret_mag, n_0, n_1
        // length = s * (1 / sqrt(s))
        fmuls    square_mag, square_mag, ret_mag
    __exit:
    }

    return square_mag;
}

/*
 * PSVECCrossProduct: ret = vec1 x vec2.
 *
 * With a = vec1 and b = vec2:
 *   ret.x = a.y*b.z - a.z*b.y
 *   ret.y = a.z*b.x - a.x*b.z
 *   ret.z = a.x*b.y - a.y*b.x
 *
 * All loads from vec1 / vec2 are issued before the first store to ret.
 */
asm void PSVECCrossProduct
(
    const register Vec *vec1,
    const register Vec *vec2,
    register Vec *ret
)
{
    nofralloc;
    // fp1 = (b.x, b.y), fp2 = a.z, fp0 = (a.x, a.y)
    psq_l       fp1, 0(vec2), 0, 0
    lfs         fp2, 8(vec1)
    psq_l       fp0, 0(vec1), 0, 0
    // fp6 = (b.y, b.x), fp3 = b.z
    ps_merge10  fp6, fp1, fp1
    lfs         fp3, 8(vec2)
    // fp4 = (b.x*a.z, b.y*a.z), fp7 = (b.x*a.x, b.y*a.x)
    ps_mul      fp4, fp1, fp2
    ps_muls0    fp7, fp1, fp0
    // fp5 = (a.x*b.z - b.x*a.z, a.y*b.z - b.y*a.z)
    ps_msub     fp5, fp0, fp3, fp4
    // fp8.ps1 = a.y*b.x - b.y*a.x
    ps_msub     fp8, fp0, fp6, fp7
    // ret.x = fp5.ps1
    ps_merge11  fp9, fp5, fp5
    // fp10 = (fp5.ps0, fp8.ps1) = (-ret.y, -ret.z)
    ps_merge01  fp10, fp5, fp8
    psq_st      fp9, 0(ret), 1, 0
    ps_neg      fp10, fp10
    psq_st      fp10, 4(ret), 0, 0
    blr;
}

/*
 * PSVECDistance: returns the Euclidean distance between points a and b.
 *
 * The (y, z) pair is read from offset 4 and the (x, y) pair from offset 0.
 * The squared distance is assembled as
 *   (dx*dx + dy*dy) + dz*dz
 * and, unless it compares equal to zero (in which case it is returned as-is),
 * multiplied by its reciprocal square root (frsqrte plus one Newton-Raphson
 * step) to yield the distance.
 */
f32 PSVECDistance(const register Vec *a, const register Vec *b)
{
    register f32 v0_yz, v1_yz, v0_xy, v1_xy;
    register f32 dist_yz, dist_xy, square_dist, ret_dist;
    register f32 n_0, n_1;
    register f32 half, three, zero;

    asm
    {
        // dist_yz = (a.y - b.y, a.z - b.z)
        psq_l    v0_yz, 4(a), 0, 0
        psq_l    v1_yz, 4(b), 0, 0
        ps_sub   dist_yz, v0_yz, v1_yz
        // The x/y pair lives at offset 0 (offset 4 would re-read y/z).
        psq_l    v0_xy, 0(a), 0, 0
        psq_l    v1_xy, 0(b), 0, 0
        // dist_yz = (dy*dy, dz*dz): square the differences, do not subtract.
        ps_mul   dist_yz, dist_yz, dist_yz
        // dist_xy = (a.x - b.x, a.y - b.y)
        ps_sub   dist_xy, v0_xy, v1_xy
    }
    half = 0.5f;
    asm
    {
        // square_dist.ps0 = dx*dx + dy*dy
        ps_madd  square_dist, dist_xy, dist_xy, dist_yz
        // zero = half - half
        fsubs    zero, half, half
        // square_dist.ps0 += dist_yz.ps1 (dz*dz)
        ps_sum0  square_dist, square_dist, dist_yz, dist_yz
        // Coincident points: return the zero squared distance directly.
        fcmpu    cr0, zero, square_dist
        beq-     __exit
    }
    three = 3.0f;
    asm
    {
        // rsqrt estimate followed by one Newton-Raphson refinement.
        frsqrte  ret_dist, square_dist
        fmuls    n_0, ret_dist, ret_dist
        fmuls    n_1, ret_dist, half
        fnmsubs  n_0, n_0, square_dist, three
        fmuls    ret_dist, n_0, n_1
        // distance = s * (1 / sqrt(s))
        fmuls    square_dist, square_dist, ret_dist
    __exit:
    }

    return square_dist;
}