#include "types.h"
#include "Dolphin/vec.h"
// Upper-case aliases for the PowerPC floating-point registers fp1..fp13,
// for use as operands in the inline assembly routines in this file.
// R_RET names fp1.
// NOTE(review): fp1 is presumably the float return register of the target
// ABI, hence the name - confirm against the ABI documentation.
#define R_RET fp1
#define FP2 fp2
#define FP3 fp3
#define FP4 fp4
#define FP5 fp5
#define FP6 fp6
#define FP7 fp7
#define FP8 fp8
#define FP9 fp9
#define FP10 fp10
#define FP11 fp11
#define FP12 fp12
#define FP13 fp13
// PSVECAdd: component-wise sum of two vectors, ret = vec1 + vec2.
//
// vec1, vec2: input vectors (read only).
// ret:        receives the result.
//
// Written as a pure assembly function using paired-single instructions.
// Each vector is accessed as a float pair at byte offset 0 plus a single
// float at byte offset 8.
// NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
// that quantization register 0 is set up for unscaled floats - confirm.
asm void PSVECAdd
(
const register Vec *vec1,
const register Vec *vec2,
register Vec *ret
)
{
// No stack frame is generated; the routine returns with the explicit blr.
nofralloc;
// First pair (offsets 0 and 4) of each input.
psq_l FP2, 0(vec1), 0, 0;
psq_l FP4, 0(vec2), 0, 0;
ps_add FP6, FP2, FP4;
psq_st FP6, 0(ret), 0, 0;
// Third component (offset 8): the W operand is 1, so only one float is
// loaded/stored per instruction.
psq_l FP3, 8(vec1), 1, 0;
psq_l FP5, 8(vec2), 1, 0;
ps_add FP7, FP3, FP5;
psq_st FP7, 8(ret), 1, 0;
blr
}
// PSVECSubtract: component-wise difference of two vectors, ret = vec1 - vec2.
//
// vec1: minuend (read only).
// vec2: subtrahend (read only).
// ret:  receives the result.
//
// Written as a pure assembly function using paired-single instructions.
// Each vector is accessed as a float pair at byte offset 0 plus a single
// float at byte offset 8.
// NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
// that quantization register 0 is set up for unscaled floats - confirm.
asm void PSVECSubtract
(
const register Vec *vec1,
const register Vec *vec2,
register Vec *ret
)
{
// No stack frame is generated; the routine returns with the explicit blr.
nofralloc;
// First pair (offsets 0 and 4) of each input.
psq_l FP2, 0(vec1), 0, 0;
psq_l FP4, 0(vec2), 0, 0;
ps_sub FP6, FP2, FP4;
psq_st FP6, 0(ret), 0, 0;
// Third component (offset 8): the W operand is 1, so only one float is
// loaded/stored per instruction.
psq_l FP3, 8(vec1), 1, 0;
psq_l FP5, 8(vec2), 1, 0;
ps_sub FP7, FP3, FP5;
psq_st FP7, 8(ret), 1, 0;
blr
}
// PSVECNormalize: scales vec1 by the reciprocal of its length and writes
// the result to ret.
//
// vec1: input vector (read only).
// ret:  receives the scaled vector.
//
// The reciprocal square root comes from the hardware estimate (frsqrte)
// refined by one Newton-Raphson step, so the result is an approximation.
// There is no guard against a zero-length input: the squared length is fed
// to frsqrte unconditionally.
// NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
// that quantization register 0 is set up for unscaled floats - confirm.
void PSVECNormalize
(
const register Vec *vec1,
register Vec *ret
)
{
// Constants for the Newton-Raphson refinement below.
register f32 half = 0.5f;
register f32 three = 3.0f;
register f32 xx_zz, xx_yy;
register f32 square_sum;
register f32 ret_sqrt;
register f32 n_0, n_1;
asm
{
// FP2 = first pair of vec1; xx_yy = that pair squared lane by lane.
psq_l FP2, 0(vec1), 0, 0;
ps_mul xx_yy, FP2, FP2;
// FP3 = third component (single-float load).
psq_l FP3, 8(vec1), 1, 0;
// Lane 0 of xx_zz = third^2 + first^2.
ps_madd xx_zz, FP3, FP3, xx_yy;
// Lane 0 of square_sum = xx_zz lane 0 + xx_yy lane 1 = squared length.
ps_sum0 square_sum, xx_zz, FP3, xx_yy;
// Initial estimate e of 1/sqrt(square_sum).
frsqrte ret_sqrt, square_sum;
// One Newton-Raphson step: e' = (0.5 * e) * (3 - square_sum * e^2).
fmuls n_0, ret_sqrt, ret_sqrt;
fmuls n_1, ret_sqrt, half;
fnmsubs n_0, n_0, square_sum, three;
fmuls ret_sqrt, n_0, n_1;
// Scale every component by the refined reciprocal length and store.
ps_muls0 FP2, FP2, ret_sqrt;
psq_st FP2, 0(ret), 0, 0;
ps_muls0 FP3, FP3, ret_sqrt;
psq_st FP3, 8(ret), 1, 0;
}
}
// PSVECMag: returns the length of v.
//
// v: input vector (read only).
//
// Returns the squared length multiplied by a reciprocal-square-root
// approximation (hardware estimate plus one Newton-Raphson step). When the
// squared length compares equal to zero the routine branches straight to
// the exit label and returns that zero, skipping frsqrte and the
// refinement.
// NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
// that quantization register 0 is set up for unscaled floats - confirm.
f32 PSVECMag ( const register Vec *v )
{
register f32 v_xy, v_zz, square_mag;
register f32 ret_mag, n_0, n_1;
register f32 three, half, zero;
half = 0.5f;
asm
{
// v_xy = first pair of v, then squared lane by lane.
psq_l v_xy, 0(v), 0, 0
ps_mul v_xy, v_xy, v_xy
// v_zz = third component.
lfs v_zz, 8(v)
// zero = half - half, producing 0.0 without loading another constant.
fsubs zero, half, half
// Lane 0 of square_mag = third^2 + first^2.
ps_madd square_mag, v_zz, v_zz, v_xy
// Lane 0 of square_mag += second^2 (lane 1 of v_xy): the squared length.
ps_sum0 square_mag, square_mag, v_xy, v_xy
// Zero-length vector: return the (zero) squared length as is.
fcmpu cr0, square_mag, zero
beq- __exit
// Initial estimate e of 1/sqrt(square_mag).
frsqrte ret_mag, square_mag
}
three = 3.0f;
asm
{
// One Newton-Raphson step: e' = (0.5 * e) * (3 - square_mag * e^2).
fmuls n_0, ret_mag, ret_mag
fmuls n_1, ret_mag, half
fnmsubs n_0, n_0, square_mag, three
fmuls ret_mag, n_0, n_1
// length = square_mag * (1 / sqrt(square_mag)).
fmuls square_mag, square_mag, ret_mag
__exit:
}
return square_mag;
}
// PSVECCrossProduct: ret = vec1 x vec2 (vector cross product).
//
// vec1, vec2: input vectors (read only).
// ret:        receives the result.
//
// Pure assembly function using paired-single instructions. All loads from
// vec1 and vec2 are issued before the first store to ret.
// The lane comments below write a = vec1, b = vec2 and use x/y/z for the
// floats at byte offsets 0/4/8.
// NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
// that quantization register 0 is set up for unscaled floats - confirm.
asm void PSVECCrossProduct
(
const register Vec *vec1,
const register Vec *vec2,
register Vec *ret
)
{
// No stack frame is generated; the routine returns with the explicit blr.
nofralloc;
// fp1 = (b.x, b.y)
psq_l fp1, 0(vec2), 0, 0
// fp2 = a.z
lfs fp2, 8(vec1)
// fp0 = (a.x, a.y)
psq_l fp0, 0(vec1), 0, 0
// fp6 = (b.y, b.x)
ps_merge10 fp6, fp1, fp1
// fp3 = b.z
lfs fp3, 8(vec2)
// fp4 = (b.x*a.z, b.y*a.z)
ps_mul fp4, fp1, fp2
// fp7 = (b.x*a.x, b.y*a.x)
ps_muls0 fp7, fp1, fp0
// fp5 = (a.x*b.z - b.x*a.z, a.y*b.z - b.y*a.z)
ps_msub fp5, fp0, fp3, fp4
// fp8 = (a.x*b.y - b.x*a.x, a.y*b.x - b.y*a.x)
ps_msub fp8, fp0, fp6, fp7
// fp9 lane 0 = a.y*b.z - a.z*b.y, the x component of the result
ps_merge11 fp9, fp5, fp5
// fp10 = (a.x*b.z - a.z*b.x, a.y*b.x - a.x*b.y): negated y and z components
ps_merge01 fp10, fp5, fp8
// Store the x component as a single float at offset 0.
psq_st fp9, 0(ret), 1, 0
// Flip the sign to obtain the y and z components, stored as a pair at offset 4.
ps_neg fp10, fp10
psq_st fp10, 4(ret), 0, 0
blr;
}
// PSVECDistance: returns the Euclidean distance between points a and b.
//
// a, b: input points (read only).
//
// The squared distance is accumulated with paired-single arithmetic and then
// multiplied by a reciprocal-square-root approximation (hardware estimate
// plus one Newton-Raphson step). When the squared distance compares equal to
// zero the routine branches straight to the exit label and returns that
// zero, skipping frsqrte and the refinement.
//
// Fixes relative to the previous revision:
//   - the "xy" pairs are loaded from byte offset 0 (they were loaded from
//     offset 4, so the first component never contributed);
//   - the yz differences are squared with ps_mul (they were subtracted from
//     themselves with ps_sub, which always produced zero).
//
// NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
// that quantization register 0 is set up for unscaled floats - confirm.
f32 PSVECDistance(const register Vec *a, const register Vec *b)
{
    register f32 v0_yz, v1_yz, v0_xy, v1_xy;
    register f32 dist_yz, dist_xy, square_dist, ret_dist;
    register f32 n_0, n_1;
    register f32 half, three, zero;
    asm
    {
        // dist_yz = (a.y - b.y, a.z - b.z)
        psq_l v0_yz, 4(a), 0, 0
        psq_l v1_yz, 4(b), 0, 0
        ps_sub dist_yz, v0_yz, v1_yz
        // Load the (x, y) pairs from the start of each vector.
        psq_l v0_xy, 0(a), 0, 0
        psq_l v1_xy, 0(b), 0, 0
        // dist_yz = (dy^2, dz^2)
        ps_mul dist_yz, dist_yz, dist_yz
        // dist_xy = (dx, dy)
        ps_sub dist_xy, v0_xy, v1_xy
    }
    half = 0.5f;
    asm
    {
        // square_dist = (dx^2 + dy^2, dy^2 + dz^2)
        ps_madd square_dist, dist_xy, dist_xy, dist_yz
        // zero = half - half, producing 0.0 without loading another constant.
        fsubs zero, half, half
        // Lane 0 of square_dist = dx^2 + dy^2 + dz^2 (adds lane 1 of dist_yz).
        ps_sum0 square_dist, square_dist, dist_yz, dist_yz
        // Coincident points: return the (zero) squared distance as is.
        fcmpu cr0, zero, square_dist
        beq- __exit
    }
    three = 3.0f;
    asm
    {
        // Initial estimate e of 1/sqrt(square_dist).
        frsqrte ret_dist, square_dist
        // One Newton-Raphson step: e' = (0.5 * e) * (3 - square_dist * e^2).
        fmuls n_0, ret_dist, ret_dist
        fmuls n_1, ret_dist, half
        fnmsubs n_0, n_0, square_dist, three
        fmuls ret_dist, n_0, n_1
        // distance = square_dist * (1 / sqrt(square_dist)).
        fmuls square_dist, square_dist, ret_dist
    __exit:
    }
    return square_dist;
}