summaryrefslogtreecommitdiff
path: root/src/Dolphin/mtx/vec.c
blob: e5ca7f9705c743aa7139c574f049dfeacfdc9666 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#include "types.h"
#include "Dolphin/vec.h"

// Aliases for the PowerPC floating-point registers fp1..fp13, used as
// explicit register operands by the inline assembly in this file.
// Only FP2..FP7 are referenced by the routines visible here.
// NOTE(review): R_RET is not referenced in the visible code; the name
// suggests fp1 in its role as the float return register - confirm.
#define R_RET fp1
#define FP2   fp2
#define FP3   fp3
#define FP4   fp4
#define FP5   fp5
#define FP6   fp6
#define FP7   fp7
#define FP8   fp8
#define FP9   fp9
#define FP10  fp10
#define FP11  fp11
#define FP12  fp12
#define FP13  fp13
// clang-format off
/*
 * PSVECAdd: stores the component-wise sum (*vec1 + *vec2) into *ret using
 * paired-single instructions.
 *
 * The two floats at offset 0 are loaded, added and stored as a pair; the
 * float at offset 8 is handled on its own through the single-element
 * (W = 1) form of psq_l / psq_st.
 *
 * nofralloc suppresses the compiler-generated prologue/epilogue, which is
 * why the routine ends with an explicit blr.
 *
 * NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
 * that GQR0 is configured for unquantized floats - confirm against
 * Dolphin/vec.h and the runtime initialisation.
 */
asm void PSVECAdd
(
    const register Vec *vec1,
    const register Vec *vec2,
    register Vec *ret
)
{
    nofralloc;
    // First pair: offsets 0 and 4 of each operand.
    psq_l     FP2,  0(vec1), 0, 0;
    psq_l     FP4,  0(vec2), 0, 0;
    ps_add    FP6, FP2, FP4;
    psq_st    FP6,  0(ret), 0, 0;
    // Remaining single float at offset 8 (W = 1: one element only).
    psq_l     FP3,   8(vec1), 1, 0;
    psq_l     FP5,   8(vec2), 1, 0;
    ps_add    FP7, FP3, FP5;
    psq_st    FP7,   8(ret), 1, 0;
    blr
}

/*
 * PSVECSubtract: stores the component-wise difference (*vec1 - *vec2) into
 * *ret using paired-single instructions.
 *
 * The two floats at offset 0 are loaded, subtracted and stored as a pair;
 * the float at offset 8 is handled on its own through the single-element
 * (W = 1) form of psq_l / psq_st.
 *
 * nofralloc suppresses the compiler-generated prologue/epilogue, which is
 * why the routine ends with an explicit blr.
 *
 * NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
 * that GQR0 is configured for unquantized floats - confirm against
 * Dolphin/vec.h and the runtime initialisation.
 */
asm void PSVECSubtract
(
    const register Vec *vec1,
    const register Vec *vec2,
          register Vec *ret
)
{
    nofralloc;
    // First pair: offsets 0 and 4 of each operand.
    psq_l     FP2,  0(vec1), 0, 0;
    psq_l     FP4,  0(vec2), 0, 0;
    ps_sub    FP6, FP2, FP4;
    psq_st    FP6, 0(ret), 0, 0;
    // Remaining single float at offset 8 (W = 1: one element only).
    psq_l     FP3,   8(vec1), 1, 0;
    psq_l     FP5,   8(vec2), 1, 0;
    ps_sub    FP7, FP3, FP5;
    psq_st    FP7,  8(ret), 1, 0;
    blr
}

/*
 * PSVECNormalize: scales *vec1 by the reciprocal of its length and stores
 * the result in *ret.
 *
 * Steps:
 *   1. square_sum = sum of the squares of the three floats of *vec1.
 *   2. ret_sqrt   = frsqrte(square_sum), a hardware estimate of
 *                   1 / sqrt(square_sum).
 *   3. One Newton-Raphson refinement:
 *        ret_sqrt = (3 - ret_sqrt^2 * square_sum) * (ret_sqrt * 0.5)
 *   4. Every component is multiplied by ret_sqrt and stored.
 *
 * No zero-length check is performed before frsqrte, so a zero vector is
 * not special-cased by this routine.
 *
 * NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
 * that GQR0 is configured for unquantized floats - confirm against
 * Dolphin/vec.h and the runtime initialisation.
 */
void PSVECNormalize
(
    const register Vec *vec1,
          register Vec *ret
)
{
    register f32 half  = 0.5f;
    register f32 three = 3.0f;
    register f32 xx_zz, xx_yy;
    register f32 square_sum;
    register f32 ret_sqrt;
    register f32 n_0, n_1;
    asm
    {
        // FP2 = first pair (offsets 0, 4); xx_yy = each element squared.
        psq_l       FP2, 0(vec1), 0, 0;
        ps_mul      xx_yy, FP2, FP2;
        // FP3 = single float at offset 8 (W = 1).
        psq_l       FP3, 8(vec1), 1, 0;
        // xx_zz slot 0 = z*z + x*x; ps_sum0 then adds slot 1 of xx_yy (y*y).
        ps_madd     xx_zz, FP3, FP3, xx_yy;
        ps_sum0     square_sum, xx_zz, FP3, xx_yy;
        // Reciprocal square-root estimate followed by one Newton-Raphson step.
        frsqrte     ret_sqrt, square_sum;
        fmuls       n_0, ret_sqrt, ret_sqrt;
        fmuls       n_1, ret_sqrt, half;
        // fnmsubs: n_0 = three - n_0 * square_sum
        fnmsubs     n_0, n_0, square_sum, three;
        fmuls       ret_sqrt, n_0, n_1;
        // Scale by slot 0 of ret_sqrt and write the result out.
        ps_muls0    FP2, FP2, ret_sqrt;
        psq_st      FP2, 0(ret), 0, 0;
        ps_muls0    FP3, FP3, ret_sqrt;
        psq_st      FP3, 8(ret), 1, 0;
    }
}

/*
 * PSVECMag: returns the length of *v.
 *
 * The squared length is accumulated into square_mag. If it compares equal
 * to zero the routine branches straight to __exit and returns that zero.
 * Otherwise it takes the frsqrte estimate of 1 / sqrt(square_mag), applies
 * one Newton-Raphson refinement
 *     ret_mag = (3 - ret_mag^2 * square_mag) * (ret_mag * 0.5)
 * and returns square_mag * ret_mag, i.e. sqrt(square_mag).
 *
 * The constants are assigned in C between the asm blocks; `zero` is derived
 * inside the asm as half - half.
 *
 * NOTE(review): assumes Vec is three consecutive f32 values (x, y, z), that
 * GQR0 is configured for unquantized floats, and that lfs fills both
 * paired-single slots - confirm against the platform setup.
 */
f32 PSVECMag ( const register Vec *v )
{
    register f32    v_xy, v_zz, square_mag;
    register f32    ret_mag, n_0, n_1;
    register f32    three, half, zero;
    half = 0.5f;
    asm
    {
        // v_xy = (x*x, y*y)
        psq_l       v_xy, 0(v), 0, 0
        ps_mul      v_xy, v_xy, v_xy
        lfs         v_zz, 8(v)
        fsubs       zero, half, half
        // slot 0: z*z + x*x, then ps_sum0 adds slot 1 of v_xy (y*y)
        ps_madd     square_mag, v_zz, v_zz, v_xy
        ps_sum0     square_mag, square_mag, v_xy, v_xy
        // Zero length: skip the reciprocal square root and return 0.
        fcmpu       cr0, square_mag, zero
        beq-        __exit
        frsqrte     ret_mag, square_mag
    }
    three = 3.0f;
    asm
    {
        // One Newton-Raphson step on the estimate, then s * (1/sqrt(s)).
        fmuls       n_0, ret_mag, ret_mag
        fmuls       n_1, ret_mag, half
        fnmsubs     n_0, n_0, square_mag, three
        fmuls       ret_mag, n_0, n_1
        fmuls       square_mag, square_mag, ret_mag
    __exit:
    }
    return square_mag;
}

/*
 * PSVECCrossProduct: stores the cross product (*vec1 x *vec2) into *ret.
 *
 * Writing a = *vec1 and b = *vec2, the register contents are (slot0, slot1):
 *   fp1  = (bx, by)            fp0 = (ax, ay)
 *   fp2  = az                  fp3 = bz
 *   fp6  = (by, bx)                              ps_merge10 swaps the slots
 *   fp4  = (bx*az, by*az)
 *   fp7  = (bx*ax, by*ax)
 *   fp5  = (ax*bz - bx*az, ay*bz - by*az)
 *   fp8  = (ax*by - bx*ax, ay*bx - by*ax)
 *   fp9  = slot 1 of fp5                         -> ret x (single store)
 *   fp10 = -(slot 0 of fp5, slot 1 of fp8)       -> ret y, z (pair at +4)
 *
 * All inputs are loaded before the first store to ret.
 * nofralloc suppresses the compiler-generated prologue/epilogue, which is
 * why the routine ends with an explicit blr.
 *
 * NOTE(review): assumes Vec is three consecutive f32 values (x, y, z), that
 * GQR0 is configured for unquantized floats, and that lfs fills both
 * paired-single slots (fp2/fp3 are used as pair operands) - confirm.
 */
asm void PSVECCrossProduct
(
    const register Vec *vec1,
    const register Vec *vec2,
          register Vec *ret
)
{
    nofralloc;
    psq_l       fp1, 0(vec2), 0, 0
    lfs         fp2, 8(vec1)
    psq_l       fp0, 0(vec1), 0, 0
    ps_merge10  fp6, fp1, fp1
    lfs         fp3, 8(vec2)
    ps_mul      fp4, fp1, fp2
    ps_muls0    fp7, fp1, fp0
    ps_msub     fp5, fp0, fp3, fp4
    ps_msub     fp8, fp0, fp6, fp7
    ps_merge11  fp9, fp5, fp5
    ps_merge01  fp10, fp5, fp8
    // W = 1: store only the first component.
    psq_st      fp9, 0(ret), 1, 0
    ps_neg      fp10, fp10
    psq_st      fp10, 4(ret), 0, 0
    blr;
}

/*
 * PSVECDistance: returns the Euclidean distance between *a and *b.
 *
 * The squared distance is accumulated from two overlapping pairs:
 *   dist_yz = ((ay - by)^2, (az - bz)^2)          pair loaded at offset 4
 *   dist_xy = (ax - bx, ay - by)                  pair loaded at offset 0
 *   square_dist slot 0 = dist_xy.x^2 + dist_yz.y  = dx^2 + dy^2
 *   ps_sum0 then adds slot 1 of dist_yz (dz^2).
 *
 * If the squared distance compares equal to zero the routine branches to
 * __exit and returns that zero. Otherwise it takes the frsqrte estimate of
 * 1 / sqrt(square_dist), applies one Newton-Raphson refinement
 *     ret_dist = (3 - ret_dist^2 * square_dist) * (ret_dist * 0.5)
 * and returns square_dist * ret_dist, i.e. sqrt(square_dist).
 *
 * NOTE(review): assumes Vec is three consecutive f32 values (x, y, z) and
 * that GQR0 is configured for unquantized floats - confirm against
 * Dolphin/vec.h and the runtime initialisation.
 */
f32 PSVECDistance(const register Vec *a, const register Vec *b)
{
	register f32 v0_yz, v1_yz, v0_xy, v1_xy;
	register f32 dist_yz, dist_xy, square_dist, ret_dist;
	register f32 n_0, n_1;
	register f32 half, three, zero;

	asm
	{
		// (y, z) pair of each operand and their difference.
		psq_l v0_yz, 4(a), 0, 0
		psq_l v1_yz, 4(b), 0, 0
		ps_sub dist_yz, v0_yz, v1_yz

		// (x, y) pair lives at offset 0; the yz difference is squared
		// (ps_mul) while the xy difference is formed.
		psq_l v0_xy, 0(a), 0, 0
		psq_l v1_xy, 0(b), 0, 0
		ps_mul dist_yz, dist_yz, dist_yz
		ps_sub dist_xy, v0_xy, v1_xy
	}
	half = 0.5f;
	asm
	{
		// slot 0: dx*dx + dy*dy, then ps_sum0 adds slot 1 of dist_yz (dz*dz).
		ps_madd square_dist, dist_xy, dist_xy, dist_yz
		fsubs zero, half, half
		ps_sum0 square_dist, square_dist, dist_yz, dist_yz
		// Coincident points: skip the reciprocal square root and return 0.
		fcmpu cr0, zero, square_dist
		beq- __exit
	}
	three = 3.0f;
	asm
	{
		// Estimate 1/sqrt(s), refine once, then s * (1/sqrt(s)) = sqrt(s).
		frsqrte ret_dist, square_dist
		fmuls n_0, ret_dist, ret_dist
		fmuls n_1, ret_dist, half
		// fnmsubs: n_0 = three - n_0 * square_dist
		fnmsubs n_0, n_0, square_dist, three
		fmuls ret_dist, n_0, n_1
		fmuls square_dist, square_dist, ret_dist
		__exit:
	}
	return square_dist;
}