summaryrefslogtreecommitdiff
path: root/src/Dolphin/mtx/vec.c
blob: af42c0b0b8ba3e67acfc50193bc97d617d4081e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include "types.h"
#include "Dolphin/vec.h"

#define R_RET fp1
#define FP2   fp2
#define FP3   fp3
#define FP4   fp4
#define FP5   fp5
#define FP6   fp6
#define FP7   fp7
#define FP8   fp8
#define FP9   fp9
#define FP10  fp10
#define FP11  fp11
#define FP12  fp12
#define FP13  fp13
// clang-format off
asm void PSVECAdd
(
    const register Vec *vec1,
    const register Vec *vec2,
    register Vec *ret
)
{
    nofralloc;
    psq_l     FP2,  0(vec1), 0, 0;
    psq_l     FP4,  0(vec2), 0, 0;
    ps_add    FP6, FP2, FP4;
    psq_st    FP6,  0(ret), 0, 0;
    psq_l     FP3,   8(vec1), 1, 0;
    psq_l     FP5,   8(vec2), 1, 0;
    ps_add    FP7, FP3, FP5;
    psq_st    FP7,   8(ret), 1, 0;
    blr
}

asm void PSVECSubtract
(
    const register Vec *vec1,
    const register Vec *vec2,
          register Vec *ret
)
{
    nofralloc;
    psq_l     FP2,  0(vec1), 0, 0;
    psq_l     FP4,  0(vec2), 0, 0;
    ps_sub    FP6, FP2, FP4;
    psq_st    FP6, 0(ret), 0, 0;
    psq_l     FP3,   8(vec1), 1, 0;
    psq_l     FP5,   8(vec2), 1, 0;
    ps_sub    FP7, FP3, FP5;
    psq_st    FP7,  8(ret), 1, 0;
    blr
}

void PSVECNormalize
(
    const register Vec *vec1,
          register Vec *ret
)
{
    register f32 half  = 0.5f;
    register f32 three = 3.0f;
    register f32 xx_zz, xx_yy;
    register f32 square_sum;
    register f32 ret_sqrt;
    register f32 n_0, n_1;
    asm
    {
        psq_l       FP2, 0(vec1), 0, 0;
        ps_mul      xx_yy, FP2, FP2;
        psq_l       FP3, 8(vec1), 1, 0;
        ps_madd     xx_zz, FP3, FP3, xx_yy;
        ps_sum0     square_sum, xx_zz, FP3, xx_yy;
        frsqrte     ret_sqrt, square_sum;
        fmuls       n_0, ret_sqrt, ret_sqrt;
        fmuls       n_1, ret_sqrt, half;
        fnmsubs     n_0, n_0, square_sum, three;
        fmuls       ret_sqrt, n_0, n_1;
        ps_muls0    FP2, FP2, ret_sqrt;
        psq_st      FP2, 0(ret), 0, 0;
        ps_muls0    FP3, FP3, ret_sqrt;
        psq_st      FP3, 8(ret), 1, 0;
    }
}

f32 PSVECMag ( const register Vec *v )
{
    register f32    v_xy, v_zz, square_mag;
    register f32    ret_mag, n_0, n_1;
    register f32    three, half, zero;
    half = 0.5f;
    asm
    {
        psq_l       v_xy, 0(v), 0, 0
        ps_mul      v_xy, v_xy, v_xy
        lfs         v_zz, 8(v)
        fsubs       zero, half, half
        ps_madd     square_mag, v_zz, v_zz, v_xy
        ps_sum0     square_mag, square_mag, v_xy, v_xy
        fcmpu       cr0, square_mag, zero
        beq-        __exit
        frsqrte     ret_mag, square_mag
    }
    three = 3.0f;
    asm
    {
        fmuls       n_0, ret_mag, ret_mag
        fmuls       n_1, ret_mag, half
        fnmsubs     n_0, n_0, square_mag, three
        fmuls       ret_mag, n_0, n_1
        fmuls       square_mag, square_mag, ret_mag
    __exit:
    }
    return square_mag;
}

asm void PSVECCrossProduct
(
    const register Vec *vec1,
    const register Vec *vec2,
          register Vec *ret
)
{
    nofralloc;
    psq_l       fp1, 0(vec2), 0, 0
    lfs         fp2, 8(vec1)
    psq_l       fp0, 0(vec1), 0, 0
    ps_merge10  fp6, fp1, fp1
    lfs         fp3, 8(vec2)
    ps_mul      fp4, fp1, fp2
    ps_muls0    fp7, fp1, fp0
    ps_msub     fp5, fp0, fp3, fp4
    ps_msub     fp8, fp0, fp6, fp7
    ps_merge11  fp9, fp5, fp5
    ps_merge01  fp10, fp5, fp8
    psq_st      fp9, 0(ret), 1, 0
    ps_neg      fp10, fp10
    psq_st      fp10, 4(ret), 0, 0
    blr;
}

f32 PSVECDistance(const register Vec *a, const register Vec *b)
{
	return 0.0f;
}