From 55487832c614edb61ebf22d0c153944ba864feaa Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Thu, 2 Feb 2023 11:30:25 -0400 Subject: sdk: Add `mtx` We're slowly getting there. --- include/dolphin/mtx.h | 3 +- include/dolphin/vec.h | 36 +++ include/fdlibm.h | 227 +++++++++++++++++ obj_files.mk | 6 +- src/Dolphin/mtx/mtx.c | 648 +++++++++++++++++++++++++++++++++++++++++++++++ src/Dolphin/mtx/mtx44.c | 98 +++++++ src/Dolphin/mtx/mtxvec.c | 95 +++++++ src/Dolphin/mtx/vec.c | 146 +++++++++++ 8 files changed, 1256 insertions(+), 3 deletions(-) create mode 100644 include/dolphin/vec.h create mode 100644 include/fdlibm.h create mode 100644 src/Dolphin/mtx/mtx.c create mode 100644 src/Dolphin/mtx/mtx44.c create mode 100644 src/Dolphin/mtx/mtxvec.c create mode 100644 src/Dolphin/mtx/vec.c diff --git a/include/dolphin/mtx.h b/include/dolphin/mtx.h index f57beea..b1c5525 100644 --- a/include/dolphin/mtx.h +++ b/include/dolphin/mtx.h @@ -68,9 +68,8 @@ void PSMTXTranspose(const Mtx src, Mtx xPose); u32 PSMTXInverse(const Mtx src, Mtx inv); u32 PSMTXInvXpose(const Mtx src, Mtx invX); void PSMTXMultVec(const Mtx m, const Vec* src, Vec* dst); -void PSMTXMultVecArray(const Mtx m, const Vec* srcBase, Vec* dstBase, u32 count); void PSMTXMultVecSR(const Mtx m, const Vec* src, Vec* dst); -void PSMTXMultVecArraySR(const Mtx m, const Vec* srcBase, Vec* dstBase, u32 count); +void PSMTXMultVecArraySR(const Mtx m, const Vec* src, Vec* dst, u32 n); void PSMTXQuat(Mtx m, const Quaternion* q); void PSMTXReflect(Mtx m, const Vec* p, const Vec* n); void PSMTXTrans(Mtx m, f32 xT, f32 yT, f32 zT); diff --git a/include/dolphin/vec.h b/include/dolphin/vec.h new file mode 100644 index 0000000..27a0037 --- /dev/null +++ b/include/dolphin/vec.h @@ -0,0 +1,36 @@ +#ifndef _DOLPHIN_VEC_H +#define _DOLPHIN_VEC_H + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif // ifdef __cplusplus + +typedef struct Vec { + f32 x; + f32 y; + f32 z; +} Vec; + +void PSVECAdd(const Vec*, const Vec*, Vec*); +void PSVECSubtract(const Vec*, const Vec*, Vec*); +void PSVECNormalize(const Vec*, Vec*); +f32 PSVECMag(const Vec*); +void PSVECCrossProduct(const Vec*, const Vec*, Vec*); + +#ifdef __cplusplus +} +#endif + +// lfs f1,0(r3) +// lfs f0,4(r3) +// fmuls f1,f1,f1 +// lfs f2,8(r3) +// fmuls f0,f0,f0 +// fmuls f2,f2,f2 +// fadds f0,f1,f0 +// fadds f1,f2,f0 +// blr + +#endif diff --git a/include/fdlibm.h b/include/fdlibm.h new file mode 100644 index 0000000..2b3388c --- /dev/null +++ b/include/fdlibm.h @@ -0,0 +1,227 @@ +#ifndef _FDLIBM_H +#define _FDLIBM_H + +/* @(#)fdlibm.h 1.5 04/04/22 */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#ifdef __cplusplus +extern "C" { +#endif // ifdef __cplusplus + +/* Sometimes it's necessary to define __LITTLE_ENDIAN explicitly + but these catch some common cases. */ + +#if defined(i386) || defined(i486) || defined(intel) || defined(x86) || defined(i86pc) || defined(__alpha) || defined(__osf__) +#define __LITTLE_ENDIAN +#endif + +#ifdef __LITTLE_ENDIAN +#define __HI(x) *(1 + (int*)&x) +#define __LO(x) *(int*)&x +#define __HIp(x) *(1 + (int*)x) +#define __LOp(x) *(int*)x +#else +#define __HI(x) *(int*)&x +#define __LO(x) *(1 + (int*)&x) +#define __HIp(x) *(int*)x +#define __LOp(x) *(1 + (int*)x) +#endif + +// TODO: should __STDC__ actually be defined? +// #ifdef __STDC__ +#define __P(p) p +// #else +// #define __P(p) () +// #endif + +/* + * ANSI/POSIX + */ + +extern int signgam; + +#define MAXFLOAT ((float)3.40282346638528860e+38) + +enum fdversion { fdlibmIeee = -1, fdlibmSvid, fdlibmXopen, fdlibmPosix }; + +#define _LIB_VERSION_TYPE enum fdversion +#define _LIB_VERSION _fdlib_version + +/* if global variable _LIB_VERSION is not desirable, one may + * change the following to be a constant by: + * #define _LIB_VERSION_TYPE const enum version + * In that case, after one initializes the value _LIB_VERSION (see + * s_lib_version.c) during compile time, it cannot be modified + * in the middle of a program + */ +extern _LIB_VERSION_TYPE _LIB_VERSION; + +#define _IEEE_ fdlibmIeee +#define _SVID_ fdlibmSvid +#define _XOPEN_ fdlibmXopen +#define _POSIX_ fdlibmPosix + +struct exception { + int type; + char* name; + double arg1; + double arg2; + double retval; +}; + +#define HUGE MAXFLOAT + +/* + * set X_TLOSS = pi*2**52, which is possibly defined in + * (one may replace the following line by "#include ") + */ + +#define X_TLOSS 1.41484755040568800000e+16 + +#define DOMAIN 1 +#define SING 2 +#define OVERFLOW 3 +#define UNDERFLOW 4 +#define TLOSS 5 +#define PLOSS 6 + +/* + * ANSI/POSIX + */ +extern double acos __P((double)); +extern double asin __P((double)); +extern double atan __P((double)); +extern double atan2 __P((double, double)); +extern double cos __P((double)); +extern double sin __P((double)); +extern double tan __P((double)); + +extern double cosh __P((double)); +extern double sinh __P((double)); +extern double tanh __P((double)); + +extern double exp __P((double)); +extern double frexp __P((double, int*)); +extern double ldexp __P((double, int)); +extern double log __P((double)); +extern double log10 __P((double)); +extern double modf __P((double, double*)); + +extern double pow __P((double, double)); +extern double sqrt __P((double)); + +extern double ceil __P((double)); +extern double fabs __P((double)); +extern double floor __P((double)); +extern double fmod __P((double, double)); + +extern double erf __P((double)); +extern double erfc __P((double)); +extern double gamma __P((double)); +extern double hypot __P((double, double)); +extern int isnan __P((double)); +extern int finite __P((double)); +extern double j0 __P((double)); +extern double j1 __P((double)); +extern double jn __P((int, double)); +extern double lgamma __P((double)); +extern double y0 __P((double)); +extern double y1 __P((double)); +extern double yn __P((int, double)); + +extern double acosh __P((double)); +extern double asinh __P((double)); +extern double atanh __P((double)); +extern double cbrt __P((double)); +extern double logb __P((double)); +extern double nextafter __P((double, double)); +extern double remainder __P((double, double)); +#ifdef _SCALB_INT +extern double scalb __P((double, int)); +#else +extern double scalb __P((double, double)); +#endif + +extern int matherr __P((struct exception*)); + +/* + * IEEE Test Vector + */ +extern double significand __P((double)); + +/* + * Functions callable from C, intended to support IEEE arithmetic. + */ +extern double copysign __P((double, double)); +extern int ilogb __P((double)); +extern double rint __P((double)); +extern double scalbn __P((double, int)); + +/* + * BSD math library entry points + */ +extern double expm1 __P((double)); +extern double log1p __P((double)); + +/* + * Reentrant version of gamma & lgamma; passes signgam back by reference + * as the second argument; user must allocate space for signgam. + */ +#ifdef _REENTRANT +extern double gamma_r __P((double, int*)); +extern double lgamma_r __P((double, int*)); +#endif /* _REENTRANT */ + +/* ieee style elementary functions */ +extern double __ieee754_sqrt __P((double)); +extern double __ieee754_acos __P((double)); +extern double __ieee754_acosh __P((double)); +extern double __ieee754_log __P((double)); +extern double __ieee754_atanh __P((double)); +extern double __ieee754_asin __P((double)); +extern double __ieee754_atan2 __P((double, double)); +extern double __ieee754_exp __P((double)); +extern double __ieee754_cosh __P((double)); +extern double __ieee754_fmod __P((double, double)); +extern double __ieee754_pow __P((double, double)); +extern double __ieee754_lgamma_r __P((double, int*)); +extern double __ieee754_gamma_r __P((double, int*)); +extern double __ieee754_lgamma __P((double)); +extern double __ieee754_gamma __P((double)); +extern double __ieee754_log10 __P((double)); +extern double __ieee754_sinh __P((double)); +extern double __ieee754_hypot __P((double, double)); +extern double __ieee754_j0 __P((double)); +extern double __ieee754_j1 __P((double)); +extern double __ieee754_y0 __P((double)); +extern double __ieee754_y1 __P((double)); +extern double __ieee754_jn __P((int, double)); +extern double __ieee754_yn __P((int, double)); +extern double __ieee754_remainder __P((double, double)); +extern int __ieee754_remPio2 __P((double, double*)); +#ifdef _SCALB_INT +extern double __ieee754_scalb __P((double, int)); +#else +extern double __ieee754_scalb __P((double, double)); +#endif + +/* fdlibm kernel function */ +extern double __kernel_standard __P((double, double, int)); +extern double __kernel_sin __P((double, double, int)); +extern double __kernel_cos __P((double, double)); +extern double __kernel_tan __P((double, double, int)); +extern int __kernel_remPio2 __P((double*, double*, int, int, int, const int*)); + +#ifdef __cplusplus +}; +#endif // ifdef __cplusplus + +#endif diff --git a/obj_files.mk b/obj_files.mk index 99022d0..24a0fb6 100644 --- a/obj_files.mk +++ b/obj_files.mk @@ -30,4 +30,8 @@ O_FILES := $(BUILD_DIR)/src/main.o \ $(BUILD_DIR)/src/Dolphin/os/OSSync.o \ $(BUILD_DIR)/src/Dolphin/os/OSThread.o \ $(BUILD_DIR)/src/Dolphin/os/OSTime.o \ - $(BUILD_DIR)/src/Dolphin/os/__ppc_eabi_init.o + $(BUILD_DIR)/src/Dolphin/os/__ppc_eabi_init.o \ + $(BUILD_DIR)/src/Dolphin/mtx/mtx.o \ + $(BUILD_DIR)/src/Dolphin/mtx/mtxvec.o \ + $(BUILD_DIR)/src/Dolphin/mtx/mtx44.o \ + $(BUILD_DIR)/src/Dolphin/mtx/vec.o diff --git a/src/Dolphin/mtx/mtx.c b/src/Dolphin/mtx/mtx.c new file mode 100644 index 0000000..4a229bd --- /dev/null +++ b/src/Dolphin/mtx/mtx.c @@ -0,0 +1,648 @@ +// This file was taken from the Pikmin 2 decompilation project. +// https://github.com/projectPiki/pikmin2/blob/main/src/Dolphin/mtx/mtx.c +#include "types.h" +#include "fdlibm.h" +#include "Dolphin/mtx.h" +static f32 Unit01[] = { 0.0f, 1.0f }; + +void PSMTXIdentity +( + register Mtx m +) +{ + register f32 zero_c = 0.0f; + register f32 one_c = 1.0f; + register f32 c_01; + register f32 c_10; + + asm + { + psq_st zero_c, 8(m), 0, 0 + ps_merge01 c_01, zero_c, one_c + psq_st zero_c, 24(m), 0, 0 + ps_merge10 c_10, one_c, zero_c + psq_st zero_c, 32(m), 0, 0 + psq_st c_01, 16(m), 0, 0 + psq_st c_10, 0(m), 0, 0 + psq_st c_10, 40(m), 0, 0 + } +} + +asm void PSMTXCopy +( + const register Mtx src, + register Mtx dst +) +{ + nofralloc + + psq_l fp0, 0(src), 0, 0 + psq_st fp0, 0(dst), 0, 0 + psq_l fp1, 8(src), 0, 0 + psq_st fp1, 8(dst), 0, 0 + psq_l fp2, 16(src), 0, 0 + psq_st fp2, 16(dst), 0, 0 + psq_l fp3, 24(src), 0, 0 + psq_st fp3, 24(dst), 0, 0 + psq_l fp4, 32(src), 0, 0 + psq_st fp4, 32(dst), 0, 0 + psq_l fp5, 40(src), 0, 0 + psq_st fp5, 40(dst), 0, 0 + + blr +} + +asm void PSMTXConcat +( + const register Mtx mA, // r3 + const register Mtx mB, // r4 + register Mtx mAB // r5 +) +{ + nofralloc + +#define FP0 fp0 +#define FP1 fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 +#define FP14 fp14 +#define FP15 fp15 +#define FP31 fp31 + stwu r1, -64(r1) + psq_l FP0, 0(mA), 0, 0 + stfd fp14, 8(r1) + psq_l FP6, 0(mB), 0, 0 + addis r6, 0, Unit01@ha + psq_l FP7, 8(mB), 0, 0 + stfd fp15, 16(r1) + addi r6, r6, Unit01@l + stfd fp31, 40(r1) + psq_l FP8, 16(mB), 0, 0 + ps_muls0 FP12, FP6, FP0 + psq_l FP2, 16(mA), 0, 0 + ps_muls0 FP13, FP7, FP0 + psq_l FP31, 0(r6), 0, 0 + ps_muls0 FP14, FP6, FP2 + psq_l FP9, 24(mB), 0, 0 + ps_muls0 FP15, FP7, FP2 + psq_l FP1, 8(mA), 0, 0 + ps_madds1 FP12, FP8, FP0, FP12 + psq_l FP3, 24(mA), 0, 0 + ps_madds1 FP14, FP8, FP2, FP14 + psq_l FP10, 32(mB), 0, 0 + ps_madds1 FP13, FP9, FP0, FP13 + psq_l FP11, 40(mB), 0, 0 + ps_madds1 FP15, FP9, FP2, FP15 + psq_l FP4, 32(mA), 0, 0 + psq_l FP5, 40(mA), 0, 0 + ps_madds0 FP12, FP10, FP1, FP12 + ps_madds0 FP13, FP11, FP1, FP13 + ps_madds0 FP14, FP10, FP3, FP14 + ps_madds0 FP15, FP11, FP3, FP15 + psq_st FP12, 0(mAB), 0, 0 + + ps_muls0 FP2, FP6, FP4 + ps_madds1 FP13, FP31, FP1, FP13 + ps_muls0 FP0, FP7, FP4 + psq_st FP14, 16(mAB), 0, 0 + ps_madds1 FP15, FP31, FP3, FP15 + + psq_st FP13, 8(mAB), 0, 0 + + ps_madds1 FP2, FP8, FP4, FP2 + ps_madds1 FP0, FP9, FP4, FP0 + ps_madds0 FP2, FP10, FP5, FP2 + lfd fp14, 8(r1) + psq_st FP15, 24(mAB), 0, 0 + ps_madds0 FP0, FP11, FP5, FP0 + psq_st FP2, 32(mAB), 0, 0 + ps_madds1 FP0, FP31, FP5, FP0 + lfd fp15, 16(r1) + psq_st FP0, 40(mAB), 0, 0 + + lfd fp31, 40(r1) + addi r1, r1, 64 + + blr + +#undef FP0 +#undef FP1 +#undef FP2 +#undef FP3 +#undef FP4 +#undef FP5 +#undef FP6 +#undef FP7 +#undef FP8 +#undef FP9 +#undef FP10 +#undef FP11 +#undef FP12 +#undef FP13 +#undef FP14 +#undef FP15 +#undef FP31 +} + +#pragma scheduling off +void PSMTXTranspose ( const register Mtx src, register Mtx xPose ) +{ + register f32 c_zero = 0.0f; + register f32 row0a, row1a, row0b, row1b; + register f32 trns0, trns1, trns2; + + asm + { + psq_l row0a, 0(src), 0, 0 + stfs c_zero, 44(xPose) + psq_l row1a, 16(src), 0, 0 + ps_merge00 trns0, row0a, row1a + psq_l row0b, 8(src), 1, 0 + ps_merge11 trns1, row0a, row1a + psq_l row1b, 24(src), 1, 0 + psq_st trns0, 0(xPose), 0, 0 + psq_l row0a, 32(src), 0, 0 + ps_merge00 trns2, row0b, row1b + psq_st trns1, 16(xPose), 0, 0 + ps_merge00 trns0, row0a, c_zero + psq_st trns2, 32(xPose), 0, 0 + ps_merge10 trns1, row0a, c_zero + psq_st trns0, 8(xPose), 0, 0 + lfs row0b, 40(src) + psq_st trns1, 24(xPose), 0, 0 + stfs row0b, 40(xPose) + } +} + +asm u32 PSMTXInverse ( const register Mtx src, register Mtx inv ) +{ + nofralloc + psq_l fp0, 0(src), 1, 0 + psq_l fp1, 4(src), 0, 0 + psq_l fp2, 16(src), 1, 0 + ps_merge10 fp6, fp1, fp0 + psq_l fp3, 20(src), 0, 0 + psq_l fp4, 32(src), 1, 0 + ps_merge10 fp7, fp3, fp2 + psq_l fp5, 36(src), 0, 0 + ps_mul fp11, fp3, fp6 + ps_mul fp13, fp5, fp7 + ps_merge10 fp8, fp5, fp4 + ps_msub fp11, fp1, fp7, fp11 + ps_mul fp12, fp1, fp8 + ps_msub fp13, fp3, fp8, fp13 + ps_mul fp10, fp3, fp4 + ps_msub fp12, fp5, fp6, fp12 + ps_mul fp9, fp0, fp5 + ps_mul fp8, fp1, fp2 + ps_sub fp6, fp6, fp6 + ps_msub fp10, fp2, fp5, fp10 + ps_mul fp7, fp0, fp13 + ps_msub fp9, fp1, fp4, fp9 + ps_madd fp7, fp2, fp12, fp7 + ps_msub fp8, fp0, fp3, fp8 + ps_madd fp7, fp4, fp11, fp7 + ps_cmpo0 cr0, fp7, fp6 + bne _regular + addi r3, 0, 0 + blr + _regular: + fres fp0, fp7 + ps_add fp6, fp0, fp0 + ps_mul fp5, fp0, fp0 + ps_nmsub fp0, fp7, fp5, fp6 + lfs fp1, 12(src) + ps_muls0 fp13, fp13, fp0 + lfs fp2, 28(src) + ps_muls0 fp12, fp12, fp0 + lfs fp3, 44(src) + ps_muls0 fp11, fp11, fp0 + ps_merge00 fp5, fp13, fp12 + ps_muls0 fp10, fp10, fp0 + ps_merge11 fp4, fp13, fp12 + ps_muls0 fp9, fp9, fp0 + psq_st fp5, 0(inv), 0, 0 + ps_mul fp6, fp13, fp1 + psq_st fp4, 16(inv), 0, 0 + ps_muls0 fp8, fp8, fp0 + ps_madd fp6, fp12, fp2, fp6 + psq_st fp10, 32(inv), 1, 0 + ps_nmadd fp6, fp11, fp3, fp6 + psq_st fp9, 36(inv), 1, 0 + ps_mul fp7, fp10, fp1 + ps_merge00 fp5, fp11, fp6 + psq_st fp8, 40(inv), 1, 0 + ps_merge11 fp4, fp11, fp6 + psq_st fp5, 8(inv), 0, 0 + ps_madd fp7, fp9, fp2, fp7 + psq_st fp4, 24(inv), 0, 0 + ps_nmadd fp7, fp8, fp3, fp7 + addi r3, 0, 1 + psq_st fp7, 44(inv), 1, 0 + blr +} + +void PSMTXRotRad ( Mtx m, char axis, f32 rad ) +{ + f32 sinA, cosA; + + sinA = sinf(rad); + cosA = cosf(rad); + + PSMTXRotTrig( m, axis, sinA, cosA ); +} + +void PSMTXRotTrig ( + register Mtx m, + register char axis, + register f32 sinA, + register f32 cosA ) +{ + register f32 fc0, fc1, nsinA; + register f32 fw0, fw1, fw2, fw3; + + asm + { + frsp sinA, sinA + frsp cosA, cosA + } + + fc0 = 0.0f; + fc1 = 1.0f; + + asm + { + ori axis, axis, 0x20 + ps_neg nsinA, sinA + cmplwi axis, 'x' + beq _case_x + cmplwi axis, 'y' + beq _case_y + cmplwi axis, 'z' + beq _case_z + b _end + + _case_x: + psq_st fc1, 0(m), 1, 0 + psq_st fc0, 4(m), 0, 0 + ps_merge00 fw0, sinA, cosA + psq_st fc0, 12(m), 0, 0 + ps_merge00 fw1, cosA, nsinA + psq_st fc0, 28(m), 0, 0 + psq_st fc0, 44(m), 1, 0 + psq_st fw0, 36(m), 0, 0 + psq_st fw1, 20(m), 0, 0 + b _end; + + _case_y: + ps_merge00 fw0, cosA, fc0 + ps_merge00 fw1, fc0, fc1 + psq_st fc0, 24(m), 0, 0 + psq_st fw0, 0(m), 0, 0 + ps_merge00 fw2, nsinA, fc0 + ps_merge00 fw3, sinA, fc0 + psq_st fw0, 40(m), 0, 0; + psq_st fw1, 16(m), 0, 0; + psq_st fw3, 8(m), 0, 0; + psq_st fw2, 32(m), 0, 0; + b _end; + + _case_z: + psq_st fc0, 8(m), 0, 0 + ps_merge00 fw0, sinA, cosA + ps_merge00 fw2, cosA, nsinA + psq_st fc0, 24(m), 0, 0 + psq_st fc0, 32(m), 0, 0 + ps_merge00 fw1, fc1, fc0 + psq_st fw0, 16(m), 0, 0 + psq_st fw2, 0(m), 0, 0 + psq_st fw1, 40(m), 0, 0 + + _end: + + } +} + +static void __PSMTXRotAxisRadInternal( + register Mtx m, + const register Vec *axis, + register f32 sT, + register f32 cT ) +{ + register f32 tT, fc0; + register f32 tmp0, tmp1, tmp2, tmp3, tmp4; + register f32 tmp5, tmp6, tmp7, tmp8, tmp9; + + tmp9 = 0.5f; + tmp8 = 3.0f; + + asm + { + frsp cT, cT + psq_l tmp0, 0(axis), 0, 0 + frsp sT, sT + lfs tmp1, 8(axis) + ps_mul tmp2, tmp0, tmp0 + fadds tmp7, tmp9, tmp9 + ps_madd tmp3, tmp1, tmp1, tmp2 + fsubs fc0, tmp9, tmp9 + ps_sum0 tmp4, tmp3, tmp1, tmp2 + fsubs tT, tmp7, cT + frsqrte tmp5, tmp4 + fmuls tmp2, tmp5, tmp5 + fmuls tmp3, tmp5, tmp9 + fnmsubs tmp2, tmp2, tmp4, tmp8 + fmuls tmp5, tmp2, tmp3 + ps_merge00 cT, cT, cT + ps_muls0 tmp0, tmp0, tmp5 + ps_muls0 tmp1, tmp1, tmp5 + ps_muls0 tmp4, tmp0, tT + ps_muls0 tmp9, tmp0, sT + ps_muls0 tmp5, tmp1, tT + ps_muls1 tmp3, tmp4, tmp0 + ps_muls0 tmp2, tmp4, tmp0 + ps_muls0 tmp4, tmp4, tmp1 + fnmsubs tmp6, tmp1, sT, tmp3 + fmadds tmp7, tmp1, sT, tmp3 + ps_neg tmp0, tmp9 + ps_sum0 tmp8, tmp4, fc0, tmp9 + ps_sum0 tmp2, tmp2, tmp6, cT + ps_sum1 tmp3, cT, tmp7, tmp3 + ps_sum0 tmp6, tmp0, fc0 ,tmp4 + psq_st tmp8, 8(m), 0, 0 + ps_sum0 tmp0, tmp4, tmp4, tmp0 + psq_st tmp2, 0(m), 0, 0 + ps_muls0 tmp5, tmp5, tmp1 + psq_st tmp3, 16(m), 0, 0 + ps_sum1 tmp4, tmp9, tmp0, tmp4 + psq_st tmp6, 24(m), 0, 0 + ps_sum0 tmp5, tmp5, fc0, cT + psq_st tmp4, 32(m), 0, 0 + psq_st tmp5, 40(m), 0, 0 + } +} + +// clang-format on +void PSMTXRotAxisRad(Mtx m, const Vec* axis, f32 rad) +{ + f32 sinT, cosT; + + sinT = sinf(rad); + cosT = cosf(rad); + + __PSMTXRotAxisRadInternal(m, axis, sinT, cosT); +} + +// clang-format off +void PSMTXTrans( + register Mtx m, + register f32 xT, + register f32 yT, + register f32 zT +) +{ + register f32 c0 = 0.0f; + register f32 c1 = 1.0f; + + asm + { + stfs xT, 12(m) + stfs yT, 28(m) + psq_st c0, 4(m), 0, 0 + psq_st c0, 32(m), 0, 0 + stfs c0, 16(m) + stfs c1, 20(m) + stfs c0, 24(m) + stfs c1, 40(m) + stfs zT, 44(m) + stfs c1, 0(m) + } +} + +asm void PSMTXTransApply( + const register Mtx src, + register Mtx dst, + register f32 xT, + register f32 yT, + register f32 zT ) +{ + nofralloc; + psq_l fp4, 0(src), 0, 0; + frsp xT, xT; + psq_l fp5, 8(src), 0, 0; + frsp yT, yT; + psq_l fp7, 24(src), 0, 0; + frsp zT, zT; + psq_l fp8, 40(src), 0, 0; + psq_st fp4, 0(dst), 0, 0; + ps_sum1 fp5, xT, fp5, fp5; + psq_l fp6, 16(src), 0, 0; + psq_st fp5, 8(dst), 0, 0; + ps_sum1 fp7, yT, fp7, fp7; + psq_l fp9, 32(src), 0, 0; + psq_st fp6, 16(dst), 0, 0; + ps_sum1 fp8, zT, fp8, fp8; + psq_st fp7, 24(dst), 0, 0; + psq_st fp9, 32(dst), 0, 0; + psq_st fp8, 40(dst), 0, 0; + blr; +} + +void PSMTXScale( + register Mtx m, + register f32 xS, + register f32 yS, + register f32 zS +) +{ + register f32 c0 = 0.0f; + + asm + { + stfs xS, 0(m) + psq_st c0, 4(m), 0, 0 + psq_st c0, 12(m), 0, 0 + stfs yS, 20(m) + psq_st c0, 24(m), 0, 0 + psq_st c0, 32(m), 0, 0 + stfs zS, 40(m) + stfs c0, 44(m) + } +} + +asm void PSMTXScaleApply ( + const register Mtx src, + register Mtx dst, + register f32 xS, + register f32 yS, + register f32 zS ) +{ + nofralloc; + frsp xS, xS; + psq_l fp4, 0(src), 0, 0; + frsp yS, yS; + psq_l fp5, 8(src), 0, 0; + frsp zS, zS; + ps_muls0 fp4, fp4, xS; + psq_l fp6, 16(src), 0, 0; + ps_muls0 fp5, fp5, xS; + psq_l fp7, 24(src), 0, 0; + ps_muls0 fp6, fp6, yS; + psq_l fp8, 32(src), 0, 0; + psq_st fp4, 0(dst), 0, 0; + ps_muls0 fp7, fp7, yS; + psq_l fp2, 40(src), 0, 0; + psq_st fp5, 8(dst), 0, 0; + ps_muls0 fp8, fp8, zS; + psq_st fp6, 16(dst), 0, 0; + ps_muls0 fp2, fp2, zS; + psq_st fp7, 24(dst), 0, 0; + psq_st fp8, 32(dst), 0, 0; + psq_st fp2, 40(dst), 0, 0; + blr; +} + +void PSMTXQuat ( register Mtx m, const register Quaternion *q ) +{ + register f32 c_zero, c_one, c_two, scale; + register f32 tmp0, tmp1, tmp2, tmp3, tmp4; + register f32 tmp5, tmp6, tmp7, tmp8, tmp9; + c_one = 1.0f; + asm + { + psq_l tmp0, 0(q), 0, 0 + psq_l tmp1, 8(q), 0, 0 + fsubs c_zero, c_one, c_one + fadds c_two, c_one, c_one + ps_mul tmp2, tmp0, tmp0 + ps_merge10 tmp5, tmp0, tmp0 + ps_madd tmp4, tmp1, tmp1, tmp2 + ps_mul tmp3, tmp1, tmp1 + ps_sum0 scale, tmp4, tmp4, tmp4 + ps_muls1 tmp7, tmp5, tmp1 + fres tmp9, scale + ps_sum1 tmp4, tmp3, tmp4, tmp2 + ps_nmsub scale, scale, tmp9, c_two + ps_muls1 tmp6, tmp1, tmp1 + ps_mul scale, tmp9, scale + ps_sum0 tmp2, tmp2, tmp2, tmp2 + fmuls scale, scale, c_two + ps_madd tmp8, tmp0, tmp5, tmp6 + ps_msub tmp6, tmp0, tmp5, tmp6 + psq_st c_zero, 12(m), 1, 0 + ps_nmsub tmp2, tmp2, scale, c_one + ps_nmsub tmp4, tmp4, scale, c_one + psq_st c_zero, 44(m), 1, 0 + ps_mul tmp8, tmp8, scale + ps_mul tmp6, tmp6, scale + psq_st tmp2, 40(m), 1, 0 + ps_madds0 tmp5, tmp0, tmp1, tmp7 + ps_merge00 tmp1, tmp8, tmp4 + ps_nmsub tmp7, tmp7, c_two, tmp5 + ps_merge10 tmp0, tmp4, tmp6 + psq_st tmp1, 16(m), 0, 0 + ps_mul tmp5, tmp5, scale + ps_mul tmp7, tmp7, scale + psq_st tmp0, 0(m), 0, 0 + psq_st tmp5, 8(m), 1, 0 + ps_merge10 tmp3, tmp7, c_zero + ps_merge01 tmp9, tmp7, tmp5 + psq_st tmp3, 24(m), 0, 0 + psq_st tmp9, 32(m), 0, 0 + } +} + +// clang-format on +void C_MTXLookAt(Mtx m, const Vec* camPos, const Vec* camUp, const Vec* target) +{ + + Vec vLook, vRight, vUp; + + vLook.x = camPos->x - target->x; + vLook.y = camPos->y - target->y; + vLook.z = camPos->z - target->z; + PSVECNormalize(&vLook, &vLook); + + PSVECCrossProduct(camUp, &vLook, &vRight); + PSVECNormalize(&vRight, &vRight); + + PSVECCrossProduct(&vLook, &vRight, &vUp); + + m[0][0] = vRight.x; + m[0][1] = vRight.y; + m[0][2] = vRight.z; + m[0][3] = -(camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z); + + m[1][0] = vUp.x; + m[1][1] = vUp.y; + m[1][2] = vUp.z; + m[1][3] = -(camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z); + + m[2][0] = vLook.x; + m[2][1] = vLook.y; + m[2][2] = vLook.z; + m[2][3] = -(camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z); +} + +void C_MTXLightFrustum(Mtx m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 scaleS, f32 scaleT, f32 transS, f32 transT) +{ + +} + +void C_MTXLightPerspective(Mtx m, f32 fovY, f32 aspect, float scaleS, float scaleT, float transS, float transT) +{ + f32 angle; + f32 cot; + + angle = fovY * 0.5f; + angle = MTXDegToRad(angle); + + cot = 1.0f / tanf(angle); + + m[0][0] = (cot / aspect) * scaleS; + m[0][1] = 0.0f; + m[0][2] = -transS; + m[0][3] = 0.0f; + + m[1][0] = 0.0f; + m[1][1] = cot * scaleT; + m[1][2] = -transT; + m[1][3] = 0.0f; + + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = -1.0f; + m[2][3] = 0.0f; +} + +void C_MTXLightOrtho(Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS, float scaleT, float transS, float transT) +{ + f32 tmp; + + tmp = 1.0f / (r - l); + m[0][0] = (2.0f * tmp * scaleS); + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = ((-(r + l) * tmp) * scaleS) + transS; + + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = (2.0f * tmp) * scaleT; + m[1][2] = 0.0f; + m[1][3] = ((-(t + b) * tmp) * scaleT) + transT; + + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 0.0f; + m[2][3] = 1.0f; +} diff --git a/src/Dolphin/mtx/mtx44.c b/src/Dolphin/mtx/mtx44.c new file mode 100644 index 0000000..b974888 --- /dev/null +++ b/src/Dolphin/mtx/mtx44.c @@ -0,0 +1,98 @@ +// This file was taken from the Melee decompilation project. +// https://github.com/doldecomp/melee/blob/master/src/dolphin/mtx/mtx44.c +#include +#include + +void C_MTXFrustum(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f) +{ + f32 temp_f8; + f32 temp_f6; + f32 temp_f9; + f32 temp_f11; + + temp_f11 = 1.0F / (r - l); + temp_f8 = 2.0F * n; + temp_f9 = 1.0F / (t - b); + temp_f6 = 1.0F / (f - n); + + m[0][0] = temp_f8 * temp_f11; + m[0][1] = 0.0F; + m[0][2] = temp_f11 * (r + l); + m[0][3] = 0.0F; + + m[1][0] = 0.0F; + m[1][1] = temp_f8 * temp_f9; + m[1][2] = temp_f9 * (t + b); + m[1][3] = 0.0F; + + m[2][0] = 0.0F; + m[2][1] = 0.0F; + m[2][2] = -n * temp_f6; + m[2][3] = temp_f6 * -(f * n); + + m[3][0] = 0.0F; + m[3][1] = 0.0F; + m[3][2] = -1.0F; + m[3][3] = 0.0F; +} + +void C_MTXPerspective(Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f) +{ + f32 temp_f3; + f32 temp_f4; + + fovY = 0.5F * fovY; + + temp_f4 = 1.0F / tanf(0.017453292F * (fovY)); + temp_f3 = 1.0F / (f - n); + + m[0][0] = temp_f4 / aspect; + m[0][1] = 0.0F; + m[0][2] = 0.0F; + m[0][3] = 0.0F; + + m[1][0] = 0.0F; + m[1][1] = temp_f4; + m[1][2] = 0.0F; + m[1][3] = 0.0F; + + m[2][0] = 0.0F; + m[2][1] = 0.0F; + m[2][2] = -n * temp_f3; + m[2][3] = temp_f3 * -(f * n); + + m[3][0] = 0.0F; + m[3][1] = 0.0F; + m[3][2] = -1.0F; + m[3][3] = 0.0F; +} + +void C_MTXOrtho(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f) +{ + f32 temp_f8; + f32 temp_f10; + f32 temp_f4; + + temp_f10 = 1.0F / (r - l); + m[0][0] = 2.0F * temp_f10; + m[0][1] = 0.0F; + m[0][2] = 0.0F; + m[0][3] = temp_f10 * -(r + l); + + temp_f8 = 1.0F / (t - b); + m[1][0] = 0.0F; + m[1][1] = 2.0F * temp_f8; + m[1][2] = 0.0F; + m[1][3] = temp_f8 * -(t + b); + + temp_f4 = 1.0F / (f - n); + m[2][0] = 0.0F; + m[2][1] = 0.0F; + m[2][2] = -1.0F * temp_f4; + m[2][3] = -f * temp_f4; + + m[3][0] = 0.0F; + m[3][1] = 0.0F; + m[3][2] = 0.0F; + m[3][3] = 1.0F; +} diff --git a/src/Dolphin/mtx/mtxvec.c b/src/Dolphin/mtx/mtxvec.c new file mode 100644 index 0000000..bb2e1d4 --- /dev/null +++ b/src/Dolphin/mtx/mtxvec.c @@ -0,0 +1,95 @@ +#include + +asm void PSMTXMultVec(const register Mtx m, const register Vec *src, register Vec *dst) +{ + nofralloc + psq_l f0,0x0(src),0,0 + psq_l f2,0x0(m),0,0 + psq_l f1,0x8(src),0x1,0 + + ps_mul f4, f2, f0 + psq_l f3,0x8(m),0,0 + ps_madd f5, f3, f1, f4 + psq_l f8,0x10(m),0,0 + ps_sum0 f6, f5, f6, f5 + psq_l f9,0x18(m),0,0 + ps_mul f10, f8, f0 + psq_st f6,0x0(dst),0x1,0 + ps_madd f11, f9, f1, f10 + psq_l f2,0x20(m),0,0 + ps_sum0 f12, f11, f12, f11 + psq_l f3,0x28(m),0,0 + ps_mul f4, f2, f0 + psq_st f12,0x4(dst),0x1,0 + ps_madd f5, f3, f1, f4 + ps_sum0 f6, f5, f6, f5 + psq_st f6,0x8(dst),0x1,0 + blr +} + +asm void PSMTXMultVecSR(const register Mtx m, const register Vec *src, register Vec *dst) +{ + nofralloc + psq_l f0,0x0(m),0,0 + psq_l f6,0x0(src),0,0 + psq_l f2,0x10(m),0,0 + ps_mul f8, f0, f6 + psq_l f4,0x20(m),0,0 + ps_mul f10, f2, f6 + psq_l f7,0x8(src),0x1,0 + ps_mul f12, f4, f6 + psq_l f3,0x18(m),0,0 + ps_sum0 f8, f8, f8, f8 + psq_l f5,0x28(m),0,0 + ps_sum0 f10, f10, f10, f10 + psq_l f1,0x8(m),0,0 + ps_sum0 f12, f12, f12, f12 + ps_madd f9, f1, f7, f8 + psq_st f9,0x0(dst),0x1,0 + ps_madd f11, f3, f7, f10 + psq_st f11,0x4(dst),0x1,0 + ps_madd f13, f5, f7, f12 + psq_st f13,0x8(dst),0x1,0 + blr +} + +asm void PSMTXMultVecArraySR(const register Mtx m, const register Vec *src, register Vec *dst, register u32 n) +{ + nofralloc + psq_l f13,0x0(m),0,0 + psq_l f12,0x10(m),0,0 + subi n, n, 0x1 + psq_l f11,0x8(m),0x1,0 + ps_merge00 f0, f13, f12 + subi r5, dst, 0x4 + psq_l f10,0x18(m),0x1,0 + ps_merge11 f1, f13, f12 + mtctr n + psq_l f3,0x20(m),0,0 + ps_merge00 f2, f11, f10 + psq_l f4,0x28(m),0x1,0 + psq_l f6,0x0(src),0,0 + psq_lu f7,0x8(src),0x1,0 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + ps_madds1 f8, f1, f6, f8 + ps_madd f10, f4, f7, f9 + + loop: + psq_lu f6,0x4(src),0,0 + ps_madds0 f12, f2, f7, f8 + psq_lu f7,0x8(src),0x1,0 + ps_sum0 f13, f10, f9, f9 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + psq_stu f12,0x4(dst),0,0 + ps_madds1 f8, f1, f6, f8 + psq_stu f13,0x8(dst),0x1,0 + ps_madd f10, f4, f7, f9 + bdnz+ loop + ps_madds0 f12, f2, f7, f8 + ps_sum0 f13, f10, f9, f9 + psq_stu f12,0x4(dst),0,0 + psq_stu f13,0x8(dst),0x1,0 + blr +} diff --git a/src/Dolphin/mtx/vec.c b/src/Dolphin/mtx/vec.c new file mode 100644 index 0000000..af42c0b --- /dev/null +++ b/src/Dolphin/mtx/vec.c @@ -0,0 +1,146 @@ +#include "types.h" +#include "Dolphin/vec.h" + +#define R_RET fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 +// clang-format off +asm void PSVECAdd +( + const register Vec *vec1, + const register Vec *vec2, + register Vec *ret +) +{ + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_add FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_add FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +} + +asm void PSVECSubtract +( + const register Vec *vec1, + const register Vec *vec2, + register Vec *ret +) +{ + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_sub FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_sub FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +} + +void PSVECNormalize +( + const register Vec *vec1, + register Vec *ret +) +{ + register f32 half = 0.5f; + register f32 three = 3.0f; + register f32 xx_zz, xx_yy; + register f32 square_sum; + register f32 ret_sqrt; + register f32 n_0, n_1; + asm + { + psq_l FP2, 0(vec1), 0, 0; + ps_mul xx_yy, FP2, FP2; + psq_l FP3, 8(vec1), 1, 0; + ps_madd xx_zz, FP3, FP3, xx_yy; + ps_sum0 square_sum, xx_zz, FP3, xx_yy; + frsqrte ret_sqrt, square_sum; + fmuls n_0, ret_sqrt, ret_sqrt; + fmuls n_1, ret_sqrt, half; + fnmsubs n_0, n_0, square_sum, three; + fmuls ret_sqrt, n_0, n_1; + ps_muls0 FP2, FP2, ret_sqrt; + psq_st FP2, 0(ret), 0, 0; + ps_muls0 FP3, FP3, ret_sqrt; + psq_st FP3, 8(ret), 1, 0; + } +} + +f32 PSVECMag ( const register Vec *v ) +{ + register f32 v_xy, v_zz, square_mag; + register f32 ret_mag, n_0, n_1; + register f32 three, half, zero; + half = 0.5f; + asm + { + psq_l v_xy, 0(v), 0, 0 + ps_mul v_xy, v_xy, v_xy + lfs v_zz, 8(v) + fsubs zero, half, half + ps_madd square_mag, v_zz, v_zz, v_xy + ps_sum0 square_mag, square_mag, v_xy, v_xy + fcmpu cr0, square_mag, zero + beq- __exit + frsqrte ret_mag, square_mag + } + three = 3.0f; + asm + { + fmuls n_0, ret_mag, ret_mag + fmuls n_1, ret_mag, half + fnmsubs n_0, n_0, square_mag, three + fmuls ret_mag, n_0, n_1 + fmuls square_mag, square_mag, ret_mag + __exit: + } + return square_mag; +} + +asm void PSVECCrossProduct +( + const register Vec *vec1, + const register Vec *vec2, + register Vec *ret +) +{ + nofralloc; + psq_l fp1, 0(vec2), 0, 0 + lfs fp2, 8(vec1) + psq_l fp0, 0(vec1), 0, 0 + ps_merge10 fp6, fp1, fp1 + lfs fp3, 8(vec2) + ps_mul fp4, fp1, fp2 + ps_muls0 fp7, fp1, fp0 + ps_msub fp5, fp0, fp3, fp4 + ps_msub fp8, fp0, fp6, fp7 + ps_merge11 fp9, fp5, fp5 + ps_merge01 fp10, fp5, fp8 + psq_st fp9, 0(ret), 1, 0 + ps_neg fp10, fp10 + psq_st fp10, 4(ret), 0, 0 + blr; +} + +f32 PSVECDistance(const register Vec *a, const register Vec *b) +{ + return 0.0f; +} -- cgit v1.2.3-13-gbd6f