From cae351f7b1b921020aed1422230d95f38be9f6cf Mon Sep 17 00:00:00 2001 From: Crow-bar Date: Tue, 20 Dec 2022 11:10:04 +0300 Subject: [PATCH] engine: psp math optimization, QuaternionSlerp, BoundsIntersect, Mod_SetStudioHullPlane --- engine/common/mod_studio.c | 3 ++ public/xash3d_mathlib.c | 5 +- public/xash3d_mathlib.h | 6 +++ public/xash3d_mathlib_asm.S | 92 +++++++++++++++++++++++++++++++++++-- ref_gu/gu_studio.c | 2 +- 5 files changed, 103 insertions(+), 5 deletions(-) diff --git a/engine/common/mod_studio.c b/engine/common/mod_studio.c index a20f4fa60..98bce2fe4 100644 --- a/engine/common/mod_studio.c +++ b/engine/common/mod_studio.c @@ -204,6 +204,9 @@ mstudiocache_t *Mod_CheckStudioCache( model_t *model, float frame, int sequence, SetStudioHullPlane ==================== */ +#if XASH_PSP +static inline +#endif void Mod_SetStudioHullPlane( int planenum, int bone, int axis, float offset, const vec3_t size ) { mplane_t *pl = &studio_planes[planenum]; diff --git a/public/xash3d_mathlib.c b/public/xash3d_mathlib.c index 05de993d9..8980ec0cd 100644 --- a/public/xash3d_mathlib.c +++ b/public/xash3d_mathlib.c @@ -600,6 +600,7 @@ void ExpandBounds( vec3_t mins, vec3_t maxs, float offset ) BoundsIntersect ================= */ +#if !XASH_PSP qboolean BoundsIntersect( const vec3_t mins1, const vec3_t maxs1, const vec3_t mins2, const vec3_t maxs2 ) { if( mins1[0] > maxs2[0] || mins1[1] > maxs2[1] || mins1[2] > maxs2[2] ) @@ -608,6 +609,7 @@ qboolean BoundsIntersect( const vec3_t mins1, const vec3_t maxs1, const vec3_t m return false; return true; } +#endif /* ================= @@ -757,6 +759,7 @@ make sure quaternions are within 180 degrees of one another, if not, reverse q ==================== */ +#if !XASH_PSP void QuaternionAlign( const vec4_t p, const vec4_t q, vec4_t qt ) { // decide if one of the quaternions is backwards @@ -845,9 +848,9 @@ void QuaternionSlerp( const vec4_t p, const vec4_t q, float t, vec4_t qt ) // 0.0 returns p, 1.0 return q. 
// decide if one of the quaternions is backwards QuaternionAlign( p, q, q2 ); - QuaternionSlerpNoAlign( p, q2, t, qt ); } +#endif /* ==================== diff --git a/public/xash3d_mathlib.h b/public/xash3d_mathlib.h index 34a1e16b3..e199c5a39 100644 --- a/public/xash3d_mathlib.h +++ b/public/xash3d_mathlib.h @@ -153,7 +153,13 @@ void PlaneIntersect( const struct mplane_s *plane, const vec3_t p0, const vec3_t void ClearBounds( vec3_t mins, vec3_t maxs ); void AddPointToBounds( const vec3_t v, vec3_t mins, vec3_t maxs ); +#if XASH_PSP // PSP: AABB overlap test expanded inline as a macro; every argument is evaluated several times, so pass side-effect-free expressions +#define BoundsIntersect( mins1, maxs1, mins2, maxs2 ) \ + (( (mins1)[0] > (maxs2)[0] || (mins1)[1] > (maxs2)[1] || (mins1)[2] > (maxs2)[2] ) ? false : \ + ( (maxs1)[0] < (mins2)[0] || (maxs1)[1] < (mins2)[1] || (maxs1)[2] < (mins2)[2] ) ? false : true ) +#else qboolean BoundsIntersect( const vec3_t mins1, const vec3_t maxs1, const vec3_t mins2, const vec3_t maxs2 ); +#endif qboolean BoundsAndSphereIntersect( const vec3_t mins, const vec3_t maxs, const vec3_t origin, float radius ); qboolean SphereIntersect( const vec3_t vSphereCenter, float fSphereRadiusSquared, const vec3_t vLinePt, const vec3_t vLineDir ); float RadiusFromBounds( const vec3_t mins, const vec3_t maxs ); diff --git a/public/xash3d_mathlib_asm.S b/public/xash3d_mathlib_asm.S index 901e39e66..833d03e71 100644 --- a/public/xash3d_mathlib_asm.S +++ b/public/xash3d_mathlib_asm.S @@ -20,15 +20,101 @@ GNU General Public License for more details. 
.text .align 4 + .global QuaternionSlerp .global BoxOnPlaneSide +// void QuaternionSlerp( const vec4_t p, const vec4_t q, float t, vec4_t qt ); + .ent QuaternionSlerp +QuaternionSlerp: + ############################ + # a0 - IN *p # + # a1 - IN *q # + # f12 - IN t # + # a2 - IN *qt # + ############################ + mfc1 $t0, $f12 // t0 = raw bits of t + mtv $t0, S031 // S031 = sclq = t + li $t0, 0x358637bd // t0 = EPSILON = 0.000001f + mtv $t0, S033 // S033 = t0 = EPSILON + lv.q C010, 0($a0) // C010 = p + lv.q C020, 0($a1) // C020 = q + + // QuaternionAlign + vsub.q C100, C010, C020 // C100 = p[*] - q[*] + vadd.q C110, C010, C020 // C110 = p[*] + q[*] + vdot.q S000, C100, C100 // S000 = a = sum of (p[*] - q[*]) * (p[*] - q[*]) + vdot.q S001, C110, C110 // S001 = b = sum of (p[*] + q[*]) * (p[*] + q[*]) + vcmp.s GT, S000, S001 // CC[0] = a > b + vcmovt.q C020, C020[-X,-Y,-Z,-W], 0 // if CC[0] q = -q + // *** + + // QuaternionSlerpNoAlign + vdot.q S000, C010, C020 // S000 = cosom = sum of p[*] * q[*] + vadd.s S001, S000[1], S000 // S001 = 1.0f + cosom + vcmp.s LE, S001, S033 // CC[0] = ( 1.0f + cosom ) <= 0.000001f + bvt 0, Lqs2 // if CC[0] goto Lqs2 + vocp.s S030, S031 // S030 = sclp = 1.0f - t (delay slot) + vsub.s S001, S000[1], S000 // S001 = 1.0f - cosom + vcmp.s LE, S001, S033 // CC[0] = ( 1.0f - cosom ) <= 0.000001f + bvt 0, Lqs1 // if CC[0] goto Lqs1 (plain linear blend with sclp = 1 - t, sclq = t) + nop // (delay slot) + + // acos + vcst.s S001, VFPU_SQRT1_2 // S001 = VFPU_SQRT1_2 = 1 / sqrt(2) + vcmp.s LT, S000[|x|], S001 // CC[0] = abs(cosom) < (1 / sqrt(2)) + vasin.s S032, S000[|x|] // S032 = asin(abs(cosom)) + bvtl 0, Lqs0 // if CC[0] goto Lqs0 + vocp.s S032, S032 // S032 = 1 - S032 = acos(abs(cosom)) = omega (bvtl delay slot) + vmul.s S001, S000, S000 // S001 = cosom * cosom + vocp.s S001[0:1], S001 // S001 = 1 - S001[0:1] + vsqrt.s S001, S001 // S001 = sqrt(S001) + vasin.s S032, S001 // S032 = asin(S001) = acos(abs(cosom)) = omega + // *** +Lqs0: + vscl.p C030, C030, S032 // S030 = S030 * S032 = sclp * omega + // S031 = S031 * S032 = sclq * omega + vsin.t C030, C030 // S030 = 
sin(S030) = sin(sclp * omega) = sclp + // S031 = sin(S031) = sin(sclq * omega) = sclq + // S032 = sin(S032) = sin(omega) = sinom + vrcp.s S032, S032 // S032 = 1.0f / S032 = 1 / sin(omega) = 1 / sinom + vscl.p C030, C030, S032 // S030 = S030 * S032 = sin(sclp * omega) / sinom + // S031 = S031 * S032 = sin(sclq * omega) / sinom +Lqs1: + vscl.q C010, C010, S030 // C010 = p[*4] * sclp + vscl.q C020, C020, S031 // C020 = q[*4] * sclq (q after alignment) + b LqsEnd // goto LqsEnd + vadd.q C000, C010, C020 // S000 = qt[0] = sclp * p[0] + sclq * q[0] (delay slot) + // S001 = qt[1] = sclp * p[1] + sclq * q[1] + // S002 = qt[2] = sclp * p[2] + sclq * q[2] + // S003 = qt[3] = sclp * p[3] + sclq * q[3] +Lqs2: + vmov.q C000, C020[-Y,X,-W,Z] // S000 = qt[0] = -q[1]; + // S001 = qt[1] = q[0]; + // S002 = qt[2] = -q[3]; + // S003 = qt[3] = q[2]; + vsin.p C030, C030 // S030 = sclp = sin(( 1.0f - t ) * ( 0.5f * M_PI_F )) + // S031 = sclq = sin( t * ( 0.5f * M_PI_F )) + vscl.t C010, C010, S030 // C010 = p[*3] * sclp + vscl.t C020, C000, S031 // C020 = qt[*3] * sclq + vadd.t C000, C010, C020 // S000 = qt[0] = sclp * p[0] + sclq * qt[0] + // S001 = qt[1] = sclp * p[1] + sclq * qt[1] + // S002 = qt[2] = sclp * p[2] + sclq * qt[2] + // S003 = qt[3] is not written here; it keeps q[2] from the vmov.q above +LqsEnd: + sv.q C000, 0($a2) // *qt = C000 + jr $ra // NOTE(review): no explicit delay-slot instruction follows this jump although the branches above fill theirs by hand; confirm the assembler mode (.set reorder / noreorder) in effect for this file + .end QuaternionSlerp + + + + // int BoxOnPlaneSide( vec3_t emins, vec3_t emaxs, mplane_t *p ); .ent BoxOnPlaneSide BoxOnPlaneSide: ############################ - # a0 - IN emins # - # a1 - IN emaxs # - # a2 - IN p # + # a0 - IN *emins # + # a1 - IN *emaxs # + # a2 - IN *p # # v0 - OUT sides # ############################ lbu $v0, 17($a2) // p->signbits diff --git a/ref_gu/gu_studio.c b/ref_gu/gu_studio.c index 1b8f6eafd..3c79d5269 100644 --- a/ref_gu/gu_studio.c +++ b/ref_gu/gu_studio.c @@ -2427,7 +2427,7 @@ static void R_StudioDrawPoints( void ) { mstudioboneweight_t *pvertweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendvertinfoindex); mstudioboneweight_t *pnormweight = (mstudioboneweight_t *)((byte 
*)m_pStudioHeader + m_pSubModel->blendnorminfoindex); - matrix3x4 skinMat __attribute__( (aligned( 16 ) ) ); + matrix3x4 skinMat; for( i = 0; i < m_pSubModel->numverts; i++ ) {