Skip to content

Commit

Permalink
engine: psp math optimization, QuaternionSlerp, BoundsIntersect, Mod_…
Browse files Browse the repository at this point in the history
…SetStudioHullPlane
  • Loading branch information
Crow-bar committed Dec 20, 2022
1 parent 9812a91 commit cae351f
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 5 deletions.
3 changes: 3 additions & 0 deletions engine/common/mod_studio.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ mstudiocache_t *Mod_CheckStudioCache( model_t *model, float frame, int sequence,
SetStudioHullPlane
====================
*/
#if XASH_PSP
static inline
#endif
void Mod_SetStudioHullPlane( int planenum, int bone, int axis, float offset, const vec3_t size )
{
mplane_t *pl = &studio_planes[planenum];
Expand Down
5 changes: 4 additions & 1 deletion public/xash3d_mathlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ void ExpandBounds( vec3_t mins, vec3_t maxs, float offset )
BoundsIntersect
=================
*/
#if !XASH_PSP
qboolean BoundsIntersect( const vec3_t mins1, const vec3_t maxs1, const vec3_t mins2, const vec3_t maxs2 )
{
if( mins1[0] > maxs2[0] || mins1[1] > maxs2[1] || mins1[2] > maxs2[2] )
Expand All @@ -608,6 +609,7 @@ qboolean BoundsIntersect( const vec3_t mins1, const vec3_t maxs1, const vec3_t m
return false;
return true;
}
#endif

/*
=================
Expand Down Expand Up @@ -757,6 +759,7 @@ make sure quaternions are within 180 degrees of one another,
if not, reverse q
====================
*/
#if !XASH_PSP
void QuaternionAlign( const vec4_t p, const vec4_t q, vec4_t qt )
{
// decide if one of the quaternions is backwards
Expand Down Expand Up @@ -845,9 +848,9 @@ void QuaternionSlerp( const vec4_t p, const vec4_t q, float t, vec4_t qt )
// 0.0 returns p, 1.0 returns q.
// decide if one of the quaternions is backwards
QuaternionAlign( p, q, q2 );

QuaternionSlerpNoAlign( p, q2, t, qt );
}
#endif

/*
====================
Expand Down
6 changes: 6 additions & 0 deletions public/xash3d_mathlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,13 @@ void PlaneIntersect( const struct mplane_s *plane, const vec3_t p0, const vec3_t

void ClearBounds( vec3_t mins, vec3_t maxs );
void AddPointToBounds( const vec3_t v, vec3_t mins, vec3_t maxs );
#if XASH_PSP
// Inline (macro) AABB overlap test: evaluates to true when box
// [mins1, maxs1] and box [mins2, maxs2] overlap or touch on all three axes,
// false otherwise.
// NOTE: this is a function-like macro, so every argument is expanded several
// times - pass only side-effect-free expressions. Arguments are parenthesized
// so that pointer expressions such as `base + 3` are indexed correctly.
#define BoundsIntersect( mins1, maxs1, mins2, maxs2 ) \
	(( (mins1)[0] > (maxs2)[0] || (mins1)[1] > (maxs2)[1] || (mins1)[2] > (maxs2)[2] ) ? false : \
	(( (maxs1)[0] < (mins2)[0] || (maxs1)[1] < (mins2)[1] || (maxs1)[2] < (mins2)[2] ) ? false : true ))
#else
qboolean BoundsIntersect( const vec3_t mins1, const vec3_t maxs1, const vec3_t mins2, const vec3_t maxs2 );
#endif
qboolean BoundsAndSphereIntersect( const vec3_t mins, const vec3_t maxs, const vec3_t origin, float radius );
qboolean SphereIntersect( const vec3_t vSphereCenter, float fSphereRadiusSquared, const vec3_t vLinePt, const vec3_t vLineDir );
float RadiusFromBounds( const vec3_t mins, const vec3_t maxs );
Expand Down
92 changes: 89 additions & 3 deletions public/xash3d_mathlib_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,101 @@ GNU General Public License for more details.

.text
.align 4
.global QuaternionSlerp
.global BoxOnPlaneSide

// void QuaternionSlerp( const vec4_t p, const vec4_t q, float t, vec4_t qt );
//
// VFPU implementation of quaternion spherical interpolation.
// Step 1 (QuaternionAlign): q is negated when |p - q|^2 > |p + q|^2.
// Step 2 (QuaternionSlerpNoAlign): p and the aligned q are blended by t and
// the result is stored to qt.
// The branches below use hand-filled delay slots, so this routine assumes the
// assembler is not reordering instructions.
// NOTE(review): lv.q / sv.q are quad-word accesses - presumably p, q and qt
// must be 16-byte aligned; verify against callers.
	.ent	QuaternionSlerp
QuaternionSlerp:
############################
# a0 - IN *p #
# a1 - IN *q #
# f12 - IN t #
# a2 - IN *qt #
############################
	mfc1	$t0, $f12
	mtv	$t0, S031			// S031 = sclq = t
	li	$t0, 0x358637bd			// t0 = EPSILON = 0.000001f
	mtv	$t0, S033			// S033 = t0 = EPSILON
	lv.q	C010, 0($a0)			// C010 = p
	lv.q	C020, 0($a1)			// C020 = q

// QuaternionAlign
	vsub.q	C100, C010, C020		// C100 = p[*] - q[*]
	vadd.q	C110, C010, C020		// C110 = p[*] + q[*]
	vdot.q	S000, C100, C100		// S000 = a += (p[*] - q[*]) * (p[*] - q[*])
	vdot.q	S001, C110, C110		// S001 = b += (p[*] + q[*]) * (p[*] + q[*])
	vcmp.s	GT, S000, S001			// CC[0] = a > b
	vcmovt.q	C020, C020[-X,-Y,-Z,-W], 0	// if CC[0] q = -q
// ***

// QuaternionSlerpNoAlign
	vdot.q	S000, C010, C020		// S000 = cosom += p[*] * q[*]
	vadd.s	S001, S000[1], S000		// S001 = 1.0f + cosom
	vcmp.s	LE, S001, S033			// CC[0] = ( 1.0f + cosom ) <= 0.000001f
	bvt	0, Lqs2				// if CC[0] goto Lqs2
	vocp.s	S030, S031			// S030 = sclp = 1.0f - t (delay slot, needed on both paths)
	vsub.s	S001, S000[1], S000		// S001 = 1.0f - cosom
	vcmp.s	LE, S001, S033			// CC[0] = ( 1.0f - cosom ) <= 0.000001f
	bvt	0, Lqs1				// if CC[0] goto Lqs1 (plain lerp: sclp = 1 - t, sclq = t)
	nop					// (delay slot)

// acos
	vcst.s	S001, VFPU_SQRT1_2		// S001 = VFPU_SQRT1_2 = 1 / sqrt(2)
	vcmp.s	LT, S000[|x|], S001		// CC[0] = abs(cosom) < (1 / sqrt(2))
	vasin.s	S032, S000[|x|]			// S032 = asin(abs(cosom))
	bvtl	0, Lqs0				// if CC[0] goto Lqs0
	vocp.s	S032, S032			// S032 = 1 - S032 = acos(abs(cosom)) = omega (bvtl delay slot, taken path only)
	vmul.s	S001, S000, S000		// S001 = cosom * cosom
	vocp.s	S001[0:1], S001			// S001 = 1 - cosom * cosom, written with the [0:1] prefix
	vsqrt.s	S001, S001			// S001 = sqrt(S001)
	vasin.s	S032, S001			// S032 = asin(S001) = acos(abs(cosom)) = omega
// ***
Lqs0:
	vscl.p	C030, C030, S032		// S030 = S030 * S032 = sclp * omega
						// S031 = S031 * S032 = sclq * omega
	vsin.t	C030, C030			// S030 = sin(S030) = sin(sclp * omega)
						// S031 = sin(S031) = sin(sclq * omega)
						// S032 = sin(S032) = sin(omega)
	vrcp.s	S032, S032			// S032 = 1.0f / S032 = 1 / sin(omega)
	vscl.p	C030, C030, S032		// S030 = sclp = sin(sclp * omega) / sin(omega)
						// S031 = sclq = sin(sclq * omega) / sin(omega)
Lqs1:
	vscl.q	C010, C010, S030		// C010 = p[*4] * sclp
	vscl.q	C020, C020, S031		// C020 = q[*4] * sclq
	b	LqsEnd				// goto LqsEnd
	vadd.q	C000, C010, C020		// S000 = qt[0] = sclp * p[0] + sclq * q[0] (delay slot)
						// S001 = qt[1] = sclp * p[1] + sclq * q[1]
						// S002 = qt[2] = sclp * p[2] + sclq * q[2]
						// S003 = qt[3] = sclp * p[3] + sclq * q[3]
Lqs2:
	vmov.q	C000, C020[-Y,X,-W,Z]		// S000 = qt[0] = -q[1];
						// S001 = qt[1] = q[0];
						// S002 = qt[2] = -q[3];
						// S003 = qt[3] = q[2];
	vsin.p	C030, C030			// S030 = sclp = sin(( 1.0f - t ) * ( 0.5f * M_PI_F ))
						// S031 = sclq = sin( t * ( 0.5f * M_PI_F ))
	vscl.t	C010, C010, S030		// C010 = p[*3] * sclp
	vscl.t	C020, C000, S031		// C020 = qt[*3] * sclq
	vadd.t	C000, C010, C020		// S000 = qt[0] = sclp * p[0] + sclq * qt[0]
						// S001 = qt[1] = sclp * p[1] + sclq * qt[1]
						// S002 = qt[2] = sclp * p[2] + sclq * qt[2]
						// S003 = qt[3] (left as set by vmov.q above)
LqsEnd:
	sv.q	C000, 0($a2)			// store the result quaternion to qt
	jr	$ra
	nop					// explicit delay slot: without it the first instruction
						// of the following function would run on return
	.end	QuaternionSlerp




// int BoxOnPlaneSide( vec3_t emins, vec3_t emaxs, mplane_t *p );
.ent BoxOnPlaneSide
BoxOnPlaneSide:
############################
# a0 - IN emins #
# a1 - IN emaxs #
# a2 - IN p #
# a0 - IN *emins #
# a1 - IN *emaxs #
# a2 - IN *p #
# v0 - OUT sides #
############################
lbu $v0, 17($a2) // p->signbits
Expand Down
2 changes: 1 addition & 1 deletion ref_gu/gu_studio.c
Original file line number Diff line number Diff line change
Expand Up @@ -2427,7 +2427,7 @@ static void R_StudioDrawPoints( void )
{
mstudioboneweight_t *pvertweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendvertinfoindex);
mstudioboneweight_t *pnormweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendnorminfoindex);
matrix3x4 skinMat __attribute__( (aligned( 16 ) ) );
matrix3x4 skinMat;

for( i = 0; i < m_pSubModel->numverts; i++ )
{
Expand Down

0 comments on commit cae351f

Please sign in to comment.