Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CrossSIMD: Add a simple unit test, fix a couple of operations in the no-simd path #19955

Merged
merged 5 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions Common/Math/CrossSIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

#include "Common/Math/SIMDHeaders.h"

#if PPSSPP_ARCH(SSE2)
#define TEST_FALLBACK 0

#if PPSSPP_ARCH(SSE2) && !TEST_FALLBACK

// The point of this, as opposed to a float4 array, is to almost force the compiler
// to keep the matrix in registers, rather than loading on every access.
Expand Down Expand Up @@ -367,7 +369,7 @@ inline Vec4U16 SignBits32ToMaskU16(Vec4S32 v) {
};
}

#elif PPSSPP_ARCH(ARM_NEON)
#elif PPSSPP_ARCH(ARM_NEON) && !TEST_FALLBACK

struct Mat4F32 {
Mat4F32() {}
Expand Down Expand Up @@ -1222,12 +1224,11 @@ inline void TranslateAndScaleInplace(Mat4F32 &m, Vec4F32 scale, Vec4F32 translat

inline Mat4F32 Mul4x4By4x4(Mat4F32 a, Mat4F32 b) {
Mat4F32 result;

for (int j = 0; j < 4; j++) {
for (int i = 0; i < 4; i++) {
float sum = 0.0f;
for (int k = 0; k < 4; k++) {
sum += b.m[i * 4 + k] * a.m[k * 4 + j];
sum += b.m[k * 4 + i] * a.m[j * 4 + k];
}
result.m[j * 4 + i] = sum;
}
Expand All @@ -1242,9 +1243,12 @@ inline Mat4F32 Mul4x3By4x4(Mat4x3F32 a, Mat4F32 b) {
for (int i = 0; i < 4; i++) {
float sum = 0.0f;
for (int k = 0; k < 3; k++) {
sum += b.m[i * 4 + k] * a.m[k * 3 + j];
sum += b.m[k * 4 + i] * a.m[j * 3 + k];
}
result.m[j * 4 + i] = sum + b.m[i * 4 + 3];
if (j == 3) {
sum += b.m[12 + i];
}
result.m[j * 4 + i] = sum;
}
}
return result;
Expand Down
75 changes: 75 additions & 0 deletions unittest/UnitTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,80 @@ bool TestSIMD() {
return true;
}

// Debug helper: writes `count` floats starting at `f` to stdout on one line.
// Each value is printed with one decimal place followed by "f, " (so the output
// can be pasted back as a C float initializer list), then a newline is emitted.
static void PrintFloats(const float *f, int count) {
	const float *cursor = f;
	int remaining = count;
	while (remaining > 0) {
		printf("%.1ff, ", *cursor);
		++cursor;
		--remaining;
	}
	printf("\n");
}

// Compares `count` floats in `values` against `known_good` using exact equality.
//
// Returns true when every element matches. Otherwise prints one line per element
// (1-based index, both values, and a "!! MISMATCH" tag on the differing ones),
// followed by a summary naming `line` as the location in UnitTest.cpp, and
// returns false.
static bool CompareFloats(const float *values, const float *known_good, int count, int line) {
	// First pass: just tally the differences so the all-good case prints nothing.
	int mismatches = 0;
	for (int idx = 0; idx < count; ++idx) {
		mismatches += (values[idx] != known_good[idx]) ? 1 : 0;
	}

	if (mismatches == 0) {
		return true;
	}

	// Second pass: dump the full table so the bad entries can be seen in context.
	for (int idx = 0; idx < count; ++idx) {
		const char *tag = (values[idx] != known_good[idx]) ? "!! MISMATCH" : "";
		printf("%d: %0.3f vs %0.3f %s\n", idx + 1, values[idx], known_good[idx], tag);
	}
	printf("At UnitTest.cpp:%d: %d / %d were wrong\n", line, mismatches, count);
	return false;
}

bool TestCrossSIMD() {
static const float a_values[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f };
static const float b_values[16] = { -12.0f, 3.0f, -2.5f, 5.0f, 31.0f, 0.5f, 4.0f, 6.0f, 7.0f, 13.0f, 12.0f, 51.0f, 81.0f, 32.0f };
static const float known_result[16] = { 395.0f, 171.0f, 41.5f, 170.0f, 942.0f, 410.5f, 111.5f, 475.0f, 1358.0f, 607.5f, 163.0f, 728.0f, 297.0f, 49.5f, 25.0f, 160.0f, };
float result[16];
Mat4F32 a(a_values);
Mat4F32 b(b_values);

Mul4x4By4x4(a, b).Store(result);
if (!CompareFloats(result, known_result, 16, __LINE__)) {
return false;
}

Mat4x3F32 d = Mat4x3F32(b_values + 2);
Mul4x3By4x4(d, a).Store(result);

static const float known_4x3_result[16] = { 332.5f, 371.0f, 404.5f, 438.0f, 80.5f, 95.0f, 105.5f, 116.0f, 192.0f, 237.0f, 269.0f, 301.0f, 790.0f, 1036.0f, 1185.0f, 1349.0f, };
if (!CompareFloats(result, known_4x3_result, 16, __LINE__)) {
return false;
}

static const float vec_values[4] = { 3.0f, 5.0f, 7.0f, 10000000.0f };
Vec4F32 v = Vec4F32::Load(vec_values);

v.AsVec3ByMatrix44(b).Store3(result);

static const float known_vec_result[3] = { 249.0f, 134.5f, 96.5f, };
if (!CompareFloats(result, known_vec_result, ARRAY_SIZE(known_vec_result), __LINE__)) {
return false;
}
Vec4F32 scale = Vec4F32::Load(a_values);
Vec4F32 translate = Vec4F32::Load(b_values);

TranslateAndScaleInplace(a, scale, translate);
a.Store(result);

static const float known_scale_result[16] = { -47.0f, 16.0f, -1.0f, 36.0f, -103.0f, 41.0f, 1.5f, 81.0f, -146.0f, 61.0f, 3.5f, 117.0f, 14.0f, 30.0f, 0.0f, 0.0f,};
if (!CompareFloats(result, known_scale_result, ARRAY_SIZE(known_scale_result), __LINE__)) {
return false;
}

// PrintFloats(result, 16);

return true;
}

typedef bool (*TestFunc)();
struct TestItem {
const char *name;
Expand Down Expand Up @@ -1207,6 +1281,7 @@ TestItem availableTests[] = {
TEST_ITEM(CharQueue),
TEST_ITEM(Buffer),
TEST_ITEM(SIMD),
TEST_ITEM(CrossSIMD),
};

int main(int argc, const char *argv[]) {
Expand Down
Loading