Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghaoz committed Dec 22, 2024
1 parent 03c784c commit 42a0c09
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
24 changes: 24 additions & 0 deletions example/avx_dot_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package example

import (
"testing"
"unsafe"
)

// AVXDot returns the dot product of a and b, computed by the avx_dot kernel.
//
// It panics if the two slices differ in length. Empty slices yield 0 without
// calling the kernel.
func AVXDot(a, b []float32) float32 {
	if len(a) != len(b) {
		panic("floats: slice lengths do not match")
	}
	// Taking &a[0] of an empty slice panics with an index-out-of-range error,
	// so handle the empty dot product here.
	if len(a) == 0 {
		return 0
	}
	var c float32
	// The element count is passed as an integer value carried in an
	// unsafe.Pointer, not as a real address.
	// NOTE(review): presumably the avx_dot stub (declared elsewhere) takes
	// unsafe.Pointer for every argument — confirm against its declaration.
	avx_dot(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(uintptr(len(a))), unsafe.Pointer(&c))
	return c
}

// TestDot checks AVXDot against known and scalar-reference results.
//
// The 4-element case stays below one 8-float vector; the generated cases use
// lengths of exactly one vector (8), two vectors (16), and two vectors plus a
// remainder (19). All values are small integers, so every product and partial
// sum is exactly representable in float32 and the comparison can be exact
// regardless of summation order.
func TestDot(t *testing.T) {
	a := []float32{1, 2, 3, 4}
	b := []float32{5, 6, 7, 8}
	if c := AVXDot(a, b); c != 70 {
		t.Errorf("AVXDot(%v, %v) = %v, want %v", a, b, c, 70)
	}

	for _, n := range []int{8, 16, 19} {
		x := make([]float32, n)
		y := make([]float32, n)
		var want float32
		for i := range x {
			x[i] = float32(i + 1)
			y[i] = float32(n - i)
			want += x[i] * y[i]
		}
		if got := AVXDot(x, y); got != want {
			t.Errorf("AVXDot with %d elements = %v, want %v", n, got, want)
		}
	}
}
39 changes: 39 additions & 0 deletions example/src/avx_dot.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include <immintrin.h>
#include <stdint.h>

/*
 * avx_dot stores the dot product of the first n elements of a and b in *ret.
 *
 * Full groups of 8 floats are multiplied and accumulated in a 256-bit
 * register using unaligned loads; the remaining n % 8 elements are added
 * with scalar arithmetic. When n <= 0 the result is 0.
 */
void avx_dot(float *a, float *b, int64_t n, float *ret)
{
    /* n is 64-bit, so keep the derived counters 64-bit to avoid truncation. */
    int64_t epoch = n / 8;
    int64_t remain = n % 8;

    /*
     * Zero-initialise the accumulator: the horizontal reduction below runs
     * even when n < 8, and must not read an indeterminate register.
     */
    __m256 s = _mm256_setzero_ps();
    for (int64_t i = 0; i < epoch; i++)
    {
        __m256 v1 = _mm256_loadu_ps(a);
        __m256 v2 = _mm256_loadu_ps(b);
        s = _mm256_add_ps(_mm256_mul_ps(v1, v2), s);
        a += 8;
        b += 8;
    }

    /* Horizontal sum: fold 8 lanes to 4, then 4 to 2, then 2 to 1. */
    __m128 hi4 = _mm256_extractf128_ps(s, 1);
    __m128 lo4 = _mm256_castps256_ps128(s);
    __m128 sum4 = _mm_add_ps(hi4, lo4);
    __m128 sum2 = _mm_add_ps(sum4, _mm_movehl_ps(sum4, sum4));
    __m128 sum1 = _mm_add_ss(sum2, _mm_shuffle_ps(sum2, sum2, 0x1));

    /* Scalar tail; a and b already point past the vectorised prefix. */
    float total = _mm_cvtss_f32(sum1);
    for (int64_t i = 0; i < remain; i++)
    {
        total += a[i] * b[i];
    }
    *ret = total;
}

0 comments on commit 42a0c09

Please sign in to comment.