Skip to content

Commit

Permalink
Use f32 accumulation on non-aarch64 targets
Browse files Browse the repository at this point in the history
  • Loading branch information
eddyxu committed Sep 15, 2024
1 parent ac76fa3 commit 7ef541b
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions rust/lance-linalg/src/simd/f16.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ float FUNC(l2_f16)(const FP16 *x, const FP16 *y, uint32_t dimension) {

#pragma clang loop unroll(enable) interleave(enable) vectorize(enable)
for (uint32_t i = 0; i < dimension; i++) {
#if defined(__aarch64__)
// on aarch64 with fp16, this is 2x faster.
FP16 sub = x[i] - y[i];
#else
float sub = x[i] - y[i];
#endif
// Use float32 as the accumulator to avoid overflow.
sum += sub * sub;
}
Expand Down

0 comments on commit 7ef541b

Please sign in to comment.