From 471fe24ed001a6dc8574967c14633f0ab5ab5a3f Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Tue, 17 Sep 2024 04:36:11 +0000 Subject: [PATCH] Performance: Convert an FDIV to an FMUL in a hot loop --- CHANGELOG.md | 1 + src/tensors/cpu/tensor_operators.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afa4465ce..581dcc412 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Converted an FDIV to an FMUL in the hot loop of CPU layer normalization. - Fixed compilation with clang 16.0.6 - Added Threads::Threads to `EXT_LIBS` - Updates to pymarian: building for multiple python versions; disabling tcmalloc; hosting gated COMETs on HuggingFace diff --git a/src/tensors/cpu/tensor_operators.cpp b/src/tensors/cpu/tensor_operators.cpp index 9d5c8166d..25fd5184f 100755 --- a/src/tensors/cpu/tensor_operators.cpp +++ b/src/tensors/cpu/tensor_operators.cpp @@ -1110,11 +1110,11 @@ void LayerNormalizationImpl(float* out, sqSum += ex * ex; } - float sigma = std::sqrt(sqSum / cols + eps); + float invSigma = 1.0/std::sqrt(sqSum / cols + eps); #pragma omp simd for(int i = 0; i < cols; ++i) { - float t = alpha[alphaStride * i] * ((sp[i] - mean) / sigma); + float t = alpha[alphaStride * i] * ((sp[i] - mean) * invSigma); if(hasBeta) t += beta[betaStride * i];