From ec26662ac9f1e89b2556d81fa311f91ab3abd492 Mon Sep 17 00:00:00 2001
From: Alexandre Mutel <alexandre_mutel@live.com>
Date: Sat, 5 Nov 2022 16:05:12 +0100
Subject: [PATCH] Optimize XxHash3 on ARM platform (#77881)

* Optimize XxHash3 on ARM platform

* Extract code to MultiplyWideningLower

* Update src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash3.cs

Co-authored-by: Stephen Toub <stoub@microsoft.com>

Co-authored-by: Stephen Toub <stoub@microsoft.com>
---
 .../src/System/IO/Hashing/XxHash3.cs | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash3.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash3.cs
index 4e821618d36e3e..17087d87631036 100644
--- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash3.cs
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash3.cs
@@ -10,6 +10,7 @@ using System.Runtime.InteropServices;
 
 #if NET7_0_OR_GREATER
 using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
 using System.Runtime.Intrinsics.X86;
 #endif
 
@@ -896,16 +897,31 @@ private static Vector128<ulong> Accumulate128(Vector128<ulong> accVec, byte* sou
             Vector128<uint> sourceKey = sourceVec ^ secret;
 
             // TODO: Figure out how to unwind this shuffle and just use Vector128.Multiply
-            Vector128<uint> sourceKeyLow = Vector128.Shuffle(sourceKey, Vector128.Create(1u, 0, 3, 0));
             Vector128<uint> sourceSwap = Vector128.Shuffle(sourceVec, Vector128.Create(2u, 3, 0, 1));
             Vector128<ulong> sum = accVec + sourceSwap.AsUInt64();
-            Vector128<ulong> product = Sse2.IsSupported ?
-                Sse2.Multiply(sourceKey, sourceKeyLow) :
-                (sourceKey & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64() * (sourceKeyLow & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64();
+            Vector128<ulong> product = MultiplyWideningLower(sourceKey);
 
             accVec = product + sum;
             return accVec;
         }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<ulong> MultiplyWideningLower(Vector128<uint> source)
+        {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<uint> sourceLow = Vector128.Shuffle(source, Vector128.Create(0u, 2, 0, 0)).GetLower();
+                Vector64<uint> sourceHigh = Vector128.Shuffle(source, Vector128.Create(1u, 3, 0, 0)).GetLower();
+                return AdvSimd.MultiplyWideningLower(sourceLow, sourceHigh);
+            }
+            else
+            {
+                Vector128<uint> sourceLow = Vector128.Shuffle(source, Vector128.Create(1u, 0, 3, 0));
+                return Sse2.IsSupported ?
+                    Sse2.Multiply(source, sourceLow) :
+                    (source & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64() * (sourceLow & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64();
+            }
+        }
 #endif
 
         private static void ScrambleAccumulators(ulong* accumulators, byte* secret)