diff --git a/galois_gen_switch_amd64.go b/galois_gen_switch_amd64.go
index 7cc88f6..c9035a9 100644
--- a/galois_gen_switch_amd64.go
+++ b/galois_gen_switch_amd64.go
@@ -17,9 +17,13 @@ const (
 	minCodeGenSize = 64
 )
 
+var (
+	fAvx2    = galMulSlicesAvx2
+	fAvx2Xor = galMulSlicesAvx2Xor
+)
+
 func (r *reedSolomon) hasCodeGen(byteCount int, inputs, outputs int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) {
-	f, fXor := galMulSlicesAvx2, galMulSlicesAvx2Xor
-	return &f, &fXor, codeGen && pshufb && r.o.useAVX2 &&
+	return &fAvx2, &fAvx2Xor, codeGen && pshufb && r.o.useAVX2 &&
 		byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards &&
 		inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs
 }
diff --git a/galois_gen_switch_arm64.go b/galois_gen_switch_arm64.go
index 3e912a9..e4cc5db 100644
--- a/galois_gen_switch_arm64.go
+++ b/galois_gen_switch_arm64.go
@@ -15,15 +15,20 @@ const (
 	minCodeGenSize = 64
 )
 
+var (
+	fSve     = galMulSlicesSve
+	fSveXor  = galMulSlicesSveXor
+	fNeon    = galMulSlicesNeon
+	fNeonXor = galMulSlicesNeonXor
+)
+
 func (r *reedSolomon) hasCodeGen(byteCount int, inputs, outputs int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) {
 	if r.o.useSVE {
-		f, fXor := galMulSlicesSve, galMulSlicesSveXor
-		return &f, &fXor, codeGen && pshufb &&
+		return &fSve, &fSveXor, codeGen && pshufb &&
 			byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards &&
 			inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs
 	}
-	f, fXor := galMulSlicesNeon, galMulSlicesNeonXor
-	return &f, &fXor, codeGen && pshufb && r.o.useNEON &&
+	return &fNeon, &fNeonXor, codeGen && pshufb && r.o.useNEON &&
 		byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards &&
 		inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs
 }