Skip to content

Commit

Permalink
Update build tags for more platforms
Browse files Browse the repository at this point in the history
  • Loading branch information
klauspost committed Nov 15, 2023
1 parent 19ed26b commit 7380cbf
Show file tree
Hide file tree
Showing 13 changed files with 72 additions and 61 deletions.
2 changes: 0 additions & 2 deletions _gen/gen.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//go:build generate
// +build generate

// Copyright 2022+, Klaus Post. See LICENSE for details.

Expand Down Expand Up @@ -110,7 +109,6 @@ import (
)
`)
w.WriteString(fmt.Sprintf("const pshufb = %v\n\n", pshufb))
w.WriteString(fmt.Sprintf(`const (
avx2CodeGen = true
maxAvx2Inputs = %d
Expand Down
2 changes: 2 additions & 0 deletions galois_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

package reedsolomon

const pshufb = true

//go:noescape
func galMulSSSE3(low, high, in, out []byte)

Expand Down
25 changes: 5 additions & 20 deletions galois_arm64.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
//go:build !noasm && !appengine && !gccgo && pshufb
//go:build !noasm && !appengine && !gccgo && !nopshufb

// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2017, Minio, Inc.

package reedsolomon

const pshufb = true

//go:noescape
func galMulNEON(low, high, in, out []byte)

//go:noescape
func galMulXorNEON(low, high, in, out []byte)

//go:noescape
func galXorNEON(in, out []byte)

func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
Expand Down Expand Up @@ -50,20 +49,6 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
}
}

// sliceXor XORs each byte of in into the byte at the same index of out.
// The o argument is not consulted by this implementation.
func sliceXor(in, out []byte, o *options) {
	// The NEON routine is handed the full slices; it works in 32-byte steps.
	galXorNEON(in, out)

	// Finish the bytes after the last whole 32-byte block in plain Go.
	for i := len(in) &^ 31; i < len(in); i++ {
		out[i] ^= in[i]
	}
}

// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
Expand All @@ -89,7 +74,7 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
refMulAdd(x, y, log_m)
// 64 byte aligned, always full.
galXorNEON(x, y)
xorSliceNEON(x, y)
}

// 2-way butterfly forward
Expand All @@ -102,7 +87,7 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) {
// 2-way butterfly
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
// 64 byte aligned, always full.
galXorNEON(x, y)
xorSliceNEON(x, y)
// Reference version:
refMulAdd(x, y, log_m)
}
Expand Down
28 changes: 1 addition & 27 deletions galois_arm64.s
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//+build !noasm
//+build !appengine
//+build !gccgo
//+build !pshufb
//+build !nopshufb

// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2017, Minio, Inc.
Expand Down Expand Up @@ -100,29 +100,3 @@ loopXor:

completeXor:
RET

// func galXorNEON(in, out []byte)
//
// XORs in into out, 32 bytes per iteration: out[i] ^= in[i].
// Only len(in) is read; len(out) is never loaded or checked here.
// Whole 32-byte blocks are processed; the trailing len(in) % 32 bytes are
// not touched by this routine. If len(in) < 32 it returns immediately.
TEXT ·galXorNEON(SB), 7, $0
MOVD in_base+0(FP), R1 // R1 = pointer to in
MOVD in_len+8(FP), R2 // R2 = len(in), used as the remaining-byte counter
MOVD out_base+24(FP), R5 // R5 = pointer to out
SUBS $32, R2 // pre-subtract one block; negative means fewer than 32 bytes
BMI completeXor

loopXor:
// Main loop
VLD1.P 32(R1), [V0.B16, V1.B16] // load 32 bytes of in, advance R1 by 32
VLD1 (R5), [V20.B16, V21.B16] // load 32 bytes of out, R5 is not advanced yet

VEOR V20.B16, V0.B16, V4.B16 // V4 = in[0:16] ^ out[0:16]
VEOR V21.B16, V1.B16, V5.B16 // V5 = in[16:32] ^ out[16:32]

// Store the 32 result bytes back to out and advance R5 by 32
VST1.P [V4.D2, V5.D2], 32(R5)

SUBS $32, R2 // one block consumed
BPL loopXor // continue while at least one more full block remains

completeXor:
RET

2 changes: 0 additions & 2 deletions galois_gen_switch_amd64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions galois_gen_switch_nopshufb_amd64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions galois_noasm.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

package reedsolomon

const pshufb = false

func galMulSlice(c byte, in, out []byte, o *options) {
out = out[:len(in)]
if c == 1 {
Expand All @@ -28,11 +30,6 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
}
}

// sliceXor is the slice XOR entry point for this build configuration.
// It forwards in, out and o unchanged to sliceXorGo, which is defined
// elsewhere in the package.
func sliceXor(in, out []byte, o *options) {
sliceXorGo(in, out, o)
}

func init() {
defaultOptions.useAVX512 = false
}
Expand Down
2 changes: 2 additions & 0 deletions galois_nopshufb_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package reedsolomon
// bigSwitchover is the size where 64 bytes are processed per loop.
const bigSwitchover = 128

const pshufb = false

// simple slice xor
func sliceXor(in, out []byte, o *options) {
if o.useSSE2 {
Expand Down
4 changes: 3 additions & 1 deletion galois_ppc64le.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
//go:build !noasm && !appengine && !gccgo && !pshufb
//go:build !noasm && !appengine && !gccgo && !nopshufb

// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2018, Minio, Inc.

package reedsolomon

const pshufb = true

//go:noescape
func galMulPpc(low, high, in, out []byte)

Expand Down
4 changes: 2 additions & 2 deletions reedsolomon.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) erro
return ErrShardSize
}

if avx2CodeGen && len(dataShard) >= r.o.perRound && len(parity) >= avx2CodeGenMinShards && (r.o.useAVX2 || r.o.useGFNI) {
if avx2CodeGen && len(dataShard) >= r.o.perRound && len(parity) >= avx2CodeGenMinShards && ((pshufb && r.o.useAVX2) || r.o.useGFNI) {
m := make([][]byte, r.parityShards)
for iRow := range m {
m[iRow] = r.parity[iRow][idx : idx+1]
Expand Down Expand Up @@ -803,7 +803,7 @@ func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
}

func (r *reedSolomon) canAVX2C(byteCount int, inputs, outputs int) bool {
return avx2CodeGen && r.o.useAVX2 &&
return avx2CodeGen && pshufb && r.o.useAVX2 &&
byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards &&
inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs
}
Expand Down
19 changes: 19 additions & 0 deletions xor_arm64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//go:build !noasm && !appengine && !gccgo

package reedsolomon

//go:noescape
func xorSliceNEON(in, out []byte)

// sliceXor XORs in into out, index by index. xorSliceNEON is called with the
// full slices, and the len(in) % 32 bytes at the end are then XORed in Go.
// The o argument is not consulted by this implementation.
func sliceXor(in, out []byte, o *options) {
	xorSliceNEON(in, out)

	// tail is the offset of the first byte past the last whole 32-byte block.
	tail := len(in) &^ 31
	for i, b := range in[tail:] {
		out[tail+i] ^= b
	}
}
29 changes: 29 additions & 0 deletions xor_arm64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//+build !noasm
//+build !appengine
//+build !gccgo

// func xorSliceNEON(in, out []byte)
//
// XORs in into out, 32 bytes per iteration: out[i] ^= in[i].
// Only len(in) is read; len(out) is never loaded or checked here.
// Whole 32-byte blocks are processed; the trailing len(in) % 32 bytes are
// not touched by this routine. If len(in) < 32 it returns immediately.
TEXT ·xorSliceNEON(SB), 7, $0
MOVD in_base+0(FP), R1 // R1 = pointer to in
MOVD in_len+8(FP), R2 // R2 = len(in), used as the remaining-byte counter
MOVD out_base+24(FP), R5 // R5 = pointer to out
SUBS $32, R2 // pre-subtract one block; negative means fewer than 32 bytes
BMI completeXor

loopXor:
// Main loop
VLD1.P 32(R1), [V0.B16, V1.B16] // load 32 bytes of in, advance R1 by 32
VLD1 (R5), [V20.B16, V21.B16] // load 32 bytes of out, R5 is not advanced yet

VEOR V20.B16, V0.B16, V4.B16 // V4 = in[0:16] ^ out[0:16]
VEOR V21.B16, V1.B16, V5.B16 // V5 = in[16:32] ^ out[16:32]

// Store the 32 result bytes back to out and advance R5 by 32
VST1.P [V4.D2, V5.D2], 32(R5)

SUBS $32, R2 // one block consumed
BPL loopXor // continue while at least one more full block remains

completeXor:
RET

7 changes: 7 additions & 0 deletions xor_noasm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build noasm || gccgo || appengine || (ppc64le && nopshufb)

package reedsolomon

// sliceXor is the slice XOR entry point for builds selected by this file's
// build constraint. It forwards in, out and o unchanged to sliceXorGo, which
// is defined elsewhere in the package.
func sliceXor(in, out []byte, o *options) {
sliceXorGo(in, out, o)
}

0 comments on commit 7380cbf

Please sign in to comment.