-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathscopy_amd64.s
51 lines (43 loc) · 839 Bytes
/
scopy_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// func Scopy(N int, X []float32, incX int, Y []float32, incY int)
//
// Scopy copies N float32 elements from X to Y:
//
//	Y[i*incY] = X[i*incX]   for i = 0 .. N-1
//
// Behaviour:
//   - N <= 0: nothing is copied and the function returns immediately.
//   - Otherwise the last index touched in each slice, inc*(N-1), is
//     validated. If the product overflows, is negative, or is >= the
//     slice length, ·panicIndex is called before any element is copied.
//   - incX == incY == 1 uses a single REP MOVSL; any other stride pair
//     uses an element-by-element loop.
//
// Register roles:
//	CX = remaining element count
//	SI = source pointer (into X)      AX = incX (later: incX in bytes)
//	DI = destination pointer (into Y) BX = incY (later: incY in bytes)
//	R8 = incX*(N-1), DX = incY*(N-1) during the bounds check;
//	DX is reused as the element temporary in the strided loop.
//
// BP is deliberately not used as scratch: this function is frameless and
// NOSPLIT, so the assembler does not save/restore BP, and clobbering it
// would corrupt the caller's frame pointer.
TEXT ·Scopy(SB), 7, $0-72
	MOVQ	N+0(FP), CX
	MOVQ	X_base+8(FP), SI
	MOVQ	incX+32(FP), AX
	MOVQ	Y_base+40(FP), DI
	MOVQ	incY+64(FP), BX

	// Nothing to do for N <= 0. Rejecting negative N here matters: REP and
	// the DECQ/JNE loop below would otherwise treat it as a huge count.
	CMPQ	CX, $0
	JLE	done

	// Check data boundaries. From here on N - 1 >= 0.
	MOVQ	CX, R8
	DECQ	R8
	MOVQ	R8, DX
	IMULQ	AX, R8			// R8 = incX * (N - 1)
	JO	bad_index		// product does not fit in 64 bits
	IMULQ	BX, DX			// DX = incY * (N - 1)
	JO	bad_index
	// Unsigned comparisons: a negative last index (negative stride) looks
	// like a huge unsigned value and is rejected together with index >= len.
	CMPQ	R8, X_len+16(FP)
	JAE	bad_index
	CMPQ	DX, Y_len+48(FP)
	JAE	bad_index

	// Check if incX != 1 or incY != 1
	CMPQ	AX, $1
	JNE	with_stride
	CMPQ	BX, $1
	JNE	with_stride

	// Optimized copy for incX == incY == 1: CX dwords from (SI) to (DI).
	REP; MOVSL
	RET

with_stride:
	// Convert strides from elements to bytes.
	SALQ	$2, AX			// AX = sizeof(float32) * incX
	SALQ	$2, BX			// BX = sizeof(float32) * incY

copy_loop:
	// Invariant: CX > 0 elements remain (N > 0 was established above).
	MOVL	(SI), DX
	MOVL	DX, (DI)
	ADDQ	AX, SI
	ADDQ	BX, DI
	DECQ	CX
	JNE	copy_loop

done:
	RET

bad_index:
	CALL	·panicIndex(SB)
	RET