-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdot_int8_kernel.c
58 lines (58 loc) · 2.35 KB
/
dot_int8_kernel.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <stdint.h>
/**
 * Four-row int8 dot-product kernel written in RISC-V vector assembly.
 *
 * One vector of unsigned bytes is loaded from `data`, then four consecutive
 * rows of signed bytes are loaded from `kernel` (rows are 32 bytes apart).
 * Each row is multiplied element-wise with the data vector using a widening
 * signed*unsigned multiply, the products are summed with a widening
 * reduction, and one 16-bit value per row is stored to `output`.
 *
 * @param output  Receives four consecutive 16-bit results (output[0..3]).
 * @param data    Unsigned 8-bit input vector; the requested vector length
 *                is 32 elements.
 * @param kernel  Four rows of signed 8-bit weights, 32 bytes per row.
 *
 * NOTE(review): the element count actually processed is whatever vl the
 * hardware grants for a request of 32; this code assumes that is 32 --
 * confirm against the target's VLEN.
 *
 * NOTE(review): every vwredsum.vs uses element 0 of v0 as its starting
 * value, and nothing in this function initialises v0. The results are only
 * plain dot products if v0[0] is zero on entry -- confirm with the caller
 * or the platform start-up code.
 *
 * NOTE(review): vsetvli and vmv.x.s are emitted as raw `.word` encodings,
 * with the intended mnemonic kept beside each one as a `//` line inside the
 * template (this relies on the assembler treating `//` as a comment). The
 * `d1` field in those mnemonics suggests a pre-1.0 draft of the vector
 * specification -- verify the encodings against the target core.
 */
void __attribute__((noinline))
dot_int8_kernel(int16_t* output,
                const uint8_t* data,
                const int8_t* kernel) {
    /*
     * The template advances both the kernel pointer and the output pointer.
     * An asm statement must not modify input-only operands, so the asm gets
     * private copies that are bound as read-write ("+r") operands.
     */
    const int8_t *kern_ptr = kernel;
    int16_t *out_ptr = output;

    /*
     * __asm__/__volatile__ are used instead of the bare `asm` keyword so the
     * statement is also accepted in strict ISO modes such as -std=c11.
     */
    __asm__ __volatile__ (
        /// init: a4 = requested vector length, a5 = byte stride between rows
        "li a4, 32 \n"
        "li a5, 32 \n"
        /// load data (8-bit elements, register group of 2)
        "//vsetvli t4, a4, e8, m2, d1 \n"
        ".word 0b000101110111111011010111 \n"
        "vle8.v v4, (%[data]) \n"
        /// mul lane 0: signed kernel row * unsigned data -> 16-bit products
        "vle8.v v8, (%[kern]) \n"
        "vwmulsu.vv v16, v8, v4 \n"
        "add %[kern], %[kern], a5 \n"
        /// mul lane 1
        "vle8.v v8, (%[kern]) \n"
        "vwmulsu.vv v20, v8, v4 \n"
        "add %[kern], %[kern], a5 \n"
        /// mul lane 2
        "vle8.v v8, (%[kern]) \n"
        "vwmulsu.vv v24, v8, v4 \n"
        "add %[kern], %[kern], a5 \n"
        /// mul lane 3 (last row, so the kernel pointer is not advanced again)
        "vle8.v v8, (%[kern]) \n"
        "vwmulsu.vv v28, v8, v4 \n"
        /// reduce: switch to 16-bit elements, sum each product group
        "//vsetvli t4, a4, e16, m4, d1 \n"
        ".word 0b011001110111111011010111 \n"
        "vwredsum.vs v8, v16, v0 \n"
        "vwredsum.vs v12, v20, v0 \n"
        "vwredsum.vs v16, v24, v0 \n"
        "vwredsum.vs v20, v28, v0 \n"
        /// store: move element 0 of each result to t4, write it as a halfword
        "//vmv.x.s t4, v8 \n"
        ".word 0b0000110010100000000010111011010111 \n"
        "sh t4, 0(%[outw]) \n"
        "addi %[outw], %[outw], 2 \n"
        "//vmv.x.s t4, v12 \n"
        ".word 0b0000110010110000000010111011010111 \n"
        "sh t4, 0(%[outw]) \n"
        "addi %[outw], %[outw], 2 \n"
        "//vmv.x.s t4, v16 \n"
        ".word 0b0000110011000000000010111011010111 \n"
        "sh t4, 0(%[outw]) \n"
        "addi %[outw], %[outw], 2 \n"
        "//vmv.x.s t4, v20 \n"
        ".word 0b0000110011010000000010111011010111 \n"
        "sh t4, 0(%[outw]) \n"
        : /* read-write: both pointers are advanced inside the template */
          [kern] "+r" (kern_ptr),
          [outw] "+r" (out_ptr)
        : /* read-only: consumed by the first load, before any operand is written */
          [data] "r" (data)
        : /*
           * a4, a5 and t4 are overwritten as scratch registers, and the
           * template reads and writes memory through the pointers.
           * Vector registers are deliberately not listed: a toolchain that
           * needs hand-encoded vector instructions may not accept their
           * names in a clobber list -- add them if the compiler knows them.
           */
          "a4", "a5", "t4", "memory"
    );
}