forked from mratsim/laser
-
Notifications
You must be signed in to change notification settings - Fork 1
/
nim.cfg
39 lines (33 loc) · 1.16 KB
/
nim.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# ############################################################
#
# OpenMP OSX
#
# ############################################################
# Settings in this section apply only when the `openmp` symbol is defined
# (presumably via `-d:openmp` on the command line - confirm against the build docs).
@if openmp:
# NOTE(review): stack traces are switched off and thread support is switched on
# for OpenMP builds; presumably Nim's stack-trace bookkeeping is not safe inside
# OpenMP parallel regions - confirm.
stackTrace:off
threads:on
@if macosx: # The default compiler on macOS is clang without OpenMP support, and `gcc` is only an alias for that clang.
# Use Homebrew GCC instead for OpenMP support. GCC (v7) must be properly linked via `brew link gcc`.
# The compiler and linker paths below are hard-coded to gcc-7 under /usr/local/bin;
# they need adjusting if a different GCC version or Homebrew prefix is installed.
cc:"gcc"
gcc.exe:"/usr/local/bin/gcc-7"
gcc.linkerexe:"/usr/local/bin/gcc-7"
@end
@end
# ############################################################
#
# SIMD flags
#
# ############################################################
# Each line sets the `<module>.always` option of one named module to a string of
# instruction-set flags, so that every module is built with only the SIMD
# extension(s) it is named after.
# NOTE(review): `<module>.always` is presumably Nim's per-module "always pass these
# C compiler options" setting, and the `-m...` flags are GCC/Clang x86 options -
# confirm against the Nim compiler user guide.

# GEMM micro-kernels, one module per instruction set.
gemm_ukernel_sse.always = "-msse"
gemm_ukernel_sse2.always = "-msse2"
gemm_ukernel_sse4_1.always = "-msse4.1"
gemm_ukernel_avx.always = "-mavx"
gemm_ukernel_avx_fma.always = "-mavx -mfma"
gemm_ukernel_avx2.always = "-mavx2"
gemm_ukernel_avx512.always = "-mavx512f -mavx512dq"
# Reduction kernels.
reductions_sse3.always = "-msse3"
# exp/log kernels; the AVX512 variant also enables the AVX512BW subset.
exp_log_avx2.always = "-mavx2"
exp_log_avx512.always = "-mavx512f -mavx512dq -mavx512bw"
# Benchmarks
# For the PyTorch Glow matmul benchmark: AVX512 is slower than AVX2, so no AVX512 flags are enabled here.
# NOTE(review): the flags below enable AVX + FMA (not `-mavx2`) and select the
# C++11 standard - confirm this is the intended instruction set for the comparison.
libjit_matmul.always = "-std=c++11 -mavx -mfma"