forked from cuMF/cumf_als
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
170 lines (144 loc) · 4.5 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Location of the CUDA Toolkit
CUDA_PATH ?= "/usr/local/cuda"
OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]")
OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
OS_SIZE = $(shell uname -m | sed -e "s/x86_64/64/" -e "s/armv7l/32/" -e "s/aarch64/64/")
OS_ARCH = $(shell uname -m)
ARCH_FLAGS =
DARWIN = $(strip $(findstring DARWIN, $(OSUPPER)))
ifneq ($(DARWIN),)
XCODE_GE_5 = $(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5)
endif
# Take command line flags that override any of these settings
ifeq ($(x86_64),1)
OS_SIZE = 64
OS_ARCH = x86_64
endif
ifeq ($(ARMv7),1)
OS_SIZE = 32
OS_ARCH = armv7l
ARCH_FLAGS = -target-cpu-arch ARM
endif
ifeq ($(aarch64),1)
OS_SIZE = 64
OS_ARCH = aarch64
ARCH_FLAGS = -target-cpu-arch ARM
endif
# Common binaries
ifneq ($(DARWIN),)
ifeq ($(XCODE_GE_5),1)
GCC ?= clang
else
GCC ?= g++
endif
else
ifeq ($(ARMv7),1)
GCC ?= arm-linux-gnueabihf-g++
else
GCC ?= g++
endif
endif
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
# internal flags
NVCCFLAGS := -m${OS_SIZE} ${ARCH_FLAGS} --ptxas-options=-v #--maxrregcount 255
CCFLAGS := -DADD_
LDFLAGS :=
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=
# OS-specific build flags
ifneq ($(DARWIN),)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(OS_ARCH)
else
ifeq ($(OS_ARCH),armv7l)
ifeq ($(abi),androideabi)
NVCCFLAGS += -target-os-variant Android
else
ifeq ($(abi),gnueabi)
CCFLAGS += -mfloat-abi=softfp
else
# default to gnueabihf
override abi := gnueabihf
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
endif
endif
endif
endif
ifeq ($(ARMv7),1)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(GCC) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi)
endif
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../common/inc -I/u/weitan/gpfs/software/magma-2.0.0/include -I/u/weitan/gpfs/software/magma-2.0.0/control -I/u/weitan/gpfs/software/magma-2.0.0/testing
LIBRARIES := -L/u/weitan/gpfs/software/magma-2.0.0/lib -L/opt/share/OpenBLAS-0.2.14/lib -L/u/weitan/gpfs/software/magma-2.0.0/testing -L/u/weitan/gpfs/software/magma-2.0.0/testing/lin
################################################################################
# Gencode arguments
SMS ?= 35
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
ifeq ($(SMS),)
ifeq ($(OS_ARCH),armv7l)
# Generate PTX code from SM 30
GENCODE_FLAGS += -gencode arch=compute_20,code=compute_20
else
# Generate PTX code from SM 30
GENCODE_FLAGS += -gencode arch=compute_11,code=compute_11
endif
endif
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
ALL_CCFLAGS +=-rdc=true -Xptxas -dlcm=ca
LIBRARIES +=-lcublas -lcusparse #-llapack -lblas -lmagma -lm -ltest -llapacktest -lcudart
################################################################################
# Target rules
all: build
build: clean main
debug: clean
debug: ALL_CCFLAGS+=-DDEBUG
debug: build
als.o:als.cu
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -lineinfo -o $@ -c $<
main.o:main.cu
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -lineinfo -o $@ -c $<
host_utilities.o:host_utilities.cpp
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -lineinfo -o $@ -c $<
main: host_utilities.o als.o main.o
$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -lineinfo -o $@ $+ $(LIBRARIES)
#netflix
run: main
./main 100 0.055 3
clean:
rm -f host_utilities.o als.o main main.o
clobber: clean