forked from AlexeyAB/darknet
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Makefile
215 lines (170 loc) · 6.47 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
GPU=0
CUDNN=0
CUDNN_HALF=0
OPENCV=0
AVX=0
OPENMP=0
LIBSO=0
ZED_CAMERA=0
ZED_CAMERA_v2_8=0
# set GPU=1 and CUDNN=1 to speedup on GPU
# set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing, Ampere, Ada and higher
# set AVX=1 and OPENMP=1 to speedup on CPU (if error occurs then set AVX=0)
# set ZED_CAMERA=1 to enable ZED SDK 3.0 and above
# set ZED_CAMERA_v2_8=1 to enable ZED SDK 2.X
USE_CPP=0
DEBUG=0
ARCH= -gencode arch=compute_50,code=[sm_50,compute_50] \
-gencode arch=compute_52,code=[sm_52,compute_52] \
-gencode arch=compute_61,code=[sm_61,compute_61]
OS := $(shell uname)
# Naming confusion with recent RTX cards.
# "NVIDIA Quadro RTX x000" and T1000/Tx00 are Turing Architecture Family with Compute Capability of 7.5
# "NVIDIA RTX Ax000" are Ampere Architecture Family with Compute Capability of 8.6
# NVIDIA "RTX x000 Ada" are Ada Lovelace Architecture Family with Compute Capability of 8.9
# Source https://developer.nvidia.com/cuda-gpus
# KEPLER, GeForce GTX 770, GTX 760, GT 740
# ARCH= -gencode arch=compute_30,code=sm_30
# MAXWELL, GeForce GTX 950, 960, 970, 980, 980 Ti, "GTX" Titan X
# ARCH= -gencode arch=compute_52,code=sm_52
# Jetson TX1, Tegra X1, DRIVE CX, DRIVE PX, Jetson Nano (2GB, 4GB)
# ARCH= -gencode arch=compute_53,code=[sm_53,compute_53]
# GP100/Tesla P100 - DGX-1
# ARCH= -gencode arch=compute_60,code=sm_60
# PASCAL, GTX 10x0, GTX 10x0 Ti, Titan Xp, Tesla P40, Tesla P4
# ARCH= -gencode arch=compute_61,code=[sm_61,compute_61]
# For Jetson TX2, Jetson Nano TX2 or Drive-PX2 uncomment:
# ARCH= -gencode arch=compute_62,code=[sm_62,compute_62]
# Tesla V100
# ARCH= -gencode arch=compute_70,code=[sm_70,compute_70]
# Jetson XAVIER, XAVIER NX
# ARCH= -gencode arch=compute_72,code=[sm_72,compute_72]
# GeForce Titan RTX, RTX 20x0, RTX 20x0 Ti, Quadro RTX x000, Tesla T4, XNOR Tensor Cores
# ARCH= -gencode arch=compute_75,code=[sm_75,compute_75]
# Tesla A100 (GA100), DGX-A100, A30, A100, RTX 3080
# ARCH= -gencode arch=compute_80,code=[sm_80,compute_80]
# GeForce RTX 30x0, 30x0 Ti, Tesla GA10x, RTX Axxxx, A2, A10, A16, A40
# ARCH= -gencode arch=compute_86,code=[sm_86,compute_86]
# NOT TESTED, THEORETICAL
# Jetson ORIN, ORIN NX, ORIN NANO
# ARCH= -gencode arch=compute_87,code=[sm_87,compute_87]
# NOT TESTED, THEORETICAL
# GeForce RTX 4070 Ti, 4080, 4090, L4, L40
# ARCH= -gencode arch=compute_89,code=[sm_89,compute_89]
# NOT TESTED, THEORETICAL
# Nvidia H100
# ARCH= -gencode arch=compute_90,code=[sm_90,compute_90]
VPATH=./src/
EXEC=darknet
OBJDIR=./obj/
ifeq ($(LIBSO), 1)
LIBNAMESO=libdarknet.so
APPNAMESO=uselib
endif
ifeq ($(USE_CPP), 1)
CC=g++
else
CC=gcc
endif
CPP=g++ -std=c++11
NVCC=nvcc
OPTS=-Ofast
LDFLAGS= -lm -pthread
COMMON= -Iinclude/ -I3rdparty/stb/include
CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic
ifeq ($(DEBUG), 1)
#OPTS= -O0 -g
#OPTS= -Og -g
COMMON+= -DDEBUG
CFLAGS+= -DDEBUG
else
ifeq ($(AVX), 1)
CFLAGS+= -ffp-contract=fast -mavx -mavx2 -msse3 -msse4.1 -msse4.2 -msse4a
endif
endif
CFLAGS+=$(OPTS)
ifneq (,$(findstring MSYS_NT,$(OS)))
LDFLAGS+=-lws2_32
endif
ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv`
COMMON+= `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv`
endif
ifeq ($(OPENMP), 1)
ifeq ($(OS),Darwin) #MAC
CFLAGS+= -Xpreprocessor -fopenmp
else
CFLAGS+= -fopenmp
endif
LDFLAGS+= -lgomp
endif
ifeq ($(GPU), 1)
COMMON+= -DGPU -I/usr/local/cuda/include/
CFLAGS+= -DGPU
ifeq ($(OS),Darwin) #MAC
LDFLAGS+= -L/usr/local/cuda/lib -lcuda -lcudart -lcublas -lcurand
else
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
endif
endif
ifeq ($(CUDNN), 1)
COMMON+= -DCUDNN
ifeq ($(OS),Darwin) #MAC
CFLAGS+= -DCUDNN -I/usr/local/cuda/include
LDFLAGS+= -L/usr/local/cuda/lib -lcudnn
else
CFLAGS+= -DCUDNN -I/usr/local/cudnn/include
LDFLAGS+= -L/usr/local/cudnn/lib64 -lcudnn
endif
endif
ifeq ($(CUDNN_HALF), 1)
COMMON+= -DCUDNN_HALF
CFLAGS+= -DCUDNN_HALF
ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70]
endif
ifeq ($(ZED_CAMERA), 1)
CFLAGS+= -DZED_STEREO -I/usr/local/zed/include
ifeq ($(ZED_CAMERA_v2_8), 1)
LDFLAGS+= -L/usr/local/zed/lib -lsl_core -lsl_input -lsl_zed
#-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0
else
LDFLAGS+= -L/usr/local/zed/lib -lsl_zed
#-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0
endif
endif
OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o representation_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
endif
OBJS = $(addprefix $(OBJDIR), $(OBJ))
DEPS = $(wildcard src/*.h) Makefile include/darknet.h
all: $(OBJDIR) backup results setchmod $(EXEC) $(LIBNAMESO) $(APPNAMESO)
ifeq ($(LIBSO), 1)
CFLAGS+= -fPIC
$(LIBNAMESO): $(OBJDIR) $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp
$(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ $(LDFLAGS)
$(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp
$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO)
endif
$(EXEC): $(OBJS)
$(CPP) -std=c++11 $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS)
$(OBJDIR)%.o: %.c $(DEPS)
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.cpp $(DEPS)
$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.cu $(DEPS)
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
$(OBJDIR):
mkdir -p $(OBJDIR)
backup:
mkdir -p backup
results:
mkdir -p results
setchmod:
chmod +x *.sh
.PHONY: clean
clean:
rm -rf $(OBJS) $(EXEC) $(LIBNAMESO) $(APPNAMESO)