Skip to content

Commit

Permalink
update transpose mini-app
Browse files Browse the repository at this point in the history
  • Loading branch information
cjknight committed Jul 20, 2024
1 parent 9c7c1e5 commit 825d5ce
Show file tree
Hide file tree
Showing 5 changed files with 345 additions and 210 deletions.
22 changes: 8 additions & 14 deletions gpu/mini-apps/transpose/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#
# Definition of MACROS

BASE_LIBGPU=../../src

BINROOT=./
EXE=libgpu.so
EXE=a.out
SHELL=/bin/sh
CXX = g++
CXXFLAGS=
Expand All @@ -15,9 +17,9 @@ CPP=cpp -P -traditional
INSTALL=../

ARCH ?= polaris
include ../../src/arch/$(ARCH)
include $(BASE_LIBGPU)/arch/$(ARCH)

CXXFLAGS += -I../../src
CXXFLAGS += -I$(BASE_LIBGPU)

$(info ARCH is [${ARCH}])

Expand All @@ -37,6 +39,8 @@ FOBJ = $(FSRC:.F90=.o)
#$(info MOD is [${MOD}])
#$(info FOBJ is [${FOBJ}])

# -- only copy source files; headers referenced with compiler flag.
# -- The copy runs every time this Makefile is parsed. cp -p keeps the
# -- timestamps of the originals, so an unchanged source does not look newer
# -- than its object file and trigger a recompile on every invocation.
$(shell cp -p $(BASE_LIBGPU)/pm*.cpp ./)

#
# -- target : Dependencies
Expand Down Expand Up @@ -73,17 +77,7 @@ realclean:
rm -f $(INSTALL)/$(EXE)
rm -rf $(EXE).dSYM
rm -f *.optrpt
rm -f pm*.h pm*.cpp

#
# -- Simple dependencies

libgpu.o : libgpu.cpp libgpu.h

pm_cuda.o : pm_cuda.cpp pm_cuda.h pm.h
pm_host.o : pm_host.cpp pm_host.h pm.h
pm_openmp.o : pm_openmp.cpp pm_openmp.h pm.h

device_cuda.o : device_cuda.cpp device.h
device_host.o : device_host.cpp device.h
device_openmp.o : device_openmp.cpp device.h
device.o : device.cpp device.h
86 changes: 86 additions & 0 deletions gpu/mini-apps/transpose/Makefile.nvcc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@

# Definition of MACROS
#
# Default settings for the nvcc build of this mini-app. The arch file is
# included after these assignments, so any variable it assigns replaces (or,
# with +=, extends) the value given here.

# Directory that provides the arch/ files and the headers passed via -I.
BASE_LIBGPU=../../src

# BINROOT is not referenced in the visible part of this file.
BINROOT=./
# Name of the link output; also used by install, clean and realclean.
EXE=a.out
SHELL=/bin/sh
# Host C++ compiler and flags (used for $(COBJ)).
CXX = g++
CXXFLAGS=
# Fortran compiler and flags (used for $(FOBJ) and $(MOD)).
FC=gfortran
FCFLAGS=
# Recursive assignment: LD follows CXX even if CXX is reassigned later.
LD = $(CXX)
# NOTE(review): -fPIC -shared are shared-library link flags, while EXE is
# a.out -- confirm that the arch file replaces LDFLAGS for this mini-app.
LDFLAGS = -fPIC -shared
# AR and CPP are not referenced in the visible part of this file.
AR = ar rcs
CPP = cpp -P -traditional
# Destination directory for the install target.
INSTALL=../

# Compiler and flags for the sources listed in CUDA_SRC.
CUDA_CXX = nvcc
CUDA_CXXFLAGS =

# -- Architecture selection: polaris-gnu-nvcc unless ARCH is already set
# -- by the caller; the matching file under $(BASE_LIBGPU)/arch is read in.
ARCH ?= polaris-gnu-nvcc
include ${BASE_LIBGPU}/arch/${ARCH}

# -- Headers are not copied locally; both compilers find them via -I.
CUDA_CXXFLAGS += -I${BASE_LIBGPU}
CXXFLAGS += -I${BASE_LIBGPU}

$(info ARCH is [$(ARCH)])

# -- subset of src files with cuda kernels (built with $(CUDA_CXX))
CUDA_SRC = pm_cuda.cpp offload.cpp
CUDA_OBJ = $(CUDA_SRC:.cpp=.o)

# -- every other C++ source in this directory is built with $(CXX).
# -- CSRC has to stay a recursive (=) variable: the pm*.cpp files are copied
# -- into this directory further down, and the wildcard must be evaluated
# -- only after that copy has happened.
CSRC = $(filter-out $(CUDA_SRC), $(wildcard *.cpp))
INC = $(wildcard *.h)
COBJ = $(CSRC:.cpp=.o)

# -- Fortran sources: the suffix must match the %.F90 compile rules below,
# -- otherwise a discovered source would have no rule able to build it.
FSRC = $(wildcard *.F90)
MOD = $(FSRC:.F90=.mod)
FOBJ = $(FSRC:.F90=.o)

# -- only copy source files; headers referenced with compiler flag.
# -- The copy runs every time this Makefile is parsed. cp -p keeps the
# -- timestamps of the originals, so an unchanged source does not look newer
# -- than its object file and trigger a recompile on every invocation.
$(shell cp -p $(BASE_LIBGPU)/pm*.cpp ./)

#
# -- Link step: host objects and CUDA objects are combined into $(EXE).
# -- Fortran objects and modules are prerequisites (so they are built
# -- first) but are not passed on the link line.

$(EXE): $(COBJ) $(CUDA_OBJ) $(FOBJ) $(MOD)
	$(LD) $(LDFLAGS) -o $(EXE) $(COBJ) $(CUDA_OBJ) $(LIB)

# -- Copy the built executable into $(INSTALL). Declared phony so that a
# -- file or directory named "install" cannot make the target look up to date.
.PHONY: install
install: $(EXE)
	cp $(EXE) $(INSTALL)
# Copying of Fortran modules/objects is currently disabled:
# cp $(MOD) $(FOBJ) $(INSTALL)/include

####################################################################

# -- Host C++ sources: one object per .cpp file, compiled with $(CXX).
$(COBJ): %.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

# -- Fortran sources: the object and the module file are separate targets
# -- that are each produced by compiling the same .F90 file.
$(FOBJ): %.o: %.F90
	$(FC) $(FCFLAGS) -c $(<)

$(MOD): %.mod: %.F90
	$(FC) $(FCFLAGS) -c $(<)

# -- Sources listed in CUDA_SRC keep their .cpp suffix; "-x cu" tells
# -- $(CUDA_CXX) to treat them as CUDA input.
$(CUDA_OBJ): %.o: %.cpp
	$(CUDA_CXX) -x cu $(CUDA_CXXFLAGS) -o $@ -c $<

#
# -- Remove objects, modules, editor backups (*~) and the executable, both
# -- the local copy and the one placed in $(INSTALL). Declared phony so a
# -- file named "clean" cannot stop the recipe from running.
.PHONY: clean
clean:
	rm -f *.o *.mod *~ ./$(EXE)
	rm -f $(INSTALL)/$(EXE)
	rm -rf $(EXE).dSYM
#
# -- Everything "clean" removes, plus *.optrpt files and the pm* files in
# -- this directory (the pm*.cpp sources are copied in when the Makefile is
# -- parsed). Runs "clean" first instead of repeating its commands.
.PHONY: realclean
realclean: clean
	rm -f *.optrpt
	rm -f pm*.h pm*.cpp

#
# -- Simple dependencies
Loading

0 comments on commit 825d5ce

Please sign in to comment.