diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f81ecb0..f1b40b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,34 +1,34 @@ name: CI on: - push: - branches: main - paths: ['**.cu','**.c','**.cpp', '**.h', '**CMakeLists.txt'] - pull_request: - branches: main - paths: ['**.cu','**.c','**.cpp', '**.h', '**CMakeLists.txt'] + push: + branches: main + paths: ["**.cu", "**.c", "**.cpp", "**.h", "**CMakeLists.txt"] + pull_request: + branches: main + paths: ["**.cu", "**.c", "**.cpp", "**.h", "**CMakeLists.txt"] jobs: - build-and-test: - runs-on: ubuntu-latest + build-and-test: + runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 + steps: + - name: Checkout code + uses: actions/checkout@v4 - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: '3.10' + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: "3.10" - - name: Install dependencies - run: | - pip install pandas + - name: Install dependencies + run: | + pip install pandas - - name: Build project - run: | - make build - - - name: Run test suite - run: | - make test \ No newline at end of file + - name: Build project + run: | + make build + + - name: Run test suite + run: | + make test_cpu diff --git a/CMakeLists.txt b/CMakeLists.txt index d2d5cbd..faea7b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,30 +1,36 @@ cmake_minimum_required(VERSION 3.16) -# Set the project name -project(ichida-algo) +project(ichida-algo LANGUAGES C CXX) set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -fopenmp -Wall -Wextra") - set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED True) -# set(CMAKE_VERBOSE_MAKEFILE ON) +set(CMAKE_VERBOSE_MAKEFILE ON) -set(SRC_DIR src) set(INC_DIR include) -set(LIB_DIR lib) -set(TEST_DIR test) -set(BENCHMARK_DIR benchmark) +set(SRC_DIR src) +set(CUDA_SRC_DIR cudasrc) -# Source files -file(GLOB_RECURSE SOURCE_FILES ${SRC_DIR}/*.c) +include_directories(${INC_DIR}) -include_directories(include) +file(GLOB_RECURSE SOURCE_FILES ${SRC_DIR}/*.c) add_executable(speed_cpu ${SOURCE_FILES}) -# add_executable(benchmark ${SRC_DIR}/matrix.c ${BENCHMARK_DIR}/benchmark.c) - -target_link_libraries(speed_cpu m pthread) -# target_link_libraries(benchmark m) - +target_link_libraries(speed_cpu m pthread gomp) + +find_package(CUDA) + +if(CUDA_FOUND) + enable_language(CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -O3 --use_fast_math -Xcompiler -march=native -unroll-aggressive -arch=sm_80") + find_package(MPI REQUIRED) + include_directories(${MPI_INCLUDE_PATH}) + file(GLOB_RECURSE CUDA_SOURCE_FILES ${CUDA_SRC_DIR}/*.cu) + add_executable(speed_gpu ${CUDA_SOURCE_FILES}) + set_target_properties(speed_gpu PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + target_link_libraries(speed_gpu m ${MPI_LIBRARIES}) +else() + message(STATUS "CUDA not found, only CPU version will be built.") +endif() diff --git a/Makefile b/Makefile index 09a0ea2..0253aee 100644 --- a/Makefile +++ b/Makefile @@ -1,37 +1,36 @@ -.PHONY: all test clean run build run_test +.PHONY: all clean build run_cpu run_gpu test_cpu test_gpu bench stat -all: rebuild +# Default iterations +iterations ?= 1000 + +all: build clean: rm -f test/results.csv rm -f results.csv rm -rf build - rm -f speed_cpu + rm -f speed_cpu speed_gpu build: clean - cmake -Bbuild - $(MAKE) -C ./build - mv ./build/speed_cpu ./ - -rebuild: - $(MAKE) -C ./build - mv ./build/speed_cpu ./ - -run: build - ./speed_demo_cpu.sh ./weights_and_biases.txt ./tensors - -run_test: build - ./speed_cpu ./weights_and_biases.txt ./tensors - -test: build - ./speed_cpu ./weights_and_biases.txt ./tensors 1 - mv ./results.csv ./test - python3 ./test/verify_csv.py + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release + $(MAKE) -C build + cp -u build/speed_cpu ./ + if [ -f build/speed_gpu ]; then cp -u build/speed_gpu ./; fi -bench: build - ./build/benchmark +run_cpu: build + ./speed_cpu ./weights_and_biases.txt ./tensors $(iterations) -stat: build - python3 ./benchmark/stat.py +run_gpu: build + n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \ + mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations) +test_cpu: build + ./speed_cpu ./weights_and_biases.txt ./tensors $(iterations) + mv ./results.csv ./test + python3 ./test/verify_csv.py +test_gpu: build + n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \ + mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations) + mv ./results.csv ./test + python3 ./test/verify_csv.py \ No newline at end of file diff --git a/benchmark/gpu/matrix_add/benchmark_plot.png b/benchmark/gpu/matrix_add/benchmark_plot.png deleted file mode 100644 index 72e786c..0000000 Binary files a/benchmark/gpu/matrix_add/benchmark_plot.png and /dev/null differ