-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #45 from kachi-group/main-staging
Main staging
- Loading branch information
Showing
39 changed files
with
664 additions
and
148 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,34 @@ | ||
name: CI | ||
|
||
on: | ||
push: | ||
branches: main | ||
paths: ['**.cu','**.c','**.cpp', '**.h', '**CMakeLists.txt'] | ||
pull_request: | ||
branches: main | ||
paths: ['**.cu','**.c','**.cpp', '**.h', '**CMakeLists.txt'] | ||
push: | ||
branches: main | ||
paths: ["**.cu", "**.c", "**.cpp", "**.h", "**CMakeLists.txt"] | ||
pull_request: | ||
branches: main | ||
paths: ["**.cu", "**.c", "**.cpp", "**.h", "**CMakeLists.txt"] | ||
|
||
jobs: | ||
build-and-test: | ||
runs-on: ubuntu-latest | ||
build-and-test: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v4 | ||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v4 | ||
|
||
- name: Setup python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: '3.10' | ||
- name: Setup python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: "3.10" | ||
|
||
- name: Install dependencies | ||
run: | | ||
pip install pandas | ||
- name: Install dependencies | ||
run: | | ||
pip install pandas | ||
- name: Build project | ||
run: | | ||
make build | ||
- name: Run test suite | ||
run: | | ||
make test | ||
- name: Build project | ||
run: | | ||
make build | ||
- name: Run test suite | ||
run: | | ||
make test_cpu |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,36 @@ | ||
cmake_minimum_required(VERSION 3.16) | ||
|
||
# Set the project name | ||
project(ichida-algo) | ||
project(ichida-algo LANGUAGES C CXX) | ||
|
||
set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -fopenmp -Wall -Wextra") | ||
|
||
set(CMAKE_C_STANDARD 11) | ||
set(CMAKE_C_STANDARD_REQUIRED True) | ||
# set(CMAKE_VERBOSE_MAKEFILE ON) | ||
set(CMAKE_VERBOSE_MAKEFILE ON) | ||
|
||
set(SRC_DIR src) | ||
set(INC_DIR include) | ||
set(LIB_DIR lib) | ||
set(TEST_DIR test) | ||
set(BENCHMARK_DIR benchmark) | ||
set(SRC_DIR src) | ||
set(CUDA_SRC_DIR cudasrc) | ||
|
||
# Source files | ||
file(GLOB_RECURSE SOURCE_FILES ${SRC_DIR}/*.c) | ||
include_directories(${INC_DIR}) | ||
|
||
include_directories(include) | ||
file(GLOB_RECURSE SOURCE_FILES ${SRC_DIR}/*.c) | ||
|
||
add_executable(speed_cpu ${SOURCE_FILES}) | ||
# add_executable(benchmark ${SRC_DIR}/matrix.c ${BENCHMARK_DIR}/benchmark.c) | ||
|
||
target_link_libraries(speed_cpu m pthread) | ||
# target_link_libraries(benchmark m) | ||
|
||
target_link_libraries(speed_cpu m pthread gomp) | ||
|
||
find_package(CUDA) | ||
|
||
if(CUDA_FOUND) | ||
enable_language(CUDA) | ||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -O3 --use_fast_math -Xcompiler -march=native -unroll-aggressive -arch=sm_80") | ||
find_package(MPI REQUIRED) | ||
include_directories(${MPI_INCLUDE_PATH}) | ||
file(GLOB_RECURSE CUDA_SOURCE_FILES ${CUDA_SRC_DIR}/*.cu) | ||
add_executable(speed_gpu ${CUDA_SOURCE_FILES}) | ||
set_target_properties(speed_gpu PROPERTIES CUDA_SEPARABLE_COMPILATION ON) | ||
target_link_libraries(speed_gpu m ${MPI_LIBRARIES}) | ||
else() | ||
message(STATUS "CUDA not found, only CPU version will be built.") | ||
endif() | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,36 @@ | ||
.PHONY: all test clean run build run_test | ||
.PHONY: all clean build run_cpu run_gpu test_cpu test_gpu bench stat | ||
|
||
all: rebuild | ||
# Default iterations | ||
iterations ?= 1000 | ||
|
||
all: build | ||
|
||
clean: | ||
rm -f test/results.csv | ||
rm -f results.csv | ||
rm -rf build | ||
rm -f speed_cpu | ||
rm -f speed_cpu speed_gpu | ||
|
||
build: clean | ||
cmake -Bbuild | ||
$(MAKE) -C ./build | ||
mv ./build/speed_cpu ./ | ||
|
||
rebuild: | ||
$(MAKE) -C ./build | ||
mv ./build/speed_cpu ./ | ||
|
||
run: build | ||
./speed_demo_cpu.sh ./weights_and_biases.txt ./tensors | ||
|
||
run_test: build | ||
./speed_cpu ./weights_and_biases.txt ./tensors | ||
|
||
test: build | ||
./speed_cpu ./weights_and_biases.txt ./tensors 1 | ||
mv ./results.csv ./test | ||
python3 ./test/verify_csv.py | ||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release | ||
$(MAKE) -C build | ||
cp -u build/speed_cpu ./ | ||
if [ -f build/speed_gpu ]; then cp -u build/speed_gpu ./; fi | ||
|
||
bench: build | ||
./build/benchmark | ||
run_cpu: build | ||
./speed_cpu ./weights_and_biases.txt ./tensors $(iterations) | ||
|
||
stat: build | ||
python3 ./benchmark/stat.py | ||
run_gpu: build | ||
n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \ | ||
mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations) | ||
|
||
test_cpu: build | ||
./speed_cpu ./weights_and_biases.txt ./tensors $(iterations) | ||
mv ./results.csv ./test | ||
python3 ./test/verify_csv.py | ||
|
||
test_gpu: build | ||
n_gpus=$(shell nvidia-smi --query-gpu=name --format=csv,noheader | wc -l); \ | ||
mpirun -np $$n_gpus ./speed_gpu ./weights_and_biases.txt ./tensors $(iterations) | ||
mv ./results.csv ./test | ||
python3 ./test/verify_csv.py |
This file was deleted.
Oops, something went wrong.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Builds one benchmark executable per CUDA source found in $(SRC_DIR).
# Every executable is linked against the shared driver in benchmark.cu.
compile = nvcc -O3 -arch=sm_75 --use_fast_math
SRC_DIR := versions
BIN_DIR := bin
SRC_FILES := $(wildcard $(SRC_DIR)/*.cu)
# Target names carry the .exe suffix so that they match the files nvcc
# actually writes (previously the rule's target was never created).
EXECUTABLES := $(patsubst $(SRC_DIR)/%.cu,$(BIN_DIR)/%.exe,$(SRC_FILES))

# These targets are commands, not files.
.PHONY: all clean plot

# "clean" must finish before any compile starts, so never run this
# Makefile's recipes in parallel.
.NOTPARALLEL:

# Always rebuild from an empty bin/ so stale executables are never plotted.
all: clean $(EXECUTABLES)

clean:
	rm -f -r $(BIN_DIR)
	mkdir $(BIN_DIR)

# Each version is compiled together with the common driver and header.
$(BIN_DIR)/%.exe: $(SRC_DIR)/%.cu benchmark.cu template.cuh
	$(compile) $< benchmark.cu -o $@

plot: all
	python3 ./plot.py
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#include "template.cuh" | ||
#include <stdio.h> | ||
#include <time.h> | ||
|
||
/*
 * Benchmark driver: reads the iteration count from the first command-line
 * argument (100000 when absent), hands it to time(), and prints the value
 * returned by time() to stdout using "%f".
 */
int main(int argc, char* argv[]) {
    long iterations = (argc > 1) ? atol(argv[1]) : 100000;
    printf("%f", time(iterations));
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os
import subprocess
import sys

import matplotlib.pyplot as plt

# Build the benchmark executables. A failed build is reported on stderr
# instead of being silently ignored; whatever did get built is still run.
build = subprocess.run(['make'], capture_output=True, text=True)
if build.returncode != 0:
    print('make failed (exit code %d):\n%s' % (build.returncode, build.stderr),
          file=sys.stderr)

# Define the folder containing the executables
folder_path = './bin'  # Change this to your bin folder path

# Define the input sizes to test.
# With start == end exactly one size (10000) is measured.
start = 10000
end = 10000
step = 100000

input_sizes = list(range(start, end + 1, step))

# One list of runtimes per regular file found in the bin folder
runtimes = {
    exe: []
    for exe in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, exe))
}

# Run every executable once per input size
for exe in runtimes:
    exe_path = os.path.join(folder_path, exe)

    for n in input_sizes:
        # Run the executable with the input size and capture its output
        result = subprocess.run([exe_path, str(n)], capture_output=True, text=True)

        # The executable prints its runtime as a single float on stdout.
        # If it crashed or printed something else, record NaN so the list
        # stays aligned with input_sizes and the plot shows a gap.
        try:
            runtime = float(result.stdout.strip())
        except ValueError:
            print('%s failed for n=%d (exit code %d): %s'
                  % (exe, n, result.returncode, result.stderr.strip()),
                  file=sys.stderr)
            runtime = float('nan')
        print(exe, runtime)

        # Append the runtime to the corresponding executable list
        runtimes[exe].append(runtime)

# Plot the data
plt.figure(figsize=(12, 6))

# One line per executable
for exe, times in runtimes.items():
    plt.plot(input_sizes, times, marker='o', label=exe)

plt.xlabel('Iterations')
plt.ylabel('Runtime (s)')
plt.title('Benchmark of Function Versions')
plt.legend()
plt.grid(True)
plt.tight_layout()

# The figure is written to disk only; it is not shown interactively.
output_file = 'benchmark_plot.png'  # Specify your desired output file name and format
plt.savefig(output_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#pragma once | ||
|
||
/*
 * Matrix of floats described by its dimensions and a pointer to its
 * element storage.
 */
typedef struct matrix {
    int rows;     // number of rows
    int cols;     // number of columns
    float* data;  // element array
} matrix;

/*
 * Creates a matrix descriptor for a rows x cols matrix. The definition lives
 * in the benchmark version source that gets compiled with this header.
 */
matrix* new_matrix_d(int rows, int cols);

/*
 * Benchmark entry point called by the driver's main() with the requested
 * iteration count n; the returned double is printed as the runtime.
 */
double time(int n);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#include "../template.cuh" | ||
|
||
/*
 * Allocates a matrix descriptor and a rows * cols float array, both in host
 * memory obtained from malloc. The element array is left uninitialized.
 *
 * Returns the new matrix, or NULL when a dimension is negative or an
 * allocation fails. The caller releases it with free(m->data); free(m).
 */
matrix* new_matrix(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    matrix* res = (matrix*)malloc(sizeof(matrix));
    if (res == NULL) {
        return NULL;
    }

    // Multiply in size_t so large dimensions cannot overflow int.
    const size_t count = (size_t)rows * (size_t)cols;

    res->rows = rows;
    res->cols = cols;
    res->data = (float*)malloc(count * sizeof(float));

    // malloc(0) may legitimately return NULL, so only a non-empty request
    // that comes back NULL counts as a failure.
    if (res->data == NULL && count != 0) {
        free(res);
        return NULL;
    }
    return res;
}
|
||
/*
 * Allocates a matrix whose descriptor lives in host memory (malloc) and whose
 * rows * cols float array is allocated with cudaMalloc. The element array is
 * left uninitialized.
 *
 * Returns the new matrix, or NULL when a dimension is negative or either
 * allocation fails. The caller releases it with cudaFree(m->data); free(m).
 */
matrix* new_matrix_d(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    matrix* res = (matrix*)malloc(sizeof(matrix));
    if (res == NULL) {
        return NULL;
    }

    res->rows = rows;
    res->cols = cols;
    res->data = NULL;

    // Multiply in size_t so large dimensions cannot overflow int.
    const size_t bytes = (size_t)rows * (size_t)cols * sizeof(float);

    // Do not hand back a descriptor with an invalid data pointer.
    if (cudaMalloc((void**)&(res->data), bytes) != cudaSuccess) {
        free(res);
        return NULL;
    }
    return res;
}
|
||
/*
 * Adds b into a element-wise: a[i] += b[i] for every index i < rows.
 *
 * Each thread handles the single element at its global 1D index
 * (blockIdx.x * blockDim.x + threadIdx.x); threads whose index is
 * >= rows return without touching memory.
 */
__global__ void matrix_add(float *a, float*b ,int rows)
{
    const int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (i >= rows) {
        return;
    }
    a[i] = a[i] + b[i];
}
|
||
double time(int n) { | ||
int row=100000; | ||
matrix* a = new_matrix_d(row, 1); | ||
matrix* b = new_matrix_d(row, 1); | ||
cudaStream_t stream1; | ||
cudaStreamCreate ( &stream1); | ||
|
||
int thread=1024; | ||
int block=((row+thread-1)/thread); | ||
|
||
clock_t start = clock(); | ||
for(int i=0;i<n;i++){ | ||
matrix_add<<<1,1,0,stream1>>>(a->data,b->data,row); | ||
} | ||
double seconds = (double)(clock() - (double)start) / CLOCKS_PER_SEC; | ||
return seconds; | ||
} |
Oops, something went wrong.