-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from nhatdongdang/feat/benchmark
Add benchmark and multithread processing (experimental)
- Loading branch information
Showing
30 changed files
with
652 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,5 +36,7 @@ speed_gpu | |
# Misc | ||
.vscode/ | ||
build/ | ||
bin/ | ||
tensors/ | ||
results.csv | ||
tensors/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#include "../include/matrix.h" | ||
#include "util.h" | ||
#include <stdint.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
/*
 * Times matrix_mul(a, b, c) `iterations` times and returns the mean of the
 * per-run tick count divided by the number of elements in `a`
 * (a->rows * a->cols). The caller prints this value with the label "CPE".
 *
 * rdtsc() is declared elsewhere; presumably it reads a CPU cycle counter --
 * TODO confirm against util.h.
 */
double benchmark_matrix_mul(int iterations, matrix* a, matrix* b, matrix* c) {
    double total = 0;
    int run = 0;
    while (run < iterations) {
        /* Only the kernel call sits between the two counter reads. */
        const uint64_t t0 = rdtsc();
        matrix_mul(a, b, c);
        const uint64_t t1 = rdtsc();

        const uint64_t elapsed = t1 - t0;
        total += (double)elapsed / (a->rows * a->cols);
        run++;
    }
    return total / iterations;
}
|
||
/*
 * Times matrix_add(a, b) `iterations` times and returns the mean of the
 * per-run tick count divided by a->rows. The caller prints this value with
 * the label "CPE".
 *
 * rdtsc() is declared elsewhere; presumably it reads a CPU cycle counter --
 * TODO confirm against util.h.
 */
double benchmark_matrix_add(int iterations, matrix* a, matrix* b) {
    double total = 0;
    int run = 0;
    while (run < iterations) {
        /* Only the kernel call sits between the two counter reads. */
        const uint64_t t0 = rdtsc();
        matrix_add(a, b);
        const uint64_t t1 = rdtsc();

        const uint64_t elapsed = t1 - t0;
        total += (double)elapsed / (a->rows);
        run++;
    }
    return total / iterations;
}
|
||
/*
 * Times relu(a) `iterations` times and returns the mean of the per-run tick
 * count divided by a->rows. The caller prints this value with the label "CPE".
 *
 * rdtsc() is declared elsewhere; presumably it reads a CPU cycle counter --
 * TODO confirm against util.h.
 */
double benchmark_relu(int iterations, matrix* a) {
    double total = 0;
    int run = 0;
    while (run < iterations) {
        /* Only the kernel call sits between the two counter reads. */
        const uint64_t t0 = rdtsc();
        relu(a);
        const uint64_t t1 = rdtsc();

        const uint64_t elapsed = t1 - t0;
        total += (double)elapsed / (a->rows);
        run++;
    }
    return total / iterations;
}
|
||
/*
 * Times softmax(a) `iterations` times and returns the mean of the per-run
 * tick count divided by (a->rows * 2). The caller prints this value with the
 * label "CPE".
 *
 * NOTE(review): the factor of 2 presumably accounts for softmax making two
 * passes over the data -- confirm against the softmax implementation.
 * rdtsc() is declared elsewhere; presumably it reads a CPU cycle counter.
 */
double benchmark_softmax(int iterations, matrix* a) {
    double total = 0;
    int run = 0;
    while (run < iterations) {
        /* Only the kernel call sits between the two counter reads. */
        const uint64_t t0 = rdtsc();
        softmax(a);
        const uint64_t t1 = rdtsc();

        const uint64_t elapsed = t1 - t0;
        total += (double)elapsed / (a->rows * 2);
        run++;
    }
    return total / iterations;
}
|
||
/*
 * Runs each kernel benchmark for a fixed number of iterations and prints the
 * value each benchmark returns, labelled "CPE".
 *
 * The matrices are created with new_matrix() and never released; the process
 * exits right after the last benchmark.
 */
int main(void) {
    const int iterations = 200000;

    /*
     * A 2000x1000 matrix times a column vector needs a 1000x1 input and
     * produces a 2000x1 result. The input used to be allocated as 2000x1,
     * which does not match the operand shapes.
     * NOTE(review): assumes matrix_mul(a, b, c) computes c = a * b -- confirm
     * against ../include/matrix.h.
     */
    printf("- matrix_mul: %f CPE\n",
           benchmark_matrix_mul(iterations, new_matrix(2000, 1000), new_matrix(1000, 1), new_matrix(2000, 1)));
    printf("- matrix_add: %f CPE\n", benchmark_matrix_add(iterations, new_matrix(2000, 1), new_matrix(2000, 1)));
    printf("- relu: %f CPE\n", benchmark_relu(iterations, new_matrix(2000, 1)));
    printf("- softmax: %f CPE\n", benchmark_softmax(iterations, new_matrix(2000, 1)));
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Builds one benchmark executable per kernel source in versions/, each linked
# with the shared benchmark.c driver, and places it under bin/.
compile = gcc -O3 -march=native -ffast-math -funroll-loops -Wall -Wextra

SRC_DIR := versions
BIN_DIR := bin
SRC_FILES := $(wildcard $(SRC_DIR)/*.c)
EXECUTABLES := $(patsubst $(SRC_DIR)/%.c, $(BIN_DIR)/%, $(SRC_FILES))

# all/clean/plot name actions, not files; without this a stray file with one
# of these names would make the target look up to date.
.PHONY: all clean plot

# `all` needs `clean` to finish before any executable is built. That ordering
# only holds when prerequisites run one after another, so parallel execution
# (make -j) is disabled for this Makefile.
.NOTPARALLEL:

all: clean $(EXECUTABLES)

# Removes previous builds and recreates an empty output directory.
clean:
	rm -f -r $(BIN_DIR)
	mkdir -p $(BIN_DIR)

# One executable per kernel version: the version source plus the driver.
$(BIN_DIR)/%: $(SRC_DIR)/%.c
	$(compile) $< benchmark.c -o $@

plot: all
	python3 ./plot.py
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#include "matrix_add.h" | ||
#include <stdint.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <time.h> | ||
|
||
/* Dense float matrix; new_matrix() sizes `data` to hold rows * cols floats. */
typedef struct {
    float* data;
    int rows;
    int cols;
} matrix;

/*
 * Allocates a rows x cols matrix whose elements are all zero.
 *
 * The element buffer is zero-filled so that benchmarks never operate on
 * indeterminate values. The returned pointer is never NULL: negative
 * dimensions, a size overflow, or an allocation failure print a message to
 * stderr and terminate the process with EXIT_FAILURE.
 *
 * The caller owns the result and releases it with free(m->data); free(m);
 */
matrix* new_matrix(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        fprintf(stderr, "new_matrix: invalid dimensions %d x %d\n", rows, cols);
        exit(EXIT_FAILURE);
    }

    /* Compute the element count in size_t and reject products that wrap. */
    const size_t n_rows = (size_t)rows;
    const size_t n_cols = (size_t)cols;
    if (n_cols != 0 && n_rows > SIZE_MAX / n_cols) {
        fprintf(stderr, "new_matrix: %d x %d is too large\n", rows, cols);
        exit(EXIT_FAILURE);
    }
    const size_t count = n_rows * n_cols;

    matrix* res = malloc(sizeof *res);
    float* data = calloc(count, sizeof *data);
    /* calloc(0, ...) may legitimately return NULL, so only treat NULL as a
     * failure when elements were actually requested. */
    if (res == NULL || (data == NULL && count != 0)) {
        fprintf(stderr, "new_matrix: out of memory for %d x %d matrix\n", rows, cols);
        free(data);
        free(res);
        exit(EXIT_FAILURE);
    }

    res->rows = rows;
    res->cols = cols;
    res->data = data;
    return res;
}
|
||
int main(int argc, char* argv[]) { | ||
long n = 0; | ||
if (argc > 1) { | ||
n = atol(argv[1]); | ||
} else { | ||
printf("Error!"); | ||
exit(1); | ||
} | ||
clock_t start = clock(); | ||
matrix* a = new_matrix(255, 1); | ||
matrix* b = new_matrix(255, 1); | ||
for (int i = 0; i < n; i++) { | ||
matrix_add(a->data, b->data, a->rows); | ||
} | ||
float seconds = (float)(clock() - (float)start) / CLOCKS_PER_SEC; | ||
printf("%f", seconds); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#pragma once | ||
void matrix_add(const float* src, float* __restrict__ dest, int rows); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Build every kernel version, time each one, and plot runtime against iteration count."""
import os
import subprocess
import matplotlib.pyplot as plt

# Rebuild the executables first; stop with the build output if that fails,
# rather than timing stale or missing binaries.
build = subprocess.run(['make'], capture_output=True, text=True)
if build.returncode != 0:
    raise SystemExit(f"make failed with exit code {build.returncode}:\n{build.stderr}")

# Define the folder containing the executables
folder_path = './bin'  # Change this to your bin folder path

# Iteration counts passed to each executable as its first argument
start = 100000
end = 1000000
step = 100000
input_sizes = list(range(start, end + 1, step))

# One runtime list per executable; sorted so the legend order is stable
runtimes = {
    exe: []
    for exe in sorted(os.listdir(folder_path))
    if os.path.isfile(os.path.join(folder_path, exe))
}

# Loop through each executable
for exe in runtimes:
    exe_path = os.path.join(folder_path, exe)

    # Loop through each iteration count
    for n in input_sizes:
        # Run the executable with the iteration count and capture its output
        result = subprocess.run([exe_path, str(n)], capture_output=True, text=True)
        if result.returncode != 0:
            raise SystemExit(
                f"{exe_path} {n} exited with status {result.returncode}: {result.stderr.strip()}"
            )

        # The executable prints the elapsed seconds as a single float
        try:
            runtime = float(result.stdout.strip())
        except ValueError:
            raise SystemExit(f"{exe_path} {n} printed an unparsable runtime: {result.stdout!r}")
        print(exe, runtime)

        # Append the runtime to the corresponding executable list
        runtimes[exe].append(runtime)

# Plot the data
plt.figure(figsize=(12, 6))

# One line per executable
for exe, times in runtimes.items():
    plt.plot(input_sizes, times, marker='o', label=exe)

# The x value is how many times the kernel was invoked, not a data size
plt.xlabel('Iterations')
plt.ylabel('Runtime (s)')
plt.title('Benchmark of Function Versions')
plt.legend()
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#include "../matrix_add.h" | ||
/*
 * Adds src[i] into dest[i] for every i in [0, rows); dest is updated in place.
 *
 * dest is qualified __restrict__, so callers must not pass a dest that
 * overlaps src.
 */
void matrix_add(const float* src, float* __restrict__ dest, int rows) { | ||
for (int i = 0; i < rows; i++) { | ||
dest[i] += src[i]; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Builds one benchmark executable per kernel source in versions/, each linked
# with the shared benchmark.c driver, and places it under bin/.
compile = gcc -O3 -march=native -ffast-math -funroll-loops -Wall -Wextra
SRC_DIR := versions
BIN_DIR := bin
SRC_FILES := $(wildcard $(SRC_DIR)/*.c)
EXECUTABLES := $(patsubst $(SRC_DIR)/%.c, $(BIN_DIR)/%, $(SRC_FILES))

# all/clean/plot name actions, not files; without this a stray file with one
# of these names would make the target look up to date.
.PHONY: all clean plot

# `all` needs `clean` to finish before any executable is built. That ordering
# only holds when prerequisites run one after another, so parallel execution
# (make -j) is disabled for this Makefile.
.NOTPARALLEL:

all: clean $(EXECUTABLES)

# Removes previous builds and recreates an empty output directory.
clean:
	rm -f -r $(BIN_DIR)
	mkdir -p $(BIN_DIR)

# One executable per kernel version: the version source plus the driver.
$(BIN_DIR)/%: $(SRC_DIR)/%.c
	$(compile) $< benchmark.c -o $@

plot: all
	python3 ./plot.py
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#include "matrix_mul.h" | ||
#include <stdint.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <time.h> | ||
|
||
/* Dense float matrix; new_matrix() sizes `data` to hold rows * cols floats. */
typedef struct {
    float* data;
    int rows;
    int cols;
} matrix;

/*
 * Allocates a rows x cols matrix whose elements are all zero.
 *
 * The element buffer is zero-filled so that benchmarks never operate on
 * indeterminate values. The returned pointer is never NULL: negative
 * dimensions, a size overflow, or an allocation failure print a message to
 * stderr and terminate the process with EXIT_FAILURE.
 *
 * The caller owns the result and releases it with free(m->data); free(m);
 */
matrix* new_matrix(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        fprintf(stderr, "new_matrix: invalid dimensions %d x %d\n", rows, cols);
        exit(EXIT_FAILURE);
    }

    /* Compute the element count in size_t and reject products that wrap. */
    const size_t n_rows = (size_t)rows;
    const size_t n_cols = (size_t)cols;
    if (n_cols != 0 && n_rows > SIZE_MAX / n_cols) {
        fprintf(stderr, "new_matrix: %d x %d is too large\n", rows, cols);
        exit(EXIT_FAILURE);
    }
    const size_t count = n_rows * n_cols;

    matrix* res = malloc(sizeof *res);
    float* data = calloc(count, sizeof *data);
    /* calloc(0, ...) may legitimately return NULL, so only treat NULL as a
     * failure when elements were actually requested. */
    if (res == NULL || (data == NULL && count != 0)) {
        fprintf(stderr, "new_matrix: out of memory for %d x %d matrix\n", rows, cols);
        free(data);
        free(res);
        exit(EXIT_FAILURE);
    }

    res->rows = rows;
    res->cols = cols;
    res->data = data;
    return res;
}
|
||
int main(int argc, char* argv[]) { | ||
long n = 0; | ||
if (argc > 1) { | ||
n = atol(argv[1]); | ||
} else { | ||
printf("Error!"); | ||
exit(1); | ||
} | ||
clock_t start = clock(); | ||
matrix* a = new_matrix(98, 255); | ||
matrix* b = new_matrix(255, 1); | ||
matrix* c = new_matrix(98, 1); | ||
for (int i = 0; i < n; i++) { | ||
matrix_mul(a->data, b->data, c->data, c->rows, a->cols); | ||
} | ||
float seconds = (float)(clock() - (float)start) / CLOCKS_PER_SEC; | ||
printf("%f", seconds); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#pragma once | ||
void matrix_mul(const float* weights, const float* inputs, float* __restrict__ results, int res_rows, int w_cols); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Build every kernel version, time each one, and plot runtime against iteration count."""
import os
import subprocess
import matplotlib.pyplot as plt

# Rebuild the executables first; stop with the build output if that fails,
# rather than timing stale or missing binaries.
build = subprocess.run(['make'], capture_output=True, text=True)
if build.returncode != 0:
    raise SystemExit(f"make failed with exit code {build.returncode}:\n{build.stderr}")

# Define the folder containing the executables
folder_path = './bin'  # Change this to your bin folder path

# Iteration counts passed to each executable as its first argument
start = 100000
end = 1000000
step = 100000
input_sizes = list(range(start, end + 1, step))

# One runtime list per executable; sorted so the legend order is stable
runtimes = {
    exe: []
    for exe in sorted(os.listdir(folder_path))
    if os.path.isfile(os.path.join(folder_path, exe))
}

# Loop through each executable
for exe in runtimes:
    exe_path = os.path.join(folder_path, exe)

    # Loop through each iteration count
    for n in input_sizes:
        # Run the executable with the iteration count and capture its output
        result = subprocess.run([exe_path, str(n)], capture_output=True, text=True)
        if result.returncode != 0:
            raise SystemExit(
                f"{exe_path} {n} exited with status {result.returncode}: {result.stderr.strip()}"
            )

        # The executable prints the elapsed seconds as a single float
        try:
            runtime = float(result.stdout.strip())
        except ValueError:
            raise SystemExit(f"{exe_path} {n} printed an unparsable runtime: {result.stdout!r}")
        print(exe, runtime)

        # Append the runtime to the corresponding executable list
        runtimes[exe].append(runtime)

# Plot the data
plt.figure(figsize=(12, 6))

# One line per executable
for exe, times in runtimes.items():
    plt.plot(input_sizes, times, marker='o', label=exe)

# The x value is how many times the kernel was invoked, not a data size
plt.xlabel('Iterations')
plt.ylabel('Runtime (s)')
plt.title('Benchmark of Function Versions')
plt.legend()
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#include "../matrix_mul.h" | ||
|
||
/*
 * Matrix-vector product: results[i] = sum over k of weights[i * w_cols + k] * inputs[k].
 *
 * weights is read as res_rows consecutive rows of w_cols floats, inputs as
 * w_cols floats, and results receives res_rows floats. results is qualified
 * __restrict__, so it must not overlap the other two buffers.
 *
 * This version accumulates each row in a local variable and stores it once.
 */
void matrix_mul(const float* weights, const float* inputs, float* __restrict__ results, int res_rows, int w_cols) { | ||
for (int i = 0; i < res_rows; i++) { | ||
float sum = 0; | ||
/* Offset of the first element of row i in the flat weights array. */
int h = i * w_cols; | ||
for (int k = 0; k < w_cols; k++) { | ||
sum += weights[h + k] * inputs[k]; | ||
} | ||
results[i] = sum; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#include "../matrix_mul.h" | ||
|
||
/*
 * Matrix-vector product: results[r] = sum over col of weights[r * w_cols + col] * inputs[col].
 *
 * weights is read as res_rows consecutive rows of w_cols floats, inputs as
 * w_cols floats, and results receives res_rows floats. results is qualified
 * __restrict__, so it must not overlap the other two buffers.
 *
 * This version zeroes results[cur_row] and accumulates directly into it,
 * recomputing the row offset on every inner iteration.
 */
void matrix_mul(const float* weights, const float* inputs, float* __restrict__ results, int res_rows, int w_cols) { | ||
for (int cur_row = 0; cur_row < res_rows; cur_row++) { | ||
results[cur_row] = 0; | ||
for (int col = 0; col < w_cols; col++) { | ||
results[cur_row] += weights[cur_row * w_cols + col] * inputs[col]; | ||
} | ||
} | ||
} |
Oops, something went wrong.