-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fmk - adding session2 and 3 solutions
- Loading branch information
Showing
35 changed files
with
3,075 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
|
||
// program to read values from a file, each line a csv list of an int and two doubles | ||
// written: fmk | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
/*
 * Reads an ASCII file whose lines have the form "int, float, float", echoes
 * every parsed row to stdout, and writes the two value columns to a binary
 * file: first all values of column 1 as doubles, then all values of column 2.
 *
 * Usage: appName inputFile outputBinaryFile
 *
 * Returns 0 on success and -1 on a usage, I/O, allocation or parse error.
 */
int main(int argc, char **argv) {

  if (argc != 3) {
    fprintf(stderr, "ERROR correct usage appName inputFile outputBinaryFile\n");
    return -1;
  }

  /* Everything the cleanup path touches is declared (and nulled) up front. */
  int status = -1;
  FILE *filePtr = NULL;
  FILE *filePtrB = NULL;
  double *vector1 = NULL;
  double *vector2 = NULL;
  size_t maxVectorSize = 100;
  size_t vectorSize = 0;
  int fields = 0;
  int i = 0;
  float float1 = 0.0f;
  float float2 = 0.0f;

  //
  // read from ascii file
  //
  filePtr = fopen(argv[1], "r");
  if (filePtr == NULL) {
    fprintf(stderr, "ERROR could not open input file %s\n", argv[1]);
    goto cleanup;
  }

  vector1 = malloc(maxVectorSize * sizeof *vector1);
  vector2 = malloc(maxVectorSize * sizeof *vector2);
  if (vector1 == NULL || vector2 == NULL) {
    fprintf(stderr, "ERROR out of memory\n");
    goto cleanup;
  }

  /* Require all three fields: a matching failure returns 0, not EOF, and
     would otherwise be retried forever on the same unread characters. */
  while ((fields = fscanf(filePtr, "%d, %f, %f\n", &i, &float1, &float2)) == 3) {
    vector1[vectorSize] = float1;
    vector2[vectorSize] = float2;

    /* Index with the row counter, not with the value read from the file,
       so an arbitrary first column cannot cause an out-of-bounds read.
       NOTE(review): the two value columns are echoed in swapped order
       (column 2 first), exactly as before - confirm this is intended. */
    printf("%d, %f, %f\n", i, vector2[vectorSize], vector1[vectorSize]);
    vectorSize++;

    if (vectorSize == maxVectorSize) {

      // double the capacity; realloc preserves the existing contents
      size_t newMaxVectorSize = 2 * maxVectorSize;

      double *grown1 = realloc(vector1, newMaxVectorSize * sizeof *grown1);
      if (grown1 == NULL) {
        fprintf(stderr, "ERROR out of memory\n");
        goto cleanup;
      }
      vector1 = grown1;

      double *grown2 = realloc(vector2, newMaxVectorSize * sizeof *grown2);
      if (grown2 == NULL) {
        fprintf(stderr, "ERROR out of memory\n");
        goto cleanup;
      }
      vector2 = grown2;

      maxVectorSize = newMaxVectorSize;
    }
  }

  if (fields != EOF || ferror(filePtr)) {
    fprintf(stderr, "ERROR malformed or unreadable input after %zu rows\n", vectorSize);
    goto cleanup;
  }

  fclose(filePtr);
  filePtr = NULL;

  //
  // write to binary file
  //
  filePtrB = fopen(argv[2], "wb");
  if (filePtrB == NULL) {
    fprintf(stderr, "ERROR could not open output file %s\n", argv[2]);
    goto cleanup;
  }

  if (fwrite(vector1, sizeof *vector1, vectorSize, filePtrB) != vectorSize ||
      fwrite(vector2, sizeof *vector2, vectorSize, filePtrB) != vectorSize) {
    fprintf(stderr, "ERROR could not write output file %s\n", argv[2]);
    goto cleanup;
  }

  /* fclose flushes buffered data, so its result matters for a write stream. */
  {
    int closeResult = fclose(filePtrB);
    filePtrB = NULL;
    if (closeResult != 0) {
      fprintf(stderr, "ERROR could not write output file %s\n", argv[2]);
      goto cleanup;
    }
  }

  status = 0;

cleanup:
  if (filePtr != NULL)
    fclose(filePtr);
  if (filePtrB != NULL)
    fclose(filePtrB);
  free(vector1);
  free(vector2);
  return status;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
0, 0.153779, 0.560532 | ||
1, 0.865013, 0.276724 | ||
2, 0.895919, 0.704462 | ||
3, 0.886472, 0.929641 | ||
4, 0.469290, 0.350208 | ||
5, 0.941637, 0.096535 | ||
6, 0.457211, 0.346164 | ||
7, 0.970019, 0.114938 | ||
8, 0.769819, 0.341565 | ||
9, 0.684224, 0.748597 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Build configuration for the matrix-multiplication (DGEMM) exercises:
#   matMul    - small C driver using the naive and the BLAS kernels
#   benchmark - C++ driver timing every kernel variant against BLAS

# Versions below 3.5 are no longer accepted by current CMake releases.
cmake_minimum_required (VERSION 3.5)

project (matMUL C CXX)

# benchmark.cpp relies on C++11 features (auto, <chrono>, <random>).
set (CMAKE_CXX_STANDARD 11)
set (CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(BLAS REQUIRED)

include_directories(${PROJECT_SOURCE_DIR})

add_executable(matMul matMul.c myDGEMM.c blasDGEMM.c)
target_link_libraries(matMul m ${BLAS_LIBRARIES})

add_executable(benchmark benchmark.cpp myDGEMM.c blasDGEMM.c myDGEMM_OrderLoop.c myDGEMM_Transpose.c myDGEMM_Blocked.c)
target_link_libraries(benchmark m ${BLAS_LIBRARIES})
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
#include <algorithm> | ||
#include <chrono> | ||
#include <iomanip> | ||
#include <iostream> | ||
#include <random> | ||
#include <vector> | ||
|
||
#include <cmath> // For: fabs | ||
|
||
/* DGEMM kernels under test; they are defined in the C sources, so they are
   declared here with C linkage. All share the signature (n, A, B, C). */
extern "C" {
void blasDGEMM(int n, double* A, double* B, double* C);
void myDGEMM(int n, double* A, double* B, double* C);
void myDGEMM_OrderLoop(int n, double* A, double* B, double* C);
void myDGEMM_Transpose(int n, double* A, double* B, double* C);
void myDGEMM_Blocked(int n, double* A, double* B, double* C);
}
|
||
|
||
/*
 * Writes n pseudo-random doubles to p[0..n-1]; does nothing when n <= 0.
 *
 * The generator is seeded once from std::random_device and kept across calls.
 * Each value is 2*u - 1 with u drawn from uniform_real_distribution(-1.0, 1.0),
 * so the stored values fall in [-3, 1).
 */
void fill(double* p, int n) {
    static std::random_device seedSource;
    static std::default_random_engine engine(seedSource());
    static std::uniform_real_distribution<> uniform(-1.0, 1.0);

    for (int remaining = n; remaining > 0; --remaining) {
        *p = 2 * uniform(engine) - 1;
        ++p;
    }
}
|
||
/* The benchmarking program */ | ||
int main(int argc, char** argv) { | ||
|
||
bool test = false; | ||
if (argc != 1) | ||
test = true; | ||
|
||
int ok = 0; | ||
|
||
printf(" N BLAS NAIVE REORDER TRANSPOSE BLOCKED\n"); | ||
|
||
/* | ||
int numTests = 96; | ||
int testSizes[96]= {31, 32, 33, 63, 64, 65, 95, 96, 97, 127, 128, 129, 159, 160, 161, 191, | ||
192, 193, 223, 224, 225, 255, 256, 257, 287, 288, 289, 319, 320, 321, 351, 352, | ||
353, 383, 384, 385, 415, 416, 417, 447, 448, 449, 479, 480, 481, 511, 512, 513, | ||
543, 544, 545, 575, 576, 577, 607, 608, 609, 639, 640, 641, 671, 672, 673, 703, | ||
704, 705, 735, 736, 737, 767, 768, 769, 799, 800, 801, 831, 832, 833, 863, 864, | ||
865, 895, 896, 897, 927, 928, 929, 959, 960, 961, 991, 992, 993, 1023, 1024, 1025}; | ||
int numTests = 26; | ||
int testSizes[26] = {31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257, | ||
319, 320, 321, 417, 479, 480, 511, 512, 639, 640, 767, 768, 769}; | ||
*/ | ||
|
||
int numTests = 13; | ||
int testSizes[26] = {31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257}; | ||
|
||
double averageGFlopsBlas = 0; | ||
double averageGFlopsMine = 0; | ||
double averageGFlopsMineOrderLoop = 0; | ||
double averageGFlopsMineTranspose = 0; | ||
double averageGFlopsMineBlocked = 0; | ||
|
||
for (int i=0; i<numTests; i++) { | ||
|
||
int n = testSizes[i]; | ||
|
||
double *A = (double *)malloc(n*n*sizeof(double)); | ||
double *B = (double *)malloc(n*n*sizeof(double)); | ||
double *C = (double *)malloc(n*n*sizeof(double)); | ||
double *C1 = (double *)malloc(n*n*sizeof(double)); | ||
double *C2 = (double *)malloc(n*n*sizeof(double)); | ||
double *C3 = (double *)malloc(n*n*sizeof(double)); | ||
double *C4 = (double *)malloc(n*n*sizeof(double)); | ||
|
||
fill(A, n * n); | ||
fill(B, n * n); | ||
fill(C, n * n); | ||
|
||
for (int i=0; i<n*n; i++) { | ||
C1[i]=C[i]; | ||
C2[i]=C[i]; | ||
C3[i]=C[i]; | ||
C4[i]=C[i]; | ||
} | ||
|
||
/* Measure performance (in Gflops/s). */ | ||
|
||
double seconds; | ||
int n_iterations = 20; | ||
|
||
/* Warm-up */ | ||
blasDGEMM(n, A, B, C); | ||
|
||
/* Benchmark n_iterations runs of blasDGEMM */ | ||
auto start = std::chrono::steady_clock::now(); | ||
for (int it = 0; it < n_iterations; ++it) { | ||
blasDGEMM(n, A, B, C); | ||
} | ||
auto end = std::chrono::steady_clock::now(); | ||
std::chrono::duration<double> diff = end - start; | ||
seconds = diff.count(); | ||
|
||
/* compute GFlop/s rate */ | ||
double GFlopsBlas = 2.e-9 * n_iterations * n * n * n / seconds; | ||
averageGFlopsBlas += GFlopsBlas; | ||
|
||
|
||
/* Warm-up */ | ||
myDGEMM(n, A, B, C1); | ||
|
||
/* Benchmark n_iterations runs of myDGEMM */ | ||
start = std::chrono::steady_clock::now(); | ||
for (int it = 0; it < n_iterations; ++it) { | ||
myDGEMM(n, A, B, C1); | ||
} | ||
end = std::chrono::steady_clock::now(); | ||
diff = end - start; | ||
seconds = diff.count(); | ||
|
||
/* compute GFlop/s rate */ | ||
double GFlopsMine = 2.e-9 * n_iterations * n * n * n / seconds; | ||
|
||
averageGFlopsMine += GFlopsMine; | ||
|
||
|
||
|
||
/* Warm-up */ | ||
myDGEMM_OrderLoop(n, A, B, C2); | ||
|
||
/* Benchmark n_iterations runs of myDGEMM */ | ||
start = std::chrono::steady_clock::now(); | ||
for (int it = 0; it < n_iterations; ++it) { | ||
myDGEMM_OrderLoop(n, A, B, C2); | ||
} | ||
end = std::chrono::steady_clock::now(); | ||
diff = end - start; | ||
seconds = diff.count(); | ||
|
||
/* compute GFlop/s rate */ | ||
double GFlopsMineOrderLoop = 2.e-9 * n_iterations * n * n * n / seconds; | ||
|
||
averageGFlopsMineOrderLoop += GFlopsMineOrderLoop; | ||
|
||
|
||
/* Warm-up */ | ||
myDGEMM_Transpose(n, A, B, C3); | ||
|
||
/* Benchmark n_iterations runs of myDGEMM */ | ||
start = std::chrono::steady_clock::now(); | ||
for (int it = 0; it < n_iterations; ++it) { | ||
myDGEMM_Transpose(n, A, B, C3); | ||
} | ||
end = std::chrono::steady_clock::now(); | ||
diff = end - start; | ||
seconds = diff.count(); | ||
|
||
/* compute GFlop/s rate */ | ||
double GFlopsMineTranspose = 2.e-9 * n_iterations * n * n * n / seconds; | ||
|
||
averageGFlopsMineTranspose += GFlopsMineTranspose; | ||
|
||
|
||
// check they are the same .. take into account there will be differences | ||
for (int j=0; j<n*n; j++) { | ||
double diff = C3[j] - C[j]; | ||
double error = fabs(diff/C[j]); | ||
if (error > 1e-10) { | ||
ok = 1; | ||
// printf("%d %d %.20g %.20g\n",i, j, C[j], C1[j]); | ||
printf("%d\n", ok); | ||
exit(0); | ||
} | ||
} | ||
|
||
|
||
/* Warm-up */ | ||
myDGEMM_Blocked(n, A, B, C4); | ||
|
||
/* Benchmark n_iterations runs of myDGEMM */ | ||
start = std::chrono::steady_clock::now(); | ||
for (int it = 0; it < n_iterations; ++it) { | ||
myDGEMM_Blocked(n, A, B, C4); | ||
} | ||
end = std::chrono::steady_clock::now(); | ||
diff = end - start; | ||
seconds = diff.count(); | ||
|
||
/* compute GFlop/s rate */ | ||
double GFlopsMineBlocked = 2.e-9 * n_iterations * n * n * n / seconds; | ||
|
||
averageGFlopsMineBlocked += GFlopsMineBlocked; | ||
|
||
|
||
// check they are the same .. take into account there will be differences | ||
for (int j=0; j<n*n; j++) { | ||
double diff = C4[j] - C[j]; | ||
double error = fabs(diff/C[j]); | ||
if (error > 1e-10) { | ||
ok = 1; | ||
// printf("%d %d %.20g %.20g\n",i, j, C[j], C1[j]); | ||
printf("%d\n", ok); | ||
exit(0); | ||
} | ||
} | ||
|
||
|
||
|
||
if (!test) | ||
printf("%4d %12.8f %12.8f %12.8f %12.8f %12.8f\n", n, GFlopsBlas, GFlopsMine, GFlopsMineOrderLoop, GFlopsMineTranspose, GFlopsMineBlocked); | ||
|
||
free(A); | ||
free(B); | ||
free(C); | ||
free(C1); | ||
free(C2); | ||
free(C3); | ||
} | ||
|
||
/* Printing average percentage to screen */ | ||
if (!test) | ||
printf("Average GFLOP Me %.8g Blas: %.8g\n", averageGFlopsMine/numTests, averageGFlopsBlas/numTests); | ||
else | ||
printf("%d\n", ok); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* Prototype of the Fortran BLAS level-3 routine DGEMM; every argument is
   passed by address. */
void dgemm_(char *transa, char *transb, int *m, int *n, int *k,
            double *alpha, double *a, int *lda, double *b, int *ldb,
            double *beta, double *c, int *ldc);

/*
 * This routine performs a dgemm operation
 *  C := C + A * B
 * where A, B, and C are n-by-n matrices stored in column-major format.
 * On exit, A and B maintain their input values.
 * This function wraps a call to the BLAS-3 routine DGEMM.
 *
 * n: dimension of the square matrices
 * A, B: input matrices
 * C: matrix that is accumulated into
 */
void blasDGEMM(int n, double *A, double *B, double *C) {

  char transa = 'N'; /* use A as given (no transpose) */
  char transb = 'N'; /* use B as given (no transpose) */
  double alpha = 1.0;
  double beta = 1.0; /* beta = 1 keeps the existing C, giving C += A * B */
  int ld = n;        /* leading dimension shared by A, B and C */

  dgemm_(&transa, &transb,
         &n, &n, &n,
         &alpha, A, &ld,
         B, &ld,
         &beta, C, &ld);
}
Oops, something went wrong.