Skip to content

Commit

Permalink
fmk - adding session2 and 3 solutions
Browse files Browse the repository at this point in the history
  • Loading branch information
fmckenna committed Aug 2, 2024
1 parent 9d453a4 commit d610a80
Show file tree
Hide file tree
Showing 35 changed files with 3,075 additions and 0 deletions.
Binary file added solutions/C-Session2/binaryFile/big.dat
Binary file not shown.
2,000 changes: 2,000 additions & 0 deletions solutions/C-Session2/binaryFile/big.txt

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions solutions/C-Session2/binaryFile/file3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

// program to read values from a file, each line a csv list of an int and two doubles,
// and to write the two columns of doubles to a binary file
// written: fmk

#include <stdio.h>
#include <stdlib.h>

/*
 * Reads records of the form "int, float, float" from the text file argv[1],
 * stores the two floating-point columns in two growable arrays of double,
 * echoes every record to stdout, and finally writes the first column followed
 * by the second column as raw doubles to the binary file argv[2].
 *
 * Returns 0 on success and -1 on a usage, file, or memory error.
 */
int main(int argc, char **argv) {

  if (argc != 3) {
    fprintf(stdout, "ERROR correct usage appName inputFile outputBinaryFile\n");
    return -1;
  }

  //
  // read from ascii file
  //

  FILE *filePtr = fopen(argv[1],"r");
  if (filePtr == NULL) {
    fprintf(stderr, "ERROR could not open input file %s\n", argv[1]);
    return -1;
  }

  int i = 0;
  float float1, float2;
  int maxVectorSize = 100;
  double *vector1 = (double *)malloc(maxVectorSize*sizeof(double));
  double *vector2 = (double *)malloc(maxVectorSize*sizeof(double));
  int vectorSize = 0;

  if (vector1 == NULL || vector2 == NULL) {
    fprintf(stderr, "ERROR out of memory\n");
    free(vector1);
    free(vector2);
    fclose(filePtr);
    return -1;
  }

  // Stop on anything but a complete record: comparing against EOF only would
  // loop forever on a malformed line, since fscanf then returns 0 without
  // consuming any input.
  while (fscanf(filePtr,"%d, %f, %f\n", &i, &float1, &float2) == 3) {
    vector1[vectorSize] = float1;
    vector2[vectorSize] = float2;

    // Echo the record just stored. Index with the storage slot, not with the
    // integer read from the file, which need not be a valid array index.
    // (The second-column-first echo order is kept from the original program.)
    printf("%d, %f, %f\n",i, vector2[vectorSize], vector1[vectorSize]);
    vectorSize++;

    if (vectorSize == maxVectorSize) {

      // arrays are full: double their capacity, keeping the contents
      int newMaxVectorSize = 2*maxVectorSize;
      double *newVector1 = (double *)realloc(vector1, newMaxVectorSize*sizeof(double));
      if (newVector1 != NULL)
        vector1 = newVector1;
      double *newVector2 = (double *)realloc(vector2, newMaxVectorSize*sizeof(double));
      if (newVector2 != NULL)
        vector2 = newVector2;

      if (newVector1 == NULL || newVector2 == NULL) {
        fprintf(stderr, "ERROR out of memory\n");
        free(vector1);
        free(vector2);
        fclose(filePtr);
        return -1;
      }
      maxVectorSize = newMaxVectorSize;
    }
  }
  fclose(filePtr);

  //
  // write to binary file
  //

  int status = 0;
  FILE *filePtrB = fopen(argv[2],"wb");
  if (filePtrB == NULL) {
    fprintf(stderr, "ERROR could not open output file %s\n", argv[2]);
    status = -1;
  } else {
    size_t written1 = fwrite(vector1, sizeof(double), vectorSize, filePtrB);
    size_t written2 = fwrite(vector2, sizeof(double), vectorSize, filePtrB);
    if (written1 != (size_t)vectorSize || written2 != (size_t)vectorSize) {
      fprintf(stderr, "ERROR could not write all values to %s\n", argv[2]);
      status = -1;
    }
    if (fclose(filePtrB) != 0)
      status = -1;
  }

  free(vector1);
  free(vector2);
  return status;
}
10 changes: 10 additions & 0 deletions solutions/C-Session2/binaryFile/small.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
0, 0.153779, 0.560532
1, 0.865013, 0.276724
2, 0.895919, 0.704462
3, 0.886472, 0.929641
4, 0.469290, 0.350208
5, 0.941637, 0.096535
6, 0.457211, 0.346164
7, 0.970019, 0.114938
8, 0.769819, 0.341565
9, 0.684224, 0.748597
16 changes: 16 additions & 0 deletions solutions/C-Session2/matMul/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Build script for the matrix-multiply exercise: the matMul driver and the
# benchmark program, both linked against libm and a system BLAS.

# Current CMake releases refuse projects that request compatibility with
# versions older than 3.5, so the former 2.6 minimum no longer configures.
cmake_minimum_required (VERSION 3.5)

project (matMUL)

# benchmark.cpp uses <chrono> and <random>, which need at least C++11.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(BLAS REQUIRED)

include_directories(${PROJECT_SOURCE_DIR})

add_executable(matMul matMul.c myDGEMM.c blasDGEMM.c)
target_link_libraries(matMul m)
target_link_libraries(matMul ${BLAS_LIBRARIES})

add_executable(benchmark benchmark.cpp myDGEMM.c blasDGEMM.c myDGEMM_OrderLoop.c myDGEMM_Transpose.c myDGEMM_Blocked.c)
target_link_libraries(benchmark m)
target_link_libraries(benchmark ${BLAS_LIBRARIES})

223 changes: 223 additions & 0 deletions solutions/C-Session2/matMul/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
#include <algorithm>
#include <chrono>
#include <cmath>    // For: fabs
#include <cstdio>   // For: printf
#include <cstdlib>  // For: malloc, free, exit
#include <iomanip>
#include <iostream>
#include <random>
#include <vector>

extern "C" void blasDGEMM(int n, double* A, double* B, double* C);
extern "C" void myDGEMM(int n, double *A, double *B, double *C);
extern "C" void myDGEMM_OrderLoop(int n, double *A, double *B, double *C);
extern "C" void myDGEMM_Transpose(int n, double *A, double *B, double *C);
extern "C" void myDGEMM_Blocked(int n, double *A, double *B, double *C);


/*
 * Fills p[0..n-1] with pseudo-random values drawn uniformly from [-1, 1).
 *
 * The engine is seeded once from std::random_device on the first call and then
 * shared by all later calls, so successive calls yield different data.
 * Nothing is written when n <= 0.
 */
void fill(double* p, int n) {
  static std::random_device rd;
  static std::default_random_engine gen(rd());
  static std::uniform_real_distribution<> dis(-1.0, 1.0);
  for (int i = 0; i < n; ++i) {
    // The distribution already covers [-1, 1); rescaling the sample again with
    // 2*x - 1 (as if it were drawn from [0, 1)) produced values in [-3, 1).
    p[i] = dis(gen);
  }
}

/* The benchmarking program */

namespace {

/*
 * Runs kernel(n, A, B, C) once as an untimed warm-up, then times n_iterations
 * further calls and returns the achieved rate in GFlop/s, counting 2*n^3
 * floating-point operations per call.
 *
 * Note that C is updated by every call, the warm-up included.
 */
template <typename Kernel>
double measureGFlops(Kernel kernel, int n, double* A, double* B, double* C, int n_iterations) {
  /* Warm-up */
  kernel(n, A, B, C);

  const auto start = std::chrono::steady_clock::now();
  for (int it = 0; it < n_iterations; ++it) {
    kernel(n, A, B, C);
  }
  const auto end = std::chrono::steady_clock::now();
  const std::chrono::duration<double> diff = end - start;
  const double seconds = diff.count();

  /* 2.e-9 is a double, so the product is evaluated in floating point */
  return 2.e-9 * n_iterations * n * n * n / seconds;
}

/*
 * Returns true when every one of the count entries of result lies within a
 * relative error of 1e-10 of the matching entry of reference.
 */
bool matchesReference(const double* reference, const double* result, int count) {
  for (int j = 0; j < count; ++j) {
    const double diff = result[j] - reference[j];
    const double error = std::fabs(diff / reference[j]);
    if (error > 1e-10)
      return false;
  }
  return true;
}

}  // namespace

/*
 * For each matrix size, measures the GFlop/s rate of the BLAS, naive,
 * loop-reordered, transposed and blocked DGEMM implementations and checks the
 * transposed and blocked results against the BLAS result.
 *
 * With no command-line arguments a table of rates plus an average line is
 * printed. With any argument ("test" mode) only a status digit is printed:
 * 0 when all checks pass, 1 as soon as one fails. The process exit status is
 * 0 in either case.
 */
int main(int argc, char** argv) {

  const bool test = (argc != 1);

  int ok = 0;

  printf(" N BLAS NAIVE REORDER TRANSPOSE BLOCKED\n");

  /*
  int numTests = 96;
  int testSizes[96]= {31, 32, 33, 63, 64, 65, 95, 96, 97, 127, 128, 129, 159, 160, 161, 191,
  192, 193, 223, 224, 225, 255, 256, 257, 287, 288, 289, 319, 320, 321, 351, 352,
  353, 383, 384, 385, 415, 416, 417, 447, 448, 449, 479, 480, 481, 511, 512, 513,
  543, 544, 545, 575, 576, 577, 607, 608, 609, 639, 640, 641, 671, 672, 673, 703,
  704, 705, 735, 736, 737, 767, 768, 769, 799, 800, 801, 831, 832, 833, 863, 864,
  865, 895, 896, 897, 927, 928, 929, 959, 960, 961, 991, 992, 993, 1023, 1024, 1025};
  int numTests = 26;
  int testSizes[26] = {31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257,
  319, 320, 321, 417, 479, 480, 511, 512, 639, 640, 767, 768, 769};
  */

  /* The count is derived from the list so the two cannot drift apart. */
  const int testSizes[] = {31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257};
  const int numTests = static_cast<int>(sizeof(testSizes) / sizeof(testSizes[0]));

  const int n_iterations = 20;

  /* Only these two averages are reported in the summary line. */
  double averageGFlopsBlas = 0;
  double averageGFlopsMine = 0;

  for (int t = 0; t < numTests; ++t) {

    const int n = testSizes[t];
    const int count = n * n;

    /* std::vector owns the storage, so every buffer is released at the end
       of each iteration (the hand-written frees used to miss C4). */
    std::vector<double> A(count);
    std::vector<double> B(count);
    std::vector<double> C(count);

    fill(A.data(), count);
    fill(B.data(), count);
    fill(C.data(), count);

    /* Every implementation starts from the same initial C. */
    std::vector<double> C1(C);
    std::vector<double> C2(C);
    std::vector<double> C3(C);
    std::vector<double> C4(C);

    /* Measure performance (in Gflops/s). */

    const double GFlopsBlas =
        measureGFlops(blasDGEMM, n, A.data(), B.data(), C.data(), n_iterations);
    averageGFlopsBlas += GFlopsBlas;

    const double GFlopsMine =
        measureGFlops(myDGEMM, n, A.data(), B.data(), C1.data(), n_iterations);
    averageGFlopsMine += GFlopsMine;

    const double GFlopsMineOrderLoop =
        measureGFlops(myDGEMM_OrderLoop, n, A.data(), B.data(), C2.data(), n_iterations);

    const double GFlopsMineTranspose =
        measureGFlops(myDGEMM_Transpose, n, A.data(), B.data(), C3.data(), n_iterations);

    // check they are the same .. take into account there will be differences.
    // Both C and C3 have accumulated the same number of products by now.
    if (!matchesReference(C.data(), C3.data(), count)) {
      ok = 1;
      printf("%d\n", ok);
      return 0;  // status digit on stdout; exit status stays 0 as before
    }

    const double GFlopsMineBlocked =
        measureGFlops(myDGEMM_Blocked, n, A.data(), B.data(), C4.data(), n_iterations);

    if (!matchesReference(C.data(), C4.data(), count)) {
      ok = 1;
      printf("%d\n", ok);
      return 0;  // status digit on stdout; exit status stays 0 as before
    }

    if (!test)
      printf("%4d %12.8f %12.8f %12.8f %12.8f %12.8f\n", n, GFlopsBlas, GFlopsMine, GFlopsMineOrderLoop, GFlopsMineTranspose, GFlopsMineBlocked);
  }

  /* Printing average percentage to screen */
  if (!test)
    printf("Average GFLOP Me %.8g Blas: %.8g\n", averageGFlopsMine/numTests, averageGFlopsBlas/numTests);
  else
    printf("%d\n", ok);

  return 0;
}
26 changes: 26 additions & 0 deletions solutions/C-Session2/matMul/blasDGEMM.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
 * Prototype of the external dgemm_ routine, the BLAS-3 DGEMM that blasDGEMM
 * wraps. Every argument, scalars included, is passed by pointer.
 * NOTE(review): the trailing underscore presumably matches the Fortran symbol
 * name exported by the linked BLAS library - confirm for the target toolchain.
 */
void dgemm_(char* transa, char* transb, int* m, int* n, int* k,
double* alpha, double* a, int* lda, double* b, int* ldb,
double* beta, double* c, int* ldc);

/*
 * blasDGEMM: reference matrix multiply that delegates to the BLAS-3 routine
 * DGEMM.
 *
 * Performs  C := C + A * B  where A, B, and C are n-by-n matrices stored in
 * column-major format. A and B keep their input values; C is updated in place.
 */

void blasDGEMM(int n, double* A, double* B, double* C) {

  /* 'N' for both operands: A and B are used as stored, not transposed */
  char opA = 'N';
  char opB = 'N';

  /* alpha = beta = 1 turns the general alpha*A*B + beta*C into C + A*B */
  double scaleProduct = 1.0;
  double scaleC = 1.0;

  /* square matrices: the leading dimension of all three equals n */
  int lda = n;

  dgemm_(&opA, &opB,
         &n, &n, &n,
         &scaleProduct,
         A, &lda,
         B, &lda,
         &scaleC,
         C, &lda);
}
Loading

0 comments on commit d610a80

Please sign in to comment.