-
Notifications
You must be signed in to change notification settings - Fork 1
/
benchmark.c
executable file
·114 lines (94 loc) · 3.33 KB
/
benchmark.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include <cblas.h>
#include <omp.h>
#include <nmmintrin.h>
/* Your function must have the following signature.
 * Only the declaration appears here; the implementation is expected to be
 * supplied separately.
 * The benchmark in main() calls it as sgemm( m, n, m, A, C ), where A holds
 * (n+m)*n floats and C holds n*n floats, and checks that the values it
 * accumulates into a zeroed C match those of sgemm_reference( m, n, A, C ). */
void sgemm( int m, int n, int d, float *A, float *C );
/*
 * The reference code.
 *
 * Accumulates into the n-by-n result C (element (i,j) stored at C[i + j*n]):
 *
 *     C[i + j*n] += sum over k in [0, m) of A[i + k*n] * A[j*(n+1) + k*n]
 *
 * C is added to, not overwritten, so the caller decides its initial contents.
 * The largest index read from A is (n-1)*(n+1) + (m-1)*n, so A must hold at
 * least (n+m)*n - n floats; the benchmark allocates (n+m)*n.
 */
void sgemm_reference( int m, int n, float *A, float *C )
{
    /* Each value of `row` updates only the elements C[row + col*n], so the
       outer iterations never write to the same location. */
#pragma omp parallel for
    for( int row = 0; row < n; row++ )
    {
        for( int strip = 0; strip < m; strip++ )
        {
            /* Both factors of every product in this pass start strip*n floats into A. */
            const float *panel = A + strip * n;

            for( int col = 0; col < n; col++ )
            {
                float *dst = &C[row + col * n];
                *dst += panel[row] * panel[col * ( n + 1 )];
            }
        }
    }
}
/* Parse `text` as a strictly positive base-10 int; store it in *out and
   return 0, or return -1 (leaving *out untouched) if it is not one. */
static int parse_positive_int( const char *text, int *out )
{
    char *end = NULL;

    errno = 0;
    long value = strtol( text, &end, 10 );
    if( end == text || *end != '\0' || errno == ERANGE || value <= 0 || value > INT_MAX )
        return -1;

    *out = (int) value;
    return 0;
}

/* Return value + step (step > 0), saturating at INT_MAX instead of overflowing. */
static int advance( int value, int step )
{
    return ( value > INT_MAX - step ) ? INT_MAX : value + step;
}

/* Verify sgemm() against sgemm_reference() for one (n, m) size, then time it.
   On success stores the measured rate in *gflop_s and returns 0; returns -1
   if memory cannot be allocated or the results disagree. */
static int benchmark_case( int n, int m, double *gflop_s )
{
    /* largest accepted squared difference between sgemm and the reference */
    static const double max_squared_error = 0.0001;
    /* a timed batch must last at least this long to count as a measurement */
    static const double min_seconds = 0.1;

    int status = -1;
    double seconds = -1.0;
    double rate = 0.0;

    /* Sizes are computed in size_t, and calloc rejects a byte count that overflows. */
    const size_t a_len = (size_t) ( n + m ) * (size_t) n;
    const size_t c_len = (size_t) n * (size_t) n;

    /* calloc zero-fills, so C and C_ref start at 0 as both kernels accumulate */
    float *A = calloc( a_len, sizeof *A );
    float *C = calloc( c_len, sizeof *C );
    float *C_ref = calloc( c_len, sizeof *C_ref );
    if( A == NULL || C == NULL || C_ref == NULL )
    {
        fprintf( stderr, "FAILURE: out of memory at n = %d, m = %d\n", n, m );
        goto cleanup;
    }

    /* Fill A with random values in [-1, 1) */
    for( size_t i = 0; i < a_len; i++ )
        A[i] = (float) ( 2 * drand48() - 1 );

    /* Compute the expected result with the reference, then the result under test */
    sgemm_reference( m, n, A, C_ref );
    sgemm( m, n, m, A, C );

    /* Replace each element of C by its difference from the reference and
       require the squared difference to stay within the tolerance. The test
       is written negated so that a NaN result is reported as a failure. */
    for( size_t i = 0; i < c_len; i++ )
    {
        C[i] -= C_ref[i];
        if( !( C[i] * C[i] <= max_squared_error ) )
        {
            printf( "FAILURE: error in matrix multiply exceeds an acceptable margin\n" );
            printf( "Off by: %f, from the reference: %f, at n = %d, m = %d\n", C[i], C_ref[i], n, m );
            goto cleanup;
        }
    }

    /* measure Gflop/s rate; double the number of timed calls until the batch
       is long enough to eliminate noise */
    for( int n_iterations = 1; seconds < min_seconds; n_iterations *= 2 )
    {
        /* warm-up */
        sgemm( m, n, m, A, C );

        /* measure time */
        struct timeval start, end;
        gettimeofday( &start, NULL );
        for( int i = 0; i < n_iterations; i++ )
            sgemm( m, n, m, A, C );
        gettimeofday( &end, NULL );
        seconds = ( end.tv_sec - start.tv_sec ) + 1.0e-6 * ( end.tv_usec - start.tv_usec );

        /* compute Gflop/s rate: 2*m*n*n floating-point operations per call */
        rate = 2e-9 * n_iterations * m * n * n / seconds;
    }

    *gflop_s = rate;
    status = 0;

cleanup:
    /* release memory (free(NULL) is a no-op, so partial allocation is fine) */
    free( C_ref );
    free( C );
    free( A );
    return status;
}

/*
 * The benchmarking program.
 *
 * With no arguments: sweeps n from 400 while n < 1500 (growing by n/3) and,
 * for each n, m from 32 while m < 100 (growing by 1 + m/3), once.
 * With exactly two arguments "n m": runs that single size 10 times.
 * Any other argument count falls back to the default sweep.
 *
 * Each size is first checked against sgemm_reference() and then timed; the
 * per-size rate and the average over all runs are printed to stdout.
 * Returns 0 on success and -1 on invalid arguments, allocation failure or a
 * result mismatch.
 */
int main( int argc, char **argv )
{
    /* The matrices are filled with drand48(), so seed that generator;
       srand() only affects rand(). */
    srand48( (long) time( NULL ) );

    int counter = 0;
    double sum = 0.0;
    int nStart = 400;
    int nEnd = 1500;
    int mStart = 32;
    int mEnd = 100;
    int loopEnd = 1;

    if( argc == 3 )
    {
        /* (n+m)*n must fit in an int because the kernels index A with int arithmetic */
        if( parse_positive_int( argv[1], &nStart ) != 0 ||
            parse_positive_int( argv[2], &mStart ) != 0 ||
            (long long) nStart + mStart > INT_MAX / nStart )
        {
            fprintf( stderr, "usage: %s [n m]  (positive integers with (n+m)*n <= %d)\n", argv[0], INT_MAX );
            return -1;
        }
        nEnd = nStart + 1;
        mEnd = mStart + 1;
        loopEnd = 10;
    }

    for( int loop = 0; loop < loopEnd; loop++ )
    {
        /* n grows by a third each step; the step is at least 1 so that
           n < 3 cannot stall the sweep */
        for( int n = nStart; n < nEnd; n = advance( n, n / 3 > 0 ? n / 3 : 1 ) )
        {
            /* Try different m */
            for( int m = mStart; m < mEnd; m = advance( m, 1 + m / 3 ) )
            {
                double Gflop_s = 0.0;
                if( benchmark_case( n, m, &Gflop_s ) != 0 )
                    return -1;

                printf( "n = %d, m = %d \t %g Gflop/s\n", n, m, Gflop_s );
                counter++;
                sum += Gflop_s;
            }
        }
    }

    if( counter > 0 )
        printf( "Average Gflops: %f\n", sum / counter );
    else
        fprintf( stderr, "no benchmark configurations were run\n" );

    return 0;
}