-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcp2b.cc
54 lines (51 loc) · 1.65 KB
/
cp2b.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/*
This is the function you need to implement. Quick reference:
- input rows: 0 <= y < ny
- input columns: 0 <= x < nx
- element at row y and column x is stored in data[x + y*nx]
- the correlation between row i and row j has to be stored in result[i + j*ny]
- only parts with 0 <= j <= i < ny need to be filled
*/
#include "math.h"
#include "stdlib.h"
void correlate(int ny, int nx, const float *data, float *result) {
double *A = (double *)calloc(ny * nx, sizeof(double));
// normalize input rows so that mean = 0
#pragma omp parallel for schedule(dynamic, 1)
for (int y = 0; y < ny; y++) {
double sum = 0;
double mean;
for (int x = 0; x < nx; x++) {
sum += data[x + y * nx];
}
mean = sum / nx;
for (int x = 0; x < nx; x++) {
A[x + y * nx] = data[x + y * nx] - mean;
}
}
// normalize input rows so that sum of squares = 1
#pragma omp parallel for schedule(dynamic, 1)
for (int y = 0; y < ny; y++) {
double squaredsum = 0;
for (int x = 0; x < nx; x++) {
squaredsum += A[x + y * nx] * A[x + y * nx];
}
for (int x = 0; x < nx; x++) {
A[x + y * nx] = A[x + y * nx] / sqrt(squaredsum);
}
}
// compute the matrix product of A * A^T without explicitly computing A^T.
// i traverses rows of A, j traverses columns of A^T, and e traverses the
// specific elements to multiply. by j=i we only calculate the upper triangle.
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ny; i++) {
for (int j = i; j < ny; j++) {
double element = 0;
for (int e = 0; e < nx; e++) {
element += A[i * nx + e] * A[j * nx + e];
}
result[j + i * ny] = element;
}
}
free(A);
}