-
Notifications
You must be signed in to change notification settings - Fork 0
/
Correlation.java
88 lines (74 loc) · 3.08 KB
/
Correlation.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*
* Pearson correlation from two sets of similarity scores.
*
* Copyright (C) 2013 Lisa Vitolo <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the Creative Commons
* Attribution-NonCommercial-ShareAlike 3.0 license.
* You should have received a copy of the license with this product.
* Otherwise, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
*/
import java.math.BigDecimal;
import java.math.RoundingMode;
/*
* NOTE: Java does not autobox arrays: a double[] cannot be used where a Double[] is expected, or
* vice versa. Since the arrays usually have thousands of elements, copying them from one type to
* the other would be expensive, so the same method is defined twice, once per array type.
*/
/**
 * Pearson correlation coefficient between two paired samples of scores.
 *
 * <p>Both overloads run the same single-pass computation: the running means, the sums of squared
 * deviations and the sum of co-products are updated as each pair is read, so the data is traversed
 * only once. The resulting coefficient is rounded to 3 decimal digits (half-up).
 */
public class Correlation
{
    /** Number of decimal digits kept in the returned coefficient. */
    private static final int SCALE = 3;

    /**
     * Computes the Pearson correlation of two arrays of boxed scores.
     *
     * @param scores1 first sample; must not be null or contain null elements
     * @param scores2 second sample, paired element-by-element with {@code scores1}
     * @return the coefficient rounded to 3 decimal digits, or {@code Double.NaN} when it is
     *         undefined (fewer than two pairs, a sample with zero variance, or non-finite input)
     * @throws IllegalArgumentException if the arrays have different lengths
     * @throws NullPointerException if an array or one of its elements is null
     */
    public static double getPearsonCorrelation(Double[] scores1, Double[] scores2)
    {
        int n = scores1.length;
        checkSameLength(n, scores2.length);
        if (n < 2) {
            // A correlation needs at least two pairs; the original code crashed here.
            return Double.NaN;
        }

        double sumSqX = 0;
        double sumSqY = 0;
        double sumCoproduct = 0;
        double meanX = scores1[0];
        double meanY = scores2[0];

        // i is the 1-based count of pairs consumed so far, hence the [i - 1] indexing.
        for (int i = 2; i <= n; i++) {
            double sweep = (double) (i - 1) / i;
            double deltaX = scores1[i - 1] - meanX;
            double deltaY = scores2[i - 1] - meanY;
            sumSqX += deltaX * deltaX * sweep;
            sumSqY += deltaY * deltaY * sweep;
            sumCoproduct += deltaX * deltaY * sweep;
            meanX += deltaX / i;
            meanY += deltaY / i;
        }

        return roundedCorrelation(sumSqX, sumSqY, sumCoproduct, n);
    }

    /**
     * Computes the Pearson correlation of two arrays of primitive scores.
     *
     * @param scores1 first sample; must not be null
     * @param scores2 second sample, paired element-by-element with {@code scores1}
     * @return the coefficient rounded to 3 decimal digits, or {@code Double.NaN} when it is
     *         undefined (fewer than two pairs, a sample with zero variance, or non-finite input)
     * @throws IllegalArgumentException if the arrays have different lengths
     * @throws NullPointerException if an array is null
     */
    public static double getPearsonCorrelation(double[] scores1, double[] scores2)
    {
        int n = scores1.length;
        checkSameLength(n, scores2.length);
        if (n < 2) {
            // A correlation needs at least two pairs; the original code crashed here.
            return Double.NaN;
        }

        double sumSqX = 0;
        double sumSqY = 0;
        double sumCoproduct = 0;
        double meanX = scores1[0];
        double meanY = scores2[0];

        // i is the 1-based count of pairs consumed so far, hence the [i - 1] indexing.
        for (int i = 2; i <= n; i++) {
            double sweep = (double) (i - 1) / i;
            double deltaX = scores1[i - 1] - meanX;
            double deltaY = scores2[i - 1] - meanY;
            sumSqX += deltaX * deltaX * sweep;
            sumSqY += deltaY * deltaY * sweep;
            sumCoproduct += deltaX * deltaY * sweep;
            meanX += deltaX / i;
            meanY += deltaY / i;
        }

        return roundedCorrelation(sumSqX, sumSqY, sumCoproduct, n);
    }

    /** Rejects samples that cannot be paired element-by-element. */
    private static void checkSameLength(int length1, int length2)
    {
        if (length1 != length2) {
            throw new IllegalArgumentException(
                "Score arrays must have the same length: " + length1 + " != " + length2);
        }
    }

    /** Turns the accumulated sums over n pairs into the coefficient rounded to SCALE digits. */
    private static double roundedCorrelation(double sumSqX, double sumSqY, double sumCoproduct, int n)
    {
        double popSdX = Math.sqrt(sumSqX / n);
        double popSdY = Math.sqrt(sumSqY / n);
        double covXY = sumCoproduct / n;
        double corr = covXY / (popSdX * popSdY);

        // A zero-variance sample yields 0/0. BigDecimal cannot represent NaN or infinity and
        // would throw NumberFormatException, so report the undefined result as NaN instead.
        if (Double.isNaN(corr) || Double.isInfinite(corr)) {
            return Double.NaN;
        }

        // new BigDecimal(double) is intentional: it rounds the exact binary value, which keeps
        // results identical to earlier versions of this class.
        return new BigDecimal(corr).setScale(SCALE, RoundingMode.HALF_UP).doubleValue();
    }
}