-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions_sig.R
executable file
·124 lines (98 loc) · 4.13 KB
/
functions_sig.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#################################################
# FOR PERMUTATION TESTS
# Our typical sum of square of differences test statistic of the column means.
get_stat_diff_squared <- function(matrix1, matrix2)
{
return(sum((colMeans(matrix1) - colMeans(matrix2))^2));
}
# The data on which this is performed is such that the rows are the patients
# and the columns are the epsilon increments (either of the networks or the
# simplicial complexes) and the data are the parameter values.
randomize <- function(df1, df2, fill, Replace_Subject=FALSE, Nitrs=10000)
{
m1 <- as.matrix(df1);
m2 <- as.matrix(df2);
# Record dimension data
n1 <- nrow(m1);
n2 <- nrow(m2);
N1 <- ncol(m1);
N2 <- ncol(m2);
# Fill in the missing rectangular matrix with the appropriate fill value so m1 and m2 are of the same size.
# NOTE: In sig_pcalc.R, the data is already rectangularized so any block of patients has the same
# number of columns as any other block of patients. This is here for generality.
if(N1 > N2) {
m2 <- cbind(m2,matrix(fill, nrow=n2, ncol=N1-N2));
N2 <- N1;
} else if(N2 > N1) {
m1 <- cbind(m1,matrix(fill, nrow=n1, ncol=N2-N1));
N1 <- N2;
}
if(N1 != N2) {
stop("The number of columns does not match! The extension statements didn't work...");
}
# Compute the reference test statistic
test_stat <- get_stat_diff_squared(m1,m2);
# area by epsilon test-control (positive if blue on top)
dif <- colMeans(m1) - colMeans(m2)
# a column with a flag for positive and negative numbers (1, -1, 0)
#sign<-sign(dif)
# get the positive area
#sumPos<-sum(dif[sign!=-1])
sumPos<-sum(dif[sign(dif)==1])
# get the negative area
sumNeg<-sum(dif[sign(dif)==-1])
if (test_stat==0) {
stop("The test statistic is 0! Something isn't right...");
}
# Combine the two classes of data
temp_m <- rbind(m1,m2);
# Create a vector of the row numbers
x <- seq(1, nrow(temp_m));
# Make a vector to store the test_stat for the permutations
rnd_vec <- replicate(Nitrs,
{
# Create the permutated group indices
group1 <- sample(x, n1, replace=Replace_Subject);
group2 <- sample(x[-group1], n2, replace=Replace_Subject);
# Calculate the statistic for the permutations
get_stat_diff_squared(temp_m[group1,], temp_m[group2,]);
}
);
return(list(sum(rnd_vec >= test_stat)/Nitrs, test_stat, sumPos, sumNeg));
}
########## FUNCTION curvesMeans
curvesMeans<-function(begPath, dataSet, subdir, param, dim, chrArm, phen1indices,phen2indices,seg=0){
# Returns a data frame with two numeric vectors: test and control.
# test and control contain the mean of conected components as epsilon increase
# if we are running segments use seg=segment number
# Note that this function is not working any more for parameter D
# Read in data regardless of phenotype (jagged files from ~/Results/SET/2D/Homology)
if (seg==0) {
dataFile <- paste(param, "_", dim, "D_", dataSet, "_", subdir,"_",chrArm, ".txt", sep="");
} else {
dataFile <- paste(param, "_", dim, "D_", dataSet, "_",subdir,"_", chrArm,"_seg",seg, ".txt", sep="");
}
dataPath <- paste(begPath, "/Results/", dataSet, "/",subdir,"/", dim, "D/Homology/", dataFile, sep="");
print(dataPath);
# Need to determine the maximum number of columns in the file ahead of time
# because, again, R is strange about reading in jagged arrays.
nColData <- max(count.fields(dataPath, sep="\t"));
# Read in parameter data
data <- read.table(dataPath, header=FALSE, sep="\t", fill=TRUE, col.names=1:nColData);
# Fill in the NAs in the jagged array with a parameter appropriate value.
# For B0 and CC that value is 1 because that is the minimum/maximum, respectively.
# For B1 that value is 0.
if (identical(param,"B1")) {
fillValue <- 0.0;
data[is.na(data)] <- fillValue;
} else if (identical(param,"CC") || identical(param,"B0")) {
fillValue <- 1.0;
data[is.na(data)] <- fillValue;
}
phen1data <- data[phen1indices,];
phen2data <- data[phen2indices,];
phen1curve <- colMeans(phen1data);
phen2curve <- colMeans(phen2data);
curvesMeans=data.frame(test=phen1curve, control=phen2curve);
return(curvesMeans);
}