forked from zhangyuqing/ComBat-seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
helper_seq.R
78 lines (68 loc) · 2.81 KB
/
helper_seq.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#### Expand a vector into matrix (columns as the original vector)
vec2mat <- function(vec, n_times){
return(matrix(rep(vec, n_times), ncol=n_times, byrow=FALSE))
}
#### Monte Carlo integration functions
monte_carlo_int_NB <- function(dat, mu, gamma, phi, gene.subset.n){
weights <- pos_res <- list()
for(i in 1:nrow(dat)){
m <- mu[-i,!is.na(dat[i,])]
x <- dat[i,!is.na(dat[i,])]
gamma_sub <- gamma[-i]
phi_sub <- phi[-i]
# take a subset of genes to do integration - save time
if(!is.null(gene.subset.n) & is.numeric(gene.subset.n) & length(gene.subset.n)==1){
if(i==1){cat(sprintf("Using %s random genes for Monte Carlo integration\n", gene.subset.n))}
mcint_ind <- sample(1:(nrow(dat)-1), gene.subset.n, replace=FALSE)
m <- m[mcint_ind, ]; gamma_sub <- gamma_sub[mcint_ind]; phi_sub <- phi_sub[mcint_ind]
G_sub <- gene.subset.n
}else{
if(i==1){cat("Using all genes for Monte Carlo integration; the function runs very slow for large number of genes\n")}
G_sub <- nrow(dat)-1
}
#LH <- sapply(1:G_sub, function(j){sum(log2(dnbinom(x, mu=m[j,], size=1/phi_sub[j])+1))})
LH <- sapply(1:G_sub, function(j){prod(dnbinom(x, mu=m[j,], size=1/phi_sub[j]))})
LH[is.nan(LH)]=0;
if(sum(LH)==0 | is.na(sum(LH))){
pos_res[[i]] <- c(gamma.star=as.numeric(gamma[i]), phi.star=as.numeric(phi[i]))
}else{
pos_res[[i]] <- c(gamma.star=sum(gamma_sub*LH)/sum(LH), phi.star=sum(phi_sub*LH)/sum(LH))
}
weights[[i]] <- as.matrix(LH/sum(LH))
}
pos_res <- do.call(rbind, pos_res)
weights <- do.call(cbind, weights)
res <- list(gamma_star=pos_res[, "gamma.star"], phi_star=pos_res[, "phi.star"], weights=weights)
return(res)
}
#### Match quantiles
match_quantiles <- function(counts_sub, old_mu, old_phi, new_mu, new_phi){
new_counts_sub <- matrix(NA, nrow=nrow(counts_sub), ncol=ncol(counts_sub))
for(a in 1:nrow(counts_sub)){
for(b in 1:ncol(counts_sub)){
if(counts_sub[a, b] <= 1){
new_counts_sub[a,b] <- counts_sub[a, b]
}else{
tmp_p <- pnbinom(counts_sub[a, b]-1, mu=old_mu[a, b], size=1/old_phi[a])
if(abs(tmp_p-1)<1e-4){
new_counts_sub[a,b] <- counts_sub[a, b]
# for outlier count, if p==1, will return Inf values -> use original count instead
}else{
new_counts_sub[a,b] <- 1+qnbinom(tmp_p, mu=new_mu[a, b], size=1/new_phi[a])
}
}
}
}
return(new_counts_sub)
}
mapDisp <- function(old_mu, new_mu, old_phi, divider){
new_phi <- matrix(NA, nrow=nrow(old_mu), ncol=ncol(old_mu))
for(a in 1:nrow(old_mu)){
for(b in 1:ncol(old_mu)){
old_var <- old_mu[a, b] + old_mu[a, b]^2 * old_phi[a, b]
new_var <- old_var / (divider[a, b]^2)
new_phi[a, b] <- (new_var - new_mu[a, b]) / (new_mu[a, b]^2)
}
}
return(new_phi)
}