forked from shiva1387/BatchEffects
-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysisOfDistance.R
120 lines (103 loc) · 5.57 KB
/
analysisOfDistance.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
####################################### Analysis of distance
## Estimating the relations between strains before and after batch correction procedure
############### DAY12
a<-adonis(t(batch_corrected_mat_d12)~ SampleGroup_day12, method = "euclidean", perm=999)
# Call:
# adonis(formula = t(ms_data_day12_nonzero_scale) ~ SampleGroup_day12, permutations = 999, method = "euclidean")
#
# Terms added sequentially (first to last)
#
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# SampleGroup_day12 21 169158 8055.2 4.9991 0.52235 0.001 ***
# Residuals 96 154685 1611.3 0.47765
# Total 117 323844 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Subset data- from each batch
## A subset is used from the scaled data instead of subset the original data set and then scaline
## This is done to preserve the original variation in each datasets as scaling with a subset might have different distributions
## Raw data
d12_batch17_r<-RunDay_day12[RunDay_day12=="17"]
d12_batch17_s<-SampleGroup_day12[names(SampleGroup_day12) %in% names(d12_batch17_r)]#Getting the Sample group and strain names
d12_batch17_data<-ms_data_day12_nonzero_scale[,colnames(ms_data_day12_nonzero_scale) %in% names(d12_batch17_s)]
d12_batch17_data_aod<-adonis(t(d12_batch17_data)~ d12_batch17_s, method = "euclidean", perm=999)
# Call:
# adonis(formula = t(d12_batch17_data) ~ d12_batch17_s, permutations = 999, method = "euclidean")
#
# Terms added sequentially (first to last)
#
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# d12_batch17_s 11 77775 7070.4 4.719 0.49013 0.001 ***
# Residuals 54 80907 1498.3 0.50987
# Total 65 158682 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Batch corrected data
d12_cor_batch17_data<-batch_corrected_mat_d12[,colnames(batch_corrected_mat_d12) %in% names(d12_batch17_s)]
d12_cor_batch17_data_aod<-adonis(t(d12_cor_batch17_data)~ d12_batch17_s, method = "euclidean", perm=999)
# Call:
# adonis(formula = t(d12_cor_batch17_data) ~ d12_batch17_s, permutations = 999, method = "euclidean")
#
# Terms added sequentially (first to last)
#
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# d12_batch17_s 11 60677 5516.1 4.2697 0.46517 0.001 ***
# Residuals 54 69762 1291.9 0.53483
# Total 65 130439 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
############### DAY4
a<-adonis(t(ms_data_day4_nonzero_scale)~ SampleGroup_day4, method = "euclidean", perm=999)
# Call:
# adonis(formula = t(ms_data_day4_nonzero_scale) ~ SampleGroup_day4, permutations = 999, method = "euclidean")
#
# Terms added sequentially (first to last)
#
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# SampleGroup_day4 21 143357 6826.5 3.605 0.42363 0.001 ***
# Residuals 103 195041 1893.6 0.57637
# Total 124 338397 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Subset data- from each batch
## A subset is used from the scaled data instead of subset the original data set and then scaline
## This is done to preserve the original variation in each datasets as scaling with a subset might have different distributions
## Raw data
d4_batch23_r<-RunDay_day4[RunDay_day4=="23"]
d4_batch23_s<-SampleGroup_day4[names(SampleGroup_day4) %in% names(d4_batch23_r)]#Getting the Sample group and strain names
d4_batch23_data<-ms_data_day4_nonzero_scale[,colnames(ms_data_day4_nonzero_scale) %in% names(d4_batch23_s)]
d4_batch23_data_aod<-adonis(t(d4_batch23_data)~ d4_batch23_s, method = "euclidean", perm=999)
# Call:
# adonis(formula = t(d4_batch23_data) ~ d4_batch23_s, permutations = 999, method = "euclidean")
#
# Terms added sequentially (first to last)
#
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# d4_batch23_s 13 61300 4715.4 2.4771 0.32792 0.001 ***
# Residuals 66 125636 1903.6 0.67208
# Total 79 186937 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
d4_cor_batch23_data_nmds <- metaMDS (d4_cor_batch23_data, distance= "euclidean")
d4_batch23_data_nmds <- metaMDS (d4_batch23_data, distance= "euclidean")
## Batch corrected data
d4_cor_batch23_data<-batch_corrected_mat_d4[,colnames(batch_corrected_mat_d4) %in% names(d4_batch23_s)]
d4_cor_batch23_data_aod<-adonis(t(d4_cor_batch23_data)~ d4_batch23_s, method = "euclidean", perm=999)
# Call:
# adonis(formula = t(d4_cor_batch23_data) ~ d4_batch23_s, permutations = 999, method = "euclidean")
#
# Terms added sequentially (first to last)
#
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# d4_batch23_s 13 56657 4358.2 3.2337 0.3891 0.001 ***
# Residuals 66 88951 1347.7 0.6109
# Total 79 145608 1.0000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pdf("aod_plots.pdf",height=8,width=8)
par(mfrow=c(2,2))
plot(density(d4_batch23_data_aod), xlim=c(0,5), main="DAY4 raw")
plot(density(d12_batch17_data_aod), xlim=c(0,5), main="DAY12 raw")
plot(density(d4_cor_batch23_data_aod), xlim=c(0,5), main="DAY4 corrected")
plot(density(d12_cor_batch17_data_aod), xlim=c(0,5), main="DAY12 corrected")
dev.off()