-
Notifications
You must be signed in to change notification settings - Fork 0
/
cp_hypothesis_tests.R
119 lines (78 loc) · 4.19 KB
/
cp_hypothesis_tests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#Module with functions for campare gene expression distributions between controls and treatment
source('cp_plots_pert.R')
#takes as input the gene_id of the gene and the cp_name, the name of the treatment to compare with the control
gene_ttest <- function(mod_df, ctl_df, gene_id, cp_name){
ctl_gene <- ctl_df %>% filter(pr_gene_id == gene_id)
pert_gene <- mod_df %>% filter(pr_gene_id == gene_id & pert_iname == cp_name) #tirar dependencia de mod_list_full
ttest <- (t.test(pert_gene$z_score, ctl_gene$z_score, alternative = "two.sided", paired = FALSE))
return(ttest)
}
#In following sections, we will implement functions to do a batch of t-tests at once.
#In order to better manipulate the results, it is simpler to convert the output to a dataframe
df_ttest <- function(list_ttest, all_cp){
all_test_df <- (rbindlist(list_ttest))
#the dataframe has repeated rows, so we delete them
toDelete <- seq(1, nrow(all_test_df), 2)
test_df <- all_test_df[toDelete,]
test_df <- cbind(all_cp, test_df)
return(test_df)
}
#The following function allows the user to run the t-test for a given gene across all drugs that are available in a give list of sig_id and modulation
test_per_gene <- function(gene, mod_df, ctl_df){
all_cp <-as.data.frame(unique(mod_df$pert_iname))
all_test <- apply(all_cp, 1, gene_ttest, mod_df = mod_df, ctl_df = ctl_df, gene_id = gene)
all_test_df <- df_ttest(all_test, all_cp)
all_test_df$pr_gene_id <- as.numeric(gene)
return(all_test_df)
}
test_all <- function(gene_list, mod_df, ctl_df){
all_tests <- data.frame()
for (i in 1:length(gene_list)) {
new_test <- test_per_gene(gene_list[i], mod_df, ctl_df)
new_test$pr_gene_id <- gene_list[i]
all_tests <- rbind(all_tests, new_test)
print(i)
i=i+1
}
colnames(all_tests)[1] <- "pert_iname"
return(all_tests)
}
#-------FUNCTIONS TO PLOT HEATMAPS WITH THE RESULT OF TESTS------------------
test_wide <- function(tests_df){
ph_df <- tests_df %>% select(pert_iname, pr_gene_id, statistic, p.value, conf.int)
#taking the log of p-value, with different signs for positive or negative t stats
ph_df <- ph_df %>% dplyr::mutate(logp = case_when( statistic > 0 ~ -log10(p.value),
statistic < 0 ~ log10(p.value))
)
#organizing the data for plotting
ph_wide <- ph_df %>%
select(pert_iname, pr_gene_id, logp) %>%
tidyr::spread(pert_iname, logp)
ph_wide <- merge(tcga_genes[,1:2], ph_wide, by = "pr_gene_id")
rownames(ph_wide) <- ph_wide$pr_gene_symbol
ph_wide$pr_gene_id <- NULL
ph_wide$pr_gene_symbol <- NULL
return(ph_wide)
}
plot_heatmap_test <- function(test_tidy, wide_table, clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean", filename = "heatmap.png", height = 25, width = 15){
#annotation of category of genes
annot_df <- subset(tcga_genes, pr_gene_symbol %in% rownames(wide_table), c(pr_gene_symbol, Super.Category, Immune.Checkpoint))
rownames(annot_df) <- annot_df$pr_gene_symbol
annot_df$pr_gene_symbol <- NULL
#annotation of classes of drugs
pcl_df <- merge(pcl_custom[,2:3], test_tidy[1], by= "pert_iname") %>% distinct()
pcl_df <- subset(pcl_df, !duplicated(pcl_df [,1]) )
rownames(pcl_df) <- pcl_df$pert_iname
pcl_df$pert_iname <- NULL
# Making sure that 0 is in the center of the color scheme
breaks_list <- seq(-10, 10, by = 0.5)
my_colors <- colorRampPalette(rev(RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")))(length(breaks_list))
#generating the heatmap
wide_table %>% t() %>%
pheatmap::pheatmap(color = my_colors, breaks = breaks_list, clustering_distance_rows = clustering_distance_rows, clustering_distance_cols = clustering_distance_cols,
annotation_row = pcl_df, annotation_col = annot_df, annotation_legend = FALSE)
wide_table %>% t() %>%
pheatmap::pheatmap(color = my_colors, breaks = breaks_list, clustering_distance_rows = clustering_distance_rows, clustering_distance_cols = clustering_distance_cols,
annotation_row = pcl_df, annotation_col = annot_df, filename = filename, height = height, width = width)
}