-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path06-RBFOX1_targets_09.Rmd
232 lines (191 loc) · 6.47 KB
/
06-RBFOX1_targets_09.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# GO and KEGG: RBFOX1 targets 09
## Load the data
```{r}
# load table with targets (binding probability > 0.9)
data_MAPP <- read.table("data/inclusion_tables/0.9/rbfox1_exclusive_targets.tsv",
header = T)
# select control and cancer columns
CTRL <- data_MAPP %>% dplyr::select_if(., grepl("^CONTROL_", names(.)))
CANCER <- data_MAPP %>% dplyr::select_if(., grepl("^CANCER_", names(.)))
# use row_t_welch instead of usual t.test function since t.test produces
# errors in some cases: t.test error: data are essentially constant
# NAs are assigned for problematic cases
# https://stats.stackexchange.com/questions/499634/t-test-error-data-are-essentially-constant/499637
# library matrixTests
data_MAPP <- data_MAPP %>%
mutate(padj = p.adjust(row_t_welch(CTRL, CANCER)$pvalue,
method = "BH", n=nrow(data_MAPP)))
```
```{r}
# view table interactively
DT::datatable(data_MAPP, options = list(scrollX = TRUE))
```
## Gene ontology (GO)
### Prepare the gene list
```{r}
# select genes from data_MAPP table based on padj values
# threshold is set in index.Rmd: genes_padj_cutoff
genes <- na.omit(data_MAPP) %>% arrange(desc(ABSDIFF)) %>%
filter(padj < genes_padj_cutoff) %>% dplyr::select(gene_id) %>% unlist()
genes <- unlist(strsplit(genes, ","))
```
```{r}
# preview the gene names
head(genes)
```
### Biological process (BP)
```{r}
# select type of GO analysis
type_of_GO = "BP"
# create an object with a name based on the type of analysis
assign(paste0("GO_", type_of_GO), enrichGO(
genes,
organismDB,
keyType = "ENSEMBL",
ont = type_of_GO,
pvalueCutoff = pvalueCutoff,
minGSSize = minGSSize,
pAdjustMethod = "BH",
universe = universe,
readable = TRUE))
# assign the object created above to "df_GO" object
df_GO <- eval(parse(text = paste0("GO_", type_of_GO)))
```
```{r}
# if there are enriched terms in "df_GO" object, create a table with the results
# filtered by specified p-adjusted threshold, if no enriched terms were found
# print "no GOs were found"
if(nrow(df_GO) > 0){
DT::datatable(df_GO@result %>% filter(p.adjust < pvalueCutoff),
options = list(scrollX = TRUE))
} else "no GOs were found"
```
```{r}
# if there are enriched terms in "df_GO" object, create a barplot with
# specified number of GOs to plot, if no enriched terms were found
# print "no GOs to plot"
if(nrow(df_GO) > 0){
barplot(df_GO, showCategory = showCategory,
title = type_of_GO)
} else "no GOs to plot"
```
### Molecular function (MF)
```{r}
# select type of GO analysis
type_of_GO = "MF"
# create an object with a name based on the type of analysis
assign(paste0("GO_", type_of_GO), enrichGO(
genes,
organismDB,
keyType = "ENSEMBL",
ont = type_of_GO,
pvalueCutoff = pvalueCutoff,
minGSSize = minGSSize,
pAdjustMethod = "BH",
universe = universe,
readable = TRUE))
# assign the object created above to "df_GO" object
df_GO <- eval(parse(text = paste0("GO_", type_of_GO)))
```
```{r}
# if there are enriched terms in "df_GO" object, create a table with the results
# filtered by specified p-adjusted threshold, if no enriched terms were found
# print "no GOs were found"
if(nrow(df_GO) > 0){
DT::datatable(df_GO@result %>% filter(p.adjust < pvalueCutoff),
options = list(scrollX = TRUE))
} else "no GOs were found"
```
```{r}
# if there are enriched terms in "df_GO" object, create a barplot with
# specified number of GOs to plot, if no enriched terms were found
# print "no GOs to plot"
if(nrow(df_GO) > 0){
barplot(df_GO, showCategory = showCategory,
title = type_of_GO)
} else "no GOs to plot"
```
### Cellular component (CC)
```{r}
# select type of GO analysis
type_of_GO = "CC"
# create an object with a name based on the type of analysis
assign(paste0("GO_", type_of_GO), enrichGO(
genes,
organismDB,
keyType = "ENSEMBL",
ont = type_of_GO,
pvalueCutoff = pvalueCutoff,
minGSSize = minGSSize,
pAdjustMethod = "BH",
universe = universe,
readable = TRUE))
# assign the object created above to "df_GO" object
df_GO <- eval(parse(text = paste0("GO_", type_of_GO)))
```
```{r}
# if there are enriched terms in "df_GO" object, create a table with the results
# filtered by specified p-adjusted threshold, if no enriched terms were found
# print "no GOs were found"
if(nrow(df_GO) > 0){
DT::datatable(df_GO@result %>% filter(p.adjust < pvalueCutoff),
options = list(scrollX = TRUE))
} else "no GOs were found"
```
```{r}
# if there are enriched terms in "df_GO" object, create a barplot with
# specified number of GOs to plot, if no enriched terms were found
# print "no GOs to plot"
if(nrow(df_GO) > 0){
barplot(df_GO, showCategory = showCategory,
title = type_of_GO)
} else "no GOs to plot"
```
## KEGG
### Prepare the the data
```{r, warning = FALSE}
# select gene_id in ENSEMBL format and DIFF from the result table
DIFF <- data_MAPP %>% dplyr::select(gene_id, DIFF)
# rename the gene_id column
names(DIFF)[1] <- "ENSEMBL"
# preview the head of the table
head(DIFF)
# create new df by converting ENSEMBL ID into ENTREZID and adding DIFF column
# arrange table in descending order by abs(DIFF)
genes_ENTREZ_df <-
bitr(na.omit(data_MAPP$gene_id[data_MAPP$padj < genes_padj_cutoff]),
fromType = "ENSEMBL", toType = "ENTREZID", OrgDb= organismDB) %>%
left_join(DIFF, by = "ENSEMBL") %>% arrange(desc(abs(DIFF)))
# preview obtained table
head(genes_ENTREZ_df )
# create a vector with DIFF values
genes_DIFF <- genes_ENTREZ_df$DIFF
genes_DIFF
# add names based on ENTREZID
names(genes_DIFF) <- genes_ENTREZ_df$ENTREZID
# preview genes in ENTREZ format with corresponding fold changes
head(genes_DIFF)
```
### run enrichKEGG
```{r}
# run enrichKEGG using options specified in "index.Rmd" file
KEGGresults <- enrichKEGG(
names(genes_DIFF),
organism = "hsa",
keyType = "ncbi-geneid",
pvalueCutoff = pvalueCutoff,
pAdjustMethod = "BH",
universe = as.character(na.omit(ENTREZ_universe)),
use_internal_data = FALSE
)
```
```{r}
# if there are enriched KEGG pathways found in "KEGGresults" object,
# create a table with the results filtered by specified p-adjusted threshold,
# if no enriched terms were found, print "no KEGG pathways were found"
if (!is.null(KEGGresults)) {
if(nrow(KEGGresults@result %>% filter(p.adjust < pvalueCutoff)) > 0){
DT::datatable(KEGGresults@result %>% filter(p.adjust < pvalueCutoff),
options = list(scrollX = TRUE))}
} else "no KEGG pathways were found"
```