forked from mvab/mendelian-randomization-breast-cancer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions_mvmr.R
326 lines (250 loc) · 9.83 KB
/
functions_mvmr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
require("purrr")
require("tidyr")
require("tibble")
require("TwoSampleMR")
get_mv_exposures <- function(exposure_list, full_gwas_list, clump_exposures=FALSE) {
# Here we are using a re-written source code of `mv_extract_exposures` function from 2SMR package.
# It was neccesary to do it, as it only works with MRBase database input at the moment (but we have external exposures)
# Get effects of each instrument from each exposure
# all tophit SNPs in both exposures, (clumped optionally)
exposures <- exposure_list %>% purrr::reduce(bind_rows) %>% flip_same_snp()
if (clump_exposures) {
# ***optional*** : clump exposures
temp <- exposures
temp$id.exposure <- 1
temp <- clump_data(temp)
#temp <- clump_data_local(temp, local_path)
exposures <- filter(exposures, SNP %in% temp$SNP)
}
# merge exposures (in 'outcomes' ~ full gwas format)
# extract all instruments from full GWAS of exposures
for (i in 1:length(full_gwas_list)){
full_gwas_list[[i]] <- full_gwas_list[[i]] %>% filter(SNP %in% exposures$SNP)
}
d1 <- full_gwas_list %>%
purrr::reduce(bind_rows) %>%
distinct()
# get ids
id_exposure <- unique(d1$id.outcome)
# convert first trait to exposure format -- exp1 is exposure
tmp_exposure <- d1 %>% filter(id.outcome == id_exposure[1]) %>% convert_outcome_to_exposure()
# keep other traits as outcome -- exp2+ are outcomes
tmp_outcome <- d1 %>% filter(id.outcome != id_exposure[1])
# Harmonise against the first trait
d <- harmonise_data(exposure_dat = tmp_exposure,
outcome_dat = tmp_outcome, action=2)
# Only keep SNPs that are present in all
snps_not_in_all <- d %>% dplyr::count(SNP) %>%
filter(n < length(exposure_list)-1) %>%
pull(SNP)
d <- filter(d, !SNP %in% snps_not_in_all)
# Subset and concat data
# for exp1 get exposure cols
dh1x <- d %>% filter(id.outcome == id.outcome[1]) %>%
select(SNP, contains("exposure"))
# for exp2 get outcome cols
dh2x <-d %>% select(SNP, contains("outcome"))
# rename outcome to exposure in these
names(dh2x) <- gsub("outcome", "exposure", names(dh2x) )
# join together (drop not needed cols)
exposure_dat <- bind_rows(dh1x, dh2x) %>%
select(-c("samplesize.exposure" ,"mr_keep.exposure", "pval_origin.exposure")) %>%
distinct()
return(exposure_dat)
}
# function to convert 2SMR format into MVMR format
make_mvmr_input <- function(exposure_dat, outcome.id.mrbase="", outcome.data=""){
# provide exposure_dat created in the same way as for TwoSampleMR
# also specify the outcome argument [only ONE!] (MR-base ID or full gwas data in .outcome format)
# extract SNPs for both exposures from outcome dataset
# (for the selected option mr.base or local outcome data)
if (outcome.id.mrbase != "") {
# if mrbase.id is provided
outcome_dat <- extract_outcome_data(snps = unique(exposure_dat$SNP),
outcomes = outcome.id.mrbase)
} else if (outcome.data != ""){
# if outcome df is provided
outcome_dat <- outcome.data %>% filter(SNP %in% exposure_dat$SNP)
}
# harmonize datasets
exposure_dat <- exposure_dat %>% mutate(id.exposure = exposure)
outcome_harmonised <- mv_harmonise_data(exposure_dat, outcome_dat)
exposures_order <- colnames(outcome_harmonised$exposure_beta)
# Create variables for the analysis
### works for many exposures
no_exp = dim(outcome_harmonised$exposure_beta)[2] # count exposures
# add beta/se names
colnames(outcome_harmonised$exposure_beta) <- paste0("betaX", 1:no_exp)
colnames(outcome_harmonised$exposure_se) <- paste0("seX", 1:no_exp)
XGs <-left_join(as.data.frame(outcome_harmonised$exposure_beta) %>% rownames_to_column('SNP'),
as.data.frame(outcome_harmonised$exposure_se) %>%rownames_to_column('SNP'),
by = "SNP")
YG <- data.frame(beta.outcome = outcome_harmonised$outcome_beta,
se.outcome = outcome_harmonised$outcome_se) %>%
mutate(SNP = XGs$SNP)
return(list(YG = YG,
XGs = XGs,
exposures = exposures_order))
}
tidy_mvmr_output <- function(mvmr_res) {
mvmr_res %>%
as.data.frame() %>%
rownames_to_column("exposure") %>%
rename(b=Estimate) %>%
rename(se="Std. Error") %>%
rename(pval="Pr(>|t|)") %>%
select(-c(`t value`)) %>%
TwoSampleMR::generate_odds_ratios()
}
flip_same_snp <- function(dat){
# test if there are any unharmonised dups
x <- dat %>% dplyr::count(SNP) %>% filter(n > 1) %>% arrange(-n)
if (dim(x)[1] !=0 ){
# all SNPs that occur > 1
y <- dat %>% filter(SNP %in% x$SNP)
dat.upd<-tibble()
for ( i in unique(y$SNP) ){
# subset to current snp
y.sub<- filter(y, SNP == i)
snp_n <- dim(y.sub)[1]
if (snp_n == 3){
# likely 2/3 are in sync, need to drop 1 of the 'in sync' ones
if (y.sub$effect_allele.exposure[1] == y.sub$effect_allele.exposure[2]) {
dat.upd<-rbind(dat.upd, y.sub[1,])
y.sub<-y.sub[2:3,]
} else if (y.sub$effect_allele.exposure[1] == y.sub$effect_allele.exposure[3]){
dat.upd<-rbind(dat.upd, y.sub[3,])
y.sub<-y.sub[1:2,]
} else if (y.sub$effect_allele.exposure[2] == y.sub$effect_allele.exposure[3]){
dat.upd<-rbind(dat.upd, y.sub[3,])
y.sub<-y.sub[1:2,]
}
}
# if there are SNPs that seem to be exact opposite, flip them
# flip and harmonise each pair of discordant SNPs
if (y.sub$effect_allele.exposure[1] == y.sub$other_allele.exposure[2] & y.sub$effect_allele.exposure[2] == y.sub$other_allele.exposure[1] ){
y.sub$other_allele.exposure[2] <- y.sub$other_allele.exposure[1]
y.sub$effect_allele.exposure[2] <- y.sub$effect_allele.exposure[1]
y.sub$beta.exposure[2] <- y.sub$beta.exposure[2] * -1
y.sub$eaf.exposure[2] <- 1 - y.sub$eaf.exposure[2]
}
# save new (or unchanged if if was FALSE) into tibbl dat.upd
dat.upd <- rbind(dat.upd, y.sub)
}
# discard old, add new harmonised data
dat <- dat %>% filter(!SNP %in% unique(y$SNP) )
dat <- rbind(dat, dat.upd)
return(dat)
} else {
# no dups
return(dat)
}
}
qhet_mvmr_tmp<- function (r_input, pcor, CI, iterations) {
library(boot)
warning("qhet_mvmr() is currently undergoing development.")
warning("This is Marina's local version with simple CIs.")
if (missing(CI)) {
CI <- F
warning("95 percent confidence interval not calculated")
}
if (missing(iterations)) {
iterations <- 1000
warning("Iterations for bootstrap not specified. Default = 1000")
}
exp.number <- length(names(r_input)[-c(1, 2, 3)])/2
Qtemp <- function(r_input, pcor) {
exp.number <- length(names(r_input)[-c(1, 2, 3)])/2
stderr <- as.matrix(r_input[, (exp.number + 4):length(r_input)])
correlation <- pcor
gammahat <- r_input$betaYG
segamma <- r_input$sebetaYG
pihat <- as.matrix(r_input[, c(4:(3 + exp.number))])
PL_MVMR = function(a) {
tau2 = a[1]
PL2_MVMR = function(ab) {
b <- ab
cov = matrix(nrow = exp.number, ncol = exp.number)
w = NULL
for (l in 1:nrow(r_input)) {
for (pp in 1:exp.number) {
for (p2 in 1:exp.number) {
cov[pp, p2] <- correlation[pp, p2] * stderr[l,
pp] * stderr[l, p2]
}
}
segamma <- r_input$sebetaYG
w[l] <- segamma[l]^2 + t(b) %*% cov %*% b +
tau2
}
q = sum((1/w) * ((gammahat - pihat %*% b)^2))
return(q)
}
st_PL2 = rep(0, exp.number)
bc = optim(st_PL2, PL2_MVMR)
bcresults <- bc$par
cov = matrix(nrow = exp.number, ncol = exp.number)
w = NULL
for (l in 1:nrow(r_input)) {
for (pp in 1:exp.number) {
for (p2 in 1:exp.number) {
cov[pp, p2] <- correlation[pp, p2] * stderr[l,
pp] * stderr[l, p2]
}
}
w[l] <- segamma[l]^2 + t(bcresults) %*% cov %*%
bcresults + tau2
}
q = (sum((1/w) * ((gammahat - pihat %*% bcresults)^2)) -
(nrow(r_input) - 2))^2
}
PL2_MVMR = function(ab) {
b = ab
w = NULL
cov = matrix(nrow = exp.number, ncol = exp.number)
for (l in 1:nrow(r_input)) {
for (pp in 1:exp.number) {
for (p2 in 1:exp.number) {
cov[pp, p2] <- correlation[pp, p2] * stderr[l,
pp] * stderr[l, p2]
}
}
w[l] <- segamma[l]^2 + t(b) %*% cov %*% b +
tau_i
}
q = sum((1/w) * ((gammahat - pihat %*% b)^2))
}
limltauest = optimize(PL_MVMR, interval = c(-10, 10))
tau_i = limltauest$objective
tau = tau_i
liml_het2 <- optim(rep(0, exp.number), PL2_MVMR)
limlhets <- liml_het2$par
Qexact_het <- liml_het2$value
Effects <- limlhets
Effects <- data.frame(Effects)
names(Effects) <- "Effect Estimates"
for (i in 1:exp.number) {
rownames(Effects)[i] <- paste("Exposure", i, sep = " ")
}
return(Effects)
}
if (CI == F) {
res <- Qtemp(r_input, pcor)
}
if (CI == T) {
bootse <- function(data, indices) {
bres <- Qtemp(data[indices, ], pcor)[, 1]
return(bres)
}
b.results <- boot(data = r_input, statistic = bootse,
R = iterations)
out<-data.frame(b=b.results$t0,
se=apply(b.results$t, 2, sd),
mean = apply(b.results$t, 2, mean))
out$bias = out$mean - out$b
out$mean <-NULL
rownames(out) <- paste("Exposure", c(1:exp.number), sep = " ")
out<-TwoSampleMR::generate_odds_ratios(out)
}
return(out)
}