-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCOVID_replication_analysis.R
194 lines (140 loc) · 6.39 KB
/
COVID_replication_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#
# Replication code
# Athey, Grabarz, Luca, and Wernerfelt (2023)
#
# This file loads our simulated dataset and then runs the main analysis of the
# paper on the simulated data. Since all the data are simulated, the results
# do not match the ones in the paper, but we hope this provides a sense of how
# the analysis was done.
rm(list=ls())
# Load relevant packages
library(data.table)
library(dplyr)
library(survey)
library(meta)
###############################################################################
# Load data
###############################################################################
# Specify your own path to the downloaded .rdata file here
load('/Users/nilsw/Downloads/COVID_replication.rdata')
options(warn=-1)
# Two data tables are in COVID_replication.rdata:
# *rdt* is the simulated response-level data table
# *wdt* is the simulated data table for weights
# The variable names are self-explanatory, but see the main text if confused
###############################################################################
# Function definitions: Used to analyze each individual experiment
###############################################################################
# Analyze individual experiment
experiment_level_regression_hte <- function(data, pop_proportions) {
# Create survey design
svy.data.unweighted <- svydesign(ids=~1, data=data)
# Get the marginal probabilities for the variables that we want to weight the data by.
pop_proportions$Freq <- pop_proportions$weight_vals*nrow(data)
# Get the breakdown distribution
breakdown_dist <- pop_proportions[,.(breakdown_key, Freq)]
# Will use this in raking below
pop_test_control <- data.table(
test_group=c(1,0),
Freq=c(0.5*nrow(data), 0.5*nrow(data))
)
# Use the rake function in survey package to weight current data by pop values
data.svy.rake <- rake(design = svy.data.unweighted,
sample.margins = list(~breakdown_key, ~test_group),
population.margins = list(breakdown_dist, pop_test_control)
)
# Trim the weights
data.svy.rake.trim <- trimWeights(data.svy.rake, lower=0.3, upper=3, strict=TRUE)
# Put weights in a column
data[, weights := weights(data.svy.rake)]
data[, weights_trim := weights(data.svy.rake.trim)]
# Model specification
formula_dr = 'response_score ~ test_group+
demeaned_age2534+demeaned_age3544+demeaned_age4554+demeaned_age5564+demeaned_age65p+
test_group*demeaned_age2534+test_group*demeaned_age3544+test_group*demeaned_age4554+
test_group*demeaned_age5564+test_group*demeaned_age65p+demeaned_male+test_group*demeaned_male+
demeaned_ectr + test_group * demeaned_ectr+ demeaned_ecvr + test_group * demeaned_ecvr'
# Fit model
dr_mod <- svyglm(formula = formula_dr, design = data.svy.rake.trim)
# Grab main treatment effect coefficient
be_coef <- summary(dr_mod)$coefficients[2,1] # same
# Grab treatment effect SE
be_coef_se <- summary(dr_mod)$coefficients[2,2] # same
# Grab p-value
be_p_val <- summary(dr_mod)$coefficients[2,4] # same
output <- c(be_coef, be_coef_se,be_p_val)
return(output)
}
# Function for looping analysis across all experiments
execute_experiment_level_regression_hte <- function(unique_experiments, pop_props, dataset) {
# Prepare output: Create empty output data table
output_dt <- dataset %>%
dplyr::select(study_id, cell_id, experiment_id, question_short_name) %>%
dplyr::filter(experiment_id %in% unique_experiments) %>%
unique() %>%
data.table()
output_dt[,reweighted_lift_coef := 0.0]
output_dt[,reweighted_lift_coef_se := 0.0]
output_dt[,reweighted_lift_p_val := 0.0]
# Now loop through experiments and execute
for (i in 1:length(unique_experiments)) {
# Define experiment
exp_i <- unique_experiments[i]
# Filter data
exp_data_i <- dataset[experiment_id == exp_i]
# Filter weight data
weight_data_i <- pop_props[cell_id == unique(exp_data_i$cell_id)]
# Execute function to perform regression
output <- experiment_level_regression_hte(exp_data_i, weight_data_i)
output_dt[experiment_id==exp_i,5:7] <- as.list(output)
}
return(output_dt)
}
###############################################################################
# Run analysis of each experiment
###############################################################################
# Declare list of experiments
experiment_list <- unique(rdt$experiment_id)
# Run analysis
experiment_level_output_hte <- execute_experiment_level_regression_hte(experiment_list, wdt, rdt)
###############################################################################
# Take the results of each experiment, run meta-analysis, and print out results
###############################################################################
# Overall and Outcome-specific Fixed Effects Results
# Overall meta-analysis
meta_overall <- metagen(TE = reweighted_lift_coef,
seTE = reweighted_lift_coef_se,
studlab = experiment_id,
data = experiment_level_output_hte)
# Print out results
results_print <- paste(' Overall Fixed Effects Result (TE, SE, p-value):\n',
meta_overall$TE.fixed,
meta_overall$seTE.fixed,
meta_overall$pval.fixed, sep=' ')
cat(results_print)
# Outcome-specific: redo on each specific outcome question
category_meta <- function(data, category) {
# Subset data to just this category
this_data <- data %>%
dplyr::filter(question_short_name==category)
# Perform IVW using metagen
meta <- metagen(TE = reweighted_lift_coef,
seTE = reweighted_lift_coef_se,
studlab = experiment_id,
data = this_data)
# Fixed effects results
meta_treatment_effect <- meta$TE.fixed
meta_se <- meta$seTE.fixed
meta_p_val <- meta$pval.fixed
print(paste0('Results for ', category))
print(paste0('Treatment Effect: ',meta_treatment_effect))
print(paste0('Treatment Effect SE: ',meta_se))
print(paste0('Treatment Effect p-val: ',meta_p_val))
print('')
}
# Use function to perform analyses pooled by category
category_list <- sort(unique(experiment_level_output_hte$question_short_name))
# Call function to run on each category
for (this_category in category_list) {
category_meta(experiment_level_output_hte, this_category)
}