Skip to content

Commit

Permalink
Add resample based on CHC theory dimensions
Browse files Browse the repository at this point in the history
Signed-off-by: Liang Zhang <[email protected]>
  • Loading branch information
psychelzh committed Apr 10, 2024
1 parent 3a7fd72 commit e9bd1e8
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 1 deletion.
6 changes: 6 additions & 0 deletions R/constants.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ game_id_reasoning <- bit64::as.integer64(
)
# we define here to be used in g factor resampling
num_vars_total <- 78
index_chc_labels <- readr::read_csv(
"config/game_dims_theory.csv",
show_col_types = FALSE
) |>
tidyr::unite("game_index", game_name_abbr, index_name, sep = ".") |>
dplyr::pull(label_chc, game_index)

# used in data quality check
thresh_prop_miss <- 0.25
Expand Down
21 changes: 21 additions & 0 deletions R/resample.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,24 @@ resample_vars <- function(vars, num_vars, use_pairs = FALSE) {
list(vars_sampled)
}
}

resample_pairs_chc <- function() {
labels <- sample(unique(index_chc_labels))
half <- length(labels) / 2
list(labels[seq_len(half)], labels[seq_len(half) + half])
}

extract_vars_chc <- function(pairs_chc) {
lapply(
pairs_chc,
\(labels) {
names(index_chc_labels)[index_chc_labels %in% labels]
}
)
}

allocate_num_vars_chc <- function(vars_chc) {
num_vars_max <- min(lengths(vars_chc))
num_vars <- seq(3, num_vars_max, by = 3)
num_vars[choose(num_vars_max, num_vars) > 1000]
}
108 changes: 107 additions & 1 deletion _scripts/g_factor.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,111 @@ branches_g <- tarchetypes::tar_map(
)
)

num_pairs_chc <- 20
branches_g_chc <- tarchetypes::tar_map(
list(id_rsmp = seq_len(num_pairs_chc)),
tar_target(pairs_chc, resample_pairs_chc()),
tar_target(vars_chc, extract_vars_chc(pairs_chc)),
tar_target(num_vars_chc, allocate_num_vars_chc(vars_chc)),
tar_target(
vars_rsmp_chc,
replicate(100, lapply(vars_chc, sample, num_vars_chc), simplify = FALSE),
pattern = map(num_vars_chc),
iteration = "list"
),
tar_target(
fit_g_chc,
lapply(
vars_rsmp_chc,
\(x) lapply(x, fit_efa_g, data = indices_cogstruct, missing = "ml")
),
pattern = map(vars_rsmp_chc),
iteration = "list"
),
tar_target(
scores_g_chc,
lapply(
fit_g_chc,
\(x) lapply(x, extract_g_scores, data = indices_cogstruct)
),
pattern = map(fit_g_chc),
iteration = "list"
),
tar_target(
rel_pairs_g_chc,
list_rbind(
lapply(
scores_g_chc,
\(x) tibble(r = as.vector(cor(x[[1]], x[[2]], use = "pairwise")))
),
names_to = "id_rep"
),
pattern = map(scores_g_chc),
iteration = "list"
),
tarchetypes::tar_map(
tidyr::expand_grid(
config_neural,
hypers_cpm |>
dplyr::filter(
thresh_method == "alpha",
thresh_level == 0.01
)
),
names = !all_of(names_exclude),
tar_target(
cpm_result_chc,
lapply(
scores_g_chc,
\(x) {
lapply(
x,
perform_cpm,
fc = qs::qread(file_fc),
confounds = match_confounds(users_confounds, fd),
subjs_keep_neural = subjs_keep_neural,
bias_correct = FALSE,
thresh_method = thresh_method,
thresh_level = thresh_level,
return_edges = "sum"
)
}
),
pattern = map(scores_g_chc),
iteration = "list",
retrieval = "worker",
storage = "worker"
),
tar_target(
dice_pairs_chc,
list_rbind(
lapply(cpm_result_chc, calc_dice_pairs, 0.5),
names_to = "id_rep"
),
pattern = map(cpm_result_chc),
iteration = "list",
retrieval = "worker",
storage = "worker"
),
tar_target(
cpm_performance_chc,
lapply(
cpm_result_chc,
\(result) {
list_rbind(
lapply(result, extract_cpm_performance),
names_to = "id_pairs"
)
}
) |>
list_rbind(names_to = "id_rep"),
pattern = map(cpm_result_chc),
retrieval = "worker",
storage = "worker"
)
)
)

list(
tarchetypes::tar_file_read(
indices_cogstruct,
Expand Down Expand Up @@ -216,5 +321,6 @@ list(
tar_target(
scores_g_full,
extract_g_scores(fit_g_full, data = indices_cogstruct)
)
),
branches_g_chc
)

0 comments on commit e9bd1e8

Please sign in to comment.