created 2025 pre-meds assessment

UCSB-MEDS · Aug 13, 2024 · 45d8902 · 45d8902
1 parent 3ffa8e0
commit 45d8902
Show file tree

Hide file tree

Showing 101 changed files with 22,683 additions and 86 deletions.
diff --git a/R/q18_prob_dist.R b/R/q18_prob_dist.R
@@ -81,6 +81,8 @@ clean_q18b_prob_dist_terms <- function(PLO_data_clean){
   post_meds <- df |> 
     filter(timepoint == "Post-MEDS") 
 
+  if("Post-MEDS" %in% df$timepoint){
+
   #................add 0s where missing (post-MEDS)................
   for (i in 1:length(options)){
 
@@ -122,6 +124,14 @@ clean_q18b_prob_dist_terms <- function(PLO_data_clean){
            perc_label = paste0(percentage, "%")) |>
     mutate(xvar = prob_dist_terms)
 
+
+  } else{
+    # no "Post-MEDS" rows in df, so post_meds is empty: add the same
+    # placeholder columns the if-branch creates (incl. perc_label)
+    post_meds <- post_meds %>%
+      mutate(total_respondents = 0,
+             percentage = 0,
+             perc_label = NA,
+             prob_dist_terms = NA)
+  }
+
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
@@ -190,7 +200,7 @@ clean_q18b_FULLY_CORRECT <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~
 
   #....total respondents that continued onto answer question 18b...
-  total_pre_resp <- meds2024_before_clean |> 
+  total_pre_resp <- meds2025_before_clean |> 
     select(prob_dist) |>
     group_by(prob_dist) |>
     # filter(prob_dist != "1 (never heard of it)") |> # include this if you want % calculation to be out of only students who advanced to this question
@@ -216,32 +226,32 @@ clean_q18b_FULLY_CORRECT <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~~
 
   #....total respondents that continued onto answer question 18b...
-  total_post_resp <- meds2024_after_clean |> 
-    select(prob_dist) |>
-    group_by(prob_dist) |>
-    # filter(prob_dist != "1 (never heard of it)") |> # include this if you want % calculation to be out of only students who advanced to this question
-    count() |>
-    ungroup() |>
-    summarize(n = sum(n)) |>
-    pull()
+  # total_post_resp <- meds2025_after_clean |> 
+  #   select(prob_dist) |>
+  #   group_by(prob_dist) |>
+  #   # filter(prob_dist != "1 (never heard of it)") |> # include this if you want % calculation to be out of only students who advanced to this question
+  #   count() |>
+  #   ungroup() |>
+  #   summarize(n = sum(n)) |>
+  #   pull()
 
-  post_meds <- df |> 
-    filter(timepoint == "Post-MEDS") |> 
-    group_by(correctness) |> 
-    count() |> 
-    # summarize(total = sum(n)) |> 
-    # ungroup() |> 
-    mutate(timepoint = rep("Post-MEDS")) |> 
-    mutate(total_respondents = total_post_resp,
-           percentage = round((n/total_respondents)*100, 1),
-           perc_label = paste0(percentage, "%")) |> 
-    mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
+  # post_meds <- df |> 
+  #   filter(timepoint == "Post-MEDS") |> 
+  #   group_by(correctness) |> 
+  #   count() |> 
+  #   # summarize(total = sum(n)) |> 
+  #   # ungroup() |> 
+  #   mutate(timepoint = rep("Post-MEDS")) |> 
+  #   mutate(total_respondents = total_post_resp,
+  #          percentage = round((n/total_respondents)*100, 1),
+  #          perc_label = paste0(percentage, "%")) |> 
+  #   mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
 
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
 
-  all_q18b_fully_correct <- rbind(pre_meds, post_meds) |> 
+  all_q18b_fully_correct <- pre_meds |> #rbind(pre_meds, post_meds) |> 
     filter(correctness == "yes") |> 
     mutate(timepoint = fct_relevel(timepoint, c("Pre-MEDS", "Post-MEDS"))) 
 

diff --git a/R/q21_spatial_data.R b/R/q21_spatial_data.R
@@ -81,6 +81,8 @@ clean_q21b_rep_spatial <- function(PLO_data_clean){
   post_meds <- df |> 
     filter(timepoint == "Post-MEDS") 
 
+  if("Post-MEDS" %in% df$timepoint){
+
   #................add 0s where missing (post-MEDS)................
   for (i in 1:length(options)){
 
@@ -122,6 +124,14 @@ clean_q21b_rep_spatial <- function(PLO_data_clean){
            perc_label = paste0(percentage, "%")) |>
     mutate(xvar = rep_spatial_data)
 
+  } else{
+    post_meds <- post_meds %>%
+      mutate(total_respondents = 0,
+             percentage = 0,
+             perc_label = NA,
+             rep_spatial_data = NA)
+  }
+
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
@@ -193,7 +203,7 @@ clean_q21b_FULLY_CORRECT <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~
 
   #....total respondents that continued on to answer question 21b...
-  total_pre_resp <- meds2024_before_clean |> 
+  total_pre_resp <- meds2025_before_clean |> 
     select(rep_spatial_data) |>
     group_by(rep_spatial_data) |>
     filter(rep_spatial_data != "1 (never worked with it before)") |>
@@ -217,30 +227,30 @@ clean_q21b_FULLY_CORRECT <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~~
 
   #....total respondents that continued on to answer question 21b...
-  total_post_resp <- meds2024_after_clean |> 
-    select(rep_spatial_data) |>
-    group_by(rep_spatial_data) |>
-    filter(rep_spatial_data != "1 (never worked with it before)") |>
-    count() |>
-    ungroup() |>
-    summarize(n = sum(n)) |>
-    pull()
-
-  post_meds <- df |> 
-    filter(timepoint == "Post-MEDS") |> 
-    group_by(correctness) |> 
-    count() |> 
-    ungroup() |> 
-    mutate(timepoint = rep("Post-MEDS")) |> 
-    mutate(total_respondents = sum(n), #total_post_resp,
-           percentage = round((n/total_respondents)*100, 1),
-           perc_label = paste0(percentage, "%"))
+  # total_post_resp <- meds2025_after_clean |> 
+  #   select(rep_spatial_data) |>
+  #   group_by(rep_spatial_data) |>
+  #   filter(rep_spatial_data != "1 (never worked with it before)") |>
+  #   count() |>
+  #   ungroup() |>
+  #   summarize(n = sum(n)) |>
+  #   pull()
+  # 
+  # post_meds <- df |> 
+  #   filter(timepoint == "Post-MEDS") |> 
+  #   group_by(correctness) |> 
+  #   count() |> 
+  #   ungroup() |> 
+  #   mutate(timepoint = rep("Post-MEDS")) |> 
+  #   mutate(total_respondents = sum(n), #total_post_resp,
+  #          percentage = round((n/total_respondents)*100, 1),
+  #          perc_label = paste0(percentage, "%"))
 
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
 
-  all_q21b_fully_correct <- rbind(pre_meds, post_meds) |> 
+  all_q21b_fully_correct <- pre_meds |> #rbind(pre_meds, post_meds) |> 
     filter(correctness == "yes") |> 
     mutate(timepoint = fct_relevel(timepoint, c("Pre-MEDS", "Post-MEDS"))) |> 
     mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
@@ -314,20 +324,20 @@ clean_q21c_vec_ras <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~~
 
   #........separate post-MEDS (to add 0s for missing cats).........
-  post_meds <- df |> 
-    filter(timepoint == "Post-MEDS") |> 
-    # drop_na() |> # include this if you want % calculation to be out of only students who advanced to this question
-    mutate(total_respondents = sum(n),
-           percentage = round((n/total_respondents)*100, 1),
-           perc_label = paste0(percentage, "%")) |>
-    mutate(xvar = vec_or_ras) |> 
-    mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
+  # post_meds <- df |> 
+  #   filter(timepoint == "Post-MEDS") |> 
+  #   # drop_na() |> # include this if you want % calculation to be out of only students who advanced to this question
+  #   mutate(total_respondents = sum(n),
+  #          percentage = round((n/total_respondents)*100, 1),
+  #          perc_label = paste0(percentage, "%")) |>
+  #   mutate(xvar = vec_or_ras) |> 
+  #   mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
 
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
 
-  all_q21c_data <- rbind(pre_meds, post_meds) |> 
+  all_q21c_data <- pre_meds |> #rbind(pre_meds, post_meds) |> 
     filter(vec_or_ras == "vector") 
 
   return(all_q21c_data)

diff --git a/R/q26_TrainValidSplit.R b/R/q26_TrainValidSplit.R
@@ -85,6 +85,8 @@ clean_q26c_mod_perf <- function(PLO_data_clean){
   post_meds <- df |> 
     filter(timepoint == "Post-MEDS") 
 
+  if("Post-MEDS" %in% df$timepoint){
+
   #................add 0s where missing (post-MEDS)................
   for (i in 1:length(options)){
 
@@ -126,6 +128,15 @@ clean_q26c_mod_perf <- function(PLO_data_clean){
            perc_label = paste0(percentage, "%")) |>
     mutate(xvar = learning_from_model)
 
+  } else{
+    post_meds <- post_meds %>%
+      mutate(total_respondents = 0,
+             percentage = 0,
+             perc_label = NA,
+             learning_from_model = NA)
+  }
+
+
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
@@ -203,7 +214,7 @@ clean_q26c_FULLY_CORRECT <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~
 
   #....total respondents that continued on to answer question 26c...
-  total_pre_resp <- meds2024_before_clean |> 
+  total_pre_resp <- meds2025_before_clean |> 
     select(learning_from_model) |>
     group_by(learning_from_model) |>
     # filter(learning_from_model != "1 (never heard of it)") |> # include this if you want % calculation to be out of only students who advanced to this question
@@ -229,32 +240,32 @@ clean_q26c_FULLY_CORRECT <- function(PLO_data_clean){
   ##~~~~~~~~~~~~~~~~~~~
 
   #....total respondents that continued on to answer question 26c...
-  total_post_resp <- meds2024_after_clean |> 
-    select(learning_from_model) |>
-    group_by(learning_from_model) |>
-    # filter(learning_from_model != "1 (never heard of it)") |> # include this if you want % calculation to be out of only students who advanced to this question
-    count() |>
-    ungroup() |>
-    summarize(n = sum(n)) |>
-    pull()
-
-  post_meds <- df |> 
-    filter(timepoint == "Post-MEDS") |> 
-    group_by(correctness) |> 
-    count() |> 
-    # summarize(total = sum(n)) |> 
-    # ungroup() |> 
-    mutate(timepoint = rep("Post-MEDS")) |> 
-    mutate(total_respondents = total_post_resp,
-           percentage = round((n/total_respondents)*100, 1),
-           perc_label = paste0(percentage, "%")) |> 
-    mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
+  # total_post_resp <- meds2025_after_clean |> 
+  #   select(learning_from_model) |>
+  #   group_by(learning_from_model) |>
+  #   # filter(learning_from_model != "1 (never heard of it)") |> # include this if you want % calculation to be out of only students who advanced to this question
+  #   count() |>
+  #   ungroup() |>
+  #   summarize(n = sum(n)) |>
+  #   pull()
+  # 
+  # post_meds <- df |> 
+  #   filter(timepoint == "Post-MEDS") |> 
+  #   group_by(correctness) |> 
+  #   count() |> 
+  #   # summarize(total = sum(n)) |> 
+  #   # ungroup() |> 
+  #   mutate(timepoint = rep("Post-MEDS")) |> 
+  #   mutate(total_respondents = total_post_resp,
+  #          percentage = round((n/total_respondents)*100, 1),
+  #          perc_label = paste0(percentage, "%")) |> 
+  #   mutate(perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)"))
 
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~
 
-  all_q26c_data <- rbind(pre_meds, post_meds) |> 
+  all_q26c_data <- pre_meds |> #rbind(pre_meds, post_meds) |> 
     filter(correctness == "yes") |> 
     mutate(timepoint = fct_relevel(timepoint, c("Pre-MEDS", "Post-MEDS"))) 
 

diff --git a/R/q31_lang.R b/R/q31_lang.R
@@ -70,6 +70,8 @@ clean_q31_lang <- function(PLO_data_clean){
   post_meds <- df |> 
     filter(timepoint == "Post-MEDS") 
 
+  if("Post-MEDS" %in% df$timepoint){
+
   #................add 0s where missing (post-MEDS)................
   for (i in 1:length(options)){
 
@@ -102,6 +104,15 @@ clean_q31_lang <- function(PLO_data_clean){
            perc_label_long = paste0(perc_label, "\n(", n, "/", total_respondents, " respondents)")) |>
     mutate(xvar = what_lang_is_this)
 
+  } else{
+    # no "Post-MEDS" rows in df, so post_meds is empty: add placeholder
+    # columns named exactly as in the if-branch (perc_label_long, not
+    # per_label_long) so both branches yield the same column names
+    post_meds <- post_meds %>%
+      mutate(total_respondents = 0,
+             percentage = 0,
+             perc_label = NA,
+             perc_label_long = NA,
+             what_lang_is_this = NA)
+  }
+
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/R/q8_workflow_satisfaction.R b/R/q8_workflow_satisfaction.R
@@ -58,21 +58,26 @@ clean_q8_workflow_sat <- function(PLO_data_clean){
     mutate(total_respondents = sum(n),
            percentage = round((n/total_respondents)*100, 1),
            perc_label = paste0(percentage, "%")) |>
-    mutate(xvar = rate_satisfaction)
+    mutate(xvar = rate_satisfaction) %>%
+    drop_na()
 
   ##~~~~~~~~~~~~~~~~~~~
   ##  ~ post-MEDS  ----
   ##~~~~~~~~~~~~~~~~~~~
-
+
+
+
   #........separate post-MEDS (to add 0s for missing cats).........
   post_meds <- df |> 
     filter(timepoint == "Post-MEDS") 
 
+  if("Post-MEDS" %in% df$timepoint){
+
   #................add 0s where missing (post-MEDS)................
   for (i in 1:length(options)){
 
     cat_name <- options[i]
-
+    print(cat_name)
     # if category already exists in df, skip to next one ----
     if (cat_name %in% pull(post_meds[,2])) {
 
@@ -99,6 +104,14 @@ clean_q8_workflow_sat <- function(PLO_data_clean){
            perc_label = paste0(percentage, "%")) |>
     mutate(xvar = rate_satisfaction)
 
+  } else{
+    # no "Post-MEDS" rows in df, so post_meds is empty: add the same
+    # placeholder columns the if-branch creates (incl. perc_label)
+    post_meds <- post_meds %>%
+      mutate(total_respondents = 0,
+             percentage = 0,
+             perc_label = NA,
+             rate_satisfaction = NA)
+  }
+
+
   ##~~~~~~~~~~~~~~~~~~~~~~~
   ##  ~ recombine dfs  ----
   ##~~~~~~~~~~~~~~~~~~~~~~~