easystats · strengejacke · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.12.3
+Version: 0.12.3.1
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+# performance 0.12.4
+
+## Changes
+
+* `check_dag()` now also checks for colliders, and suggests removing it in the
+  printed output.
+
+* Minor revisions to the printed output of `check_dag()`.
+
 # performance 0.12.3
 
 ## New functions

diff --git a/R/check_dag.R b/R/check_dag.R
@@ -103,7 +103,7 @@
 #'   Interpreting Confounder and Modifier Coefficients. American Journal of
 #'   Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412}
 #'
 #' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) && packageVersion("see") > "0.8.5"
 #' # no adjustment needed
 #' check_dag(
 #'   y ~ x + b,
@@ -247,11 +247,29 @@
   checks <- lapply(c("direct", "total"), function(x) {
     adjustment_set <- unlist(dagitty::adjustmentSets(dag, effect = x), use.names = FALSE)
     adjustment_nodes <- unlist(dagitty::adjustedNodes(dag), use.names = FALSE)
+    minimal_adjustments <- as.list(dagitty::adjustmentSets(dag, effect = x))
+    collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag)]
+    if (!length(collider)) {
+      # if we don't have colliders, set to NULL
+      collider <- NULL
+    } else {
+      # if we *have* colliders, remove them from minimal adjustments
+      minimal_adjustments <- lapply(minimal_adjustments, function(ma) {
+        setdiff(ma, collider)
+      })
+    }
     list(
-      adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes),
-      incorrectly_adjusted = is.null(adjustment_set) && !is.null(adjustment_nodes),
+      # no adjustment needed when
+      # - required and current adjustment sets are NULL
+      # - AND we have no collider in current adjustments
+      adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes) && is.null(collider),
+      # incorrect adjustment when
+      # - required is NULL and current adjustment not NULL
+      # - OR we have a collider in current adjustments
+      incorrectly_adjusted = (is.null(adjustment_set) && !is.null(adjustment_nodes)) || (!is.null(collider) && collider %in% adjustment_nodes),
       current_adjustments = adjustment_nodes,
-      minimal_adjustments = as.list(dagitty::adjustmentSets(dag, effect = x))
+      minimal_adjustments = minimal_adjustments,
+      collider = collider
     )
   })
 
@@ -260,6 +278,10 @@
   attr(dag, "exposure") <- exposure
   attr(dag, "adjusted") <- adjusted
   attr(dag, "adjustment_sets") <- checks[[1]]$current_adjustments
+  attr(dag, "collider") <- checks[[1]]$collider
+  # remove collider from sub-attributes
+  checks[[1]]$collider <- NULL
+  checks[[2]]$collider <- NULL
   attr(dag, "check_direct") <- insight::compact_list(checks[[1]])
   attr(dag, "check_total") <- insight::compact_list(checks[[2]])
 
@@ -296,6 +318,7 @@
 #' @export
 print.check_dag <- function(x, ...) {
   effect <- attributes(x)$effect
+  collider <- attributes(x)$collider
 
   # header
   cat(insight::print_color("# Check for correct adjustment sets", "blue"))
@@ -317,6 +340,16 @@
     )
   }
 
+  # add information on colliders
+  if (!is.null(collider)) {
+    exposure_outcome_text <- paste0(
+      exposure_outcome_text,
+      "\n- Collider",
+      ifelse(length(collider) > 1, "s", ""),
+      ": ", insight::color_text(datawizard::text_concatenate(collider), "cyan")
+    )
+  }
+
   cat(exposure_outcome_text)
   cat("\n\n")
 
@@ -331,12 +364,12 @@
       } else {
         out <- attributes(x)$check_total
       }
-      .print_dag_results(out, x, i, effect)
+      .print_dag_results(out, x, i, effect, collider)
     }
   }
 }
 
-.print_dag_results <- function(out, x, i, effect) {
+.print_dag_results <- function(out, x, i, effect, collider = NULL) {
   # missing adjustements - minimal_adjustment can be a list of different
   # options for minimal adjustements, so we check here if any of the minimal
   # adjustments are currently sufficient
@@ -356,8 +389,18 @@
       attributes(x)$outcome,
       "`."
     )
+  } else if (!is.null(collider)) {
+    # Scenario 2: adjusted for (downstream) collider
+    msg <- paste0(
+      insight::color_text("Incorrectly adjusted!", "red"),
+      "\nYour model adjusts for a (downstream) collider, ",
+      insight::color_text(datawizard::text_concatenate(collider, enclose = "`"), "cyan"),
+      ". To estimate the ", i, " effect, do ",
+      insight::color_text("not", "italic"),
+      " adjust for it, to avoid collider-bias."
+    )
   } else if (isTRUE(out$incorrectly_adjusted)) {
-    # Scenario 2: incorrectly adjusted, adjustments where none is allowed
+    # Scenario 3: incorrectly adjusted, adjustments where none is allowed
     msg <- paste0(
       insight::color_text("Incorrectly adjusted!", "red"),
       "\nTo estimate the ", i, " effect, do ",
@@ -367,13 +410,13 @@
       "."
     )
   } else if (any(sufficient_adjustments)) {
-    # Scenario 3: correct adjustment
+    # Scenario 4: correct adjustment
     msg <- paste0(
       insight::color_text("Model is correctly specified.", "green"),
       "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done."
     )
   } else {
-    # Scenario 4: missing adjustments
+    # Scenario 5: missing adjustments
     msg <- paste0(
       insight::color_text("Incorrectly adjusted!", "red"),
       "\nTo estimate the ", i, " effect, ",
@@ -395,6 +438,7 @@
         ),
         "."
       )
+      current_str <- "\nCurrently"
     } else {
       msg <- paste0(
         msg,
@@ -404,14 +448,25 @@
         ), "yellow"),
         "."
       )
+      current_str <- " Currently"
     }
     if (is.null(out$current_adjustments)) {
-      msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.")
+      msg <- paste0(msg, current_str, ", the model does not adjust for any variables.")
     } else {
       msg <- paste0(
-        msg, "\nCurrently, the model only adjusts for ",
-        insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "yellow"), "."
+        msg, current_str, ", the model only adjusts for ",
+        datawizard::text_concatenate(out$current_adjustments, enclose = "`"),
+        "."
       )
+      # check if we could identify missing variables, and if so, add them to the message
+      missing_vars <- setdiff(unlist(out$minimal_adjustments), out$current_adjustments)
+      if (length(missing_vars) > 0) {
+        msg <- paste0(
+          msg, " You possibly also need to adjust for ",
+          insight::color_text(datawizard::text_concatenate(missing_vars, enclose = "`"), "yellow"),
+          " to block biasing paths."
+        )
+      }
     }
   }
 

diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md
@@ -41,8 +41,7 @@
       Identification of direct and total effects
 
       Incorrectly adjusted!
-      To estimate the direct and total effect, at least adjust for `b`.
-      Currently, the model does not adjust for any variables.
+      To estimate the direct and total effect, at least adjust for `b`. Currently, the model does not adjust for any variables.
 
 
 ---
@@ -58,8 +57,7 @@
       Identification of direct and total effects
 
       Incorrectly adjusted!
-      To estimate the direct and total effect, at least adjust for `b` and `c`.
-      Currently, the model only adjusts for `c`.
+      To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. You possibly also need to adjust for `b` to block biasing paths.
 
 
 ---
@@ -75,8 +73,7 @@
       Identification of direct and total effects
 
       Incorrectly adjusted!
-      To estimate the direct and total effect, at least adjust for `b` and `c`.
-      Currently, the model only adjusts for `c`.
+      To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. You possibly also need to adjust for `b` to block biasing paths.
 
 
 ---
@@ -143,14 +140,12 @@
       Identification of direct effects
 
       Incorrectly adjusted!
-      To estimate the direct effect, at least adjust for `x1` and `x2`.
-      Currently, the model does not adjust for any variables.
+      To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model does not adjust for any variables.
 
       Identification of total effects
 
       Incorrectly adjusted!
-      To estimate the total effect, at least adjust for `x1`.
-      Currently, the model does not adjust for any variables.
+      To estimate the total effect, at least adjust for `x1`. Currently, the model does not adjust for any variables.
 
 
 ---
@@ -166,8 +161,7 @@
       Identification of direct effects
 
       Incorrectly adjusted!
-      To estimate the direct effect, at least adjust for `x1` and `x2`.
-      Currently, the model only adjusts for `x1`.
+      To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model only adjusts for `x1`. You possibly also need to adjust for `x2` to block biasing paths.
 
       Identification of total effects
 
@@ -188,8 +182,7 @@
       Identification of direct effects
 
       Incorrectly adjusted!
-      To estimate the direct effect, at least adjust for `x1` and `x2`.
-      Currently, the model only adjusts for `x2`.
+      To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model only adjusts for `x2`. You possibly also need to adjust for `x1` to block biasing paths.
 
       Identification of total effects
 
@@ -218,3 +211,46 @@
       To estimate the total effect, do not adjust for `x1` and `x2`.
 
 
+# check_dag, collider bias
+
+    Code
+      print(dag)
+    Output
+      # Check for correct adjustment sets
+      - Outcome: SMD_ICD11
+      - Exposure: agegroup
+      - Adjustments: edgroup3, gender_kid, pss4_kid_sum_2sd and residence
+
+      Identification of direct effects
+
+      Incorrectly adjusted!
+      To estimate the direct effect, at least adjust for `edgroup3`, `gender_kid`, `pss4_kid_sum_2sd`, `residence` and `sm_h_total_kid`. Currently, the model only adjusts for `edgroup3`, `gender_kid`, `pss4_kid_sum_2sd` and `residence`. You possibly also need to adjust for `sm_h_total_kid` to block biasing paths.
+
+      Identification of total effects
+
+      Model is correctly specified.
+      All minimal sufficient adjustments to estimate the total effect were done.
+
+
+---
+
+    Code
+      print(dag)
+    Output
+      # Check for correct adjustment sets
+      - Outcome: SMD_ICD11
+      - Exposure: agegroup
+      - Adjustments: edgroup3, gender_kid, pss4_kid_sum_2sd, residence and sm_h_total_kid
+      - Collider: sm_h_total_kid
+
+      Identification of direct effects
+
+      Incorrectly adjusted!
+      Your model adjusts for a (downstream) collider, `sm_h_total_kid`. To estimate the direct effect, do not adjust for it, to avoid collider-bias.
+
+      Identification of total effects
+
+      Incorrectly adjusted!
+      Your model adjusts for a (downstream) collider, `sm_h_total_kid`. To estimate the total effect, do not adjust for it, to avoid collider-bias.
+
+
diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R
@@ -133,3 +133,31 @@ test_that("check_dag, different adjustements for total and direct", {
   )
   expect_snapshot(print(dag))
 })
+
+test_that("check_dag, collider bias", {
+  dag <- check_dag(
+    SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid,
+    pss4_kid_sum_2sd ~ gender_kid,
+    sm_h_total_kid ~ gender_kid + agegroup,
+    adjusted = c(
+      "agegroup", "gender_kid", "edgroup3", "residence",
+      "pss4_kid_sum_2sd"
+    ),
+    outcome = "SMD_ICD11",
+    exposure = "agegroup"
+  )
+  expect_snapshot(print(dag))
+
+  dag <- check_dag(
+    SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid,
+    pss4_kid_sum_2sd ~ gender_kid,
+    sm_h_total_kid ~ gender_kid + agegroup,
+    adjusted = c(
+      "agegroup", "gender_kid", "edgroup3", "residence",
+      "pss4_kid_sum_2sd", "sm_h_total_kid"
+    ),
+    outcome = "SMD_ICD11",
+    exposure = "agegroup"
+  )
+  expect_snapshot(print(dag))
+})