nest rule results

ref #4
OuhscBbmc · Dec 29, 2021 · bb44ae0 · bb44ae0
1 parent a923d23
commit bb44ae0
Show file tree

Hide file tree

Showing 4 changed files with 164 additions and 78 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,6 +16,7 @@ Imports:
     readr (>= 2.0),
     rlang (>= 0.4.12),
     tibble (>= 1.4.0),
+    tidyr (>= 1.1),
     tidyselect,
     yaml
 Suggests:

diff --git a/R/execute-checks.R b/R/execute-checks.R
@@ -33,8 +33,7 @@ execute_checks <- function (ds, checks) {
       smells        = smells$ds_smell_result,
       smell_status  = smells$smell_status,
 
-      rules         = rules$rules,
-      rule_results  = rules$ds_rule_result
+      rules         = rules$rules
     ),
     class = "trawler_checks"
   )
@@ -153,50 +152,66 @@ execute_rules <- function (ds, checks) {
     rm(f, index, violations, ds_violation_single)
   } # End for loop
 
-  if (length(ds_rule_violation_list) == 0L) {
-
-    rule_empty_violation <- function() {
-      tibble::tibble(
-        check_name                = "all_passed",
-        record_id                 = 0L,
-        data_collector            = "",
-        error_message             = "No violations existed in the dataset",
-        priority                  = "",
-        instrument                = ""
-      )
-    }
-
-    ds_rule_violation        <- rule_empty_violation()   # MAKE SURE THIS IS rule_empty_violation()
-    # ds_rule_violation_pretty <- ds_rule_violation
-  } else {
-    ds_rule_violation        <-
-      ds_rule_violation_list |>
-      dplyr::bind_rows() |>
-      dplyr::arrange(.data$priority, .data$check_name, .data$record_id)
-
-    # ds_rule_violation_pretty <-
-    #   ds_rule_violation |>
-    #   dplyr::mutate(
-    #     # record_id         = sprintf(
-    #     #   checks$link_specific,
-    #     #   checks$redcap_version, checks$project_id, checks$default_arm, .data$record_id, .data$instrument, .data$record_id
-    #     # ),
-    #     check_name        = gsub("_", " ", .data$check_name),
-    #     data_collector    = gsub("_", " ", .data$data_collector),
-    #     instrument        = gsub("_", " ", .data$instrument),
-    #     check_name        = factor(.data$check_name),
-    #   )
-    # colnames(ds_rule_violation_pretty) <- gsub("_", " ", colnames(ds_rule_violation_pretty))
-  }
-
-
-
+  # if (length(ds_rule_violation_list) == 0L) {
+  #
+  #   rule_empty_violation <- function() {
+  #     tibble::tibble(
+  #       check_name                = "all_passed",
+  #       record_id                 = 0L,
+  #       data_collector            = "",
+  #       error_message             = "No violations existed in the dataset",
+  #       priority                  = "",
+  #       instrument                = ""
+  #     )
+  #   }
+  #
+  #   ds_rule_violation        <- rule_empty_violation()   # MAKE SURE THIS IS rule_empty_violation()
+  #   # ds_rule_violation_pretty <- ds_rule_violation
+  # } else {
+  #   ds_rule_violation        <-
+  #     ds_rule_violation_list |>
+  #     dplyr::bind_rows() |>
+  #     dplyr::arrange(.data$priority, .data$check_name, .data$record_id)
+  #
+  #   # ds_rule_violation_pretty <-
+  #   #   ds_rule_violation |>
+  #   #   dplyr::mutate(
+  #   #     # record_id         = sprintf(
+  #   #     #   checks$link_specific,
+  #   #     #   checks$redcap_version, checks$project_id, checks$default_arm, .data$record_id, .data$instrument, .data$record_id
+  #   #     # ),
+  #   #     check_name        = gsub("_", " ", .data$check_name),
+  #   #     data_collector    = gsub("_", " ", .data$data_collector),
+  #   #     instrument        = gsub("_", " ", .data$instrument),
+  #   #     check_name        = factor(.data$check_name),
+  #   #   )
+  #   # colnames(ds_rule_violation_pretty) <- gsub("_", " ", colnames(ds_rule_violation_pretty))
+  # }
+#
+# browser()
+
+  ds_rule_results <-
+    ds_rule_violation_list |>
+    dplyr::bind_rows() |>
+    dplyr::select(
+      .data$check_name,
+      .data$record_id,
+      .data$data_collector,
+      .data$consent_date,
+    ) |>
+    dplyr::arrange(.data$check_name, .data$record_id) |>
+    dplyr::group_by(.data$check_name) |>
+    tidyr::nest(
+      results = -.data$check_name
+    )
 
+  checks$rules <-
+    checks$rules |>
+    dplyr::left_join(ds_rule_results, by = "check_name")
 
   list(
-    rules           = checks$rules,
-    ds_rule_result = ds_rule_violation
-    # smell_status    = smell_status
+    rules           = checks$rules
+    # rule_status    = rule_status
   )
 
 }
diff --git a/tests/testthat/_snaps/execute-checks.md b/tests/testthat/_snaps/execute-checks.md
@@ -104,28 +104,28 @@
     Code
       result$rules
     Output
-      # A tibble: 14 x 6
-         check_name    error_message     priority debug instrument    passing_test    
-         <chr>         <chr>                <int> <lgl> <chr>         <chr>           
-       1 baseline_pre~ Serum pre-albumi~        1 FALSE baseline_data "function (d) {~
-       2 missing_seru~ Relevant nutriti~        1 FALSE baseline_data "function (d) {~
-       3 serum_prealb~ Baseline prealbu~        1 FALSE baseline_dat~ "function (d) {~
-       4 serum_prealb~ Baseline prealbu~        1 FALSE baseline_dat~ "function (d) {~
-       5 serum_prealb~ Baseline prealbu~        1 FALSE baseline_dat~ "function (d) {~
-       6 serum_prealb~ serum prealbumin~        1 FALSE baseline_dat~ "function (d) {~
-       7 baseline_fir~ Serum prealbumin~        1 FALSE baseline_dat~ "function (d) {~
-       8 daily_first_~ In-addition to b~        1 FALSE baseline_dat~ "function (d) {~
-       9 daily_protei~ npcr levels in s~        1 FALSE baseline_dat~ "function (d) {~
-      10 hospitalizat~ Patient was hosp~        1 FALSE completion_p~ "function (d) {~
-      11 optimal_dail~ Daily protein in~        1 FALSE completion_p~ "function (d) {~
-      12 recommended_~ NPCR values are ~        1 FALSE completion_d~ "function (d) {~
-      13 npcr          NPCR at completi~        1 FALSE completion_d~ "function (d) {~
-      14 npcr_compari~ NPCR at completi~        1 FALSE completion_d~ "function (d) {~
+      # A tibble: 14 x 7
+         check_name  error_message   priority debug instrument   passing_test  results
+         <chr>       <chr>              <int> <lgl> <chr>        <chr>         <list> 
+       1 baseline_p~ Serum pre-albu~        1 FALSE baseline_da~ "function (d~ <tibbl~
+       2 missing_se~ Relevant nutri~        1 FALSE baseline_da~ "function (d~ <tibbl~
+       3 serum_prea~ Baseline preal~        1 FALSE baseline_da~ "function (d~ <tibbl~
+       4 serum_prea~ Baseline preal~        1 FALSE baseline_da~ "function (d~ <NULL> 
+       5 serum_prea~ Baseline preal~        1 FALSE baseline_da~ "function (d~ <NULL> 
+       6 serum_prea~ serum prealbum~        1 FALSE baseline_da~ "function (d~ <NULL> 
+       7 baseline_f~ Serum prealbum~        1 FALSE baseline_da~ "function (d~ <NULL> 
+       8 daily_firs~ In-addition to~        1 FALSE baseline_da~ "function (d~ <NULL> 
+       9 daily_prot~ npcr levels in~        1 FALSE baseline_da~ "function (d~ <NULL> 
+      10 hospitaliz~ Patient was ho~        1 FALSE completion_~ "function (d~ <tibbl~
+      11 optimal_da~ Daily protein ~        1 FALSE completion_~ "function (d~ <tibbl~
+      12 recommende~ NPCR values ar~        1 FALSE completion_~ "function (d~ <tibbl~
+      13 npcr        NPCR at comple~        1 FALSE completion_~ "function (d~ <tibbl~
+      14 npcr_compa~ NPCR at comple~        1 FALSE completion_~ "function (d~ <tibbl~
 
 ---
 
     Code
-      as.data.frame(result$rules)
+      as.data.frame(dplyr::select(result$rules, !tidyselect::contains("results")))
     Output
                                           check_name
       1                   baseline_prealbumin_levels
@@ -206,20 +206,74 @@
 ---
 
     Code
-      result$rule_results
+      ds_result_unnested
     Output
-      # A tibble: 47 x 7
-         check_name   record_id data_collector error_message       priority instrument
-         <chr>            <int>          <int> <chr>                  <int> <chr>     
-       1 baseline_pr~         1              1 Serum pre-albumin ~        1 baseline_~
-       2 baseline_pr~         2              2 Serum pre-albumin ~        1 baseline_~
-       3 baseline_pr~         3              3 Serum pre-albumin ~        1 baseline_~
-       4 baseline_pr~         8              1 Serum pre-albumin ~        1 baseline_~
-       5 baseline_pr~         9              3 Serum pre-albumin ~        1 baseline_~
-       6 baseline_pr~        12              3 Serum pre-albumin ~        1 baseline_~
-       7 baseline_pr~        13              1 Serum pre-albumin ~        1 baseline_~
-       8 baseline_pr~        14              1 Serum pre-albumin ~        1 baseline_~
-       9 baseline_pr~        15              3 Serum pre-albumin ~        1 baseline_~
-      10 baseline_pr~        16              2 Serum pre-albumin ~        1 baseline_~
-      # ... with 37 more rows, and 1 more variable: consent_date <date>
+      # A tibble: 47 x 4
+         check_name                 record_id data_collector consent_date
+         <chr>                          <int>          <int> <date>      
+       1 baseline_prealbumin_levels         1              1 2015-01-02  
+       2 baseline_prealbumin_levels         2              2 2015-01-02  
+       3 baseline_prealbumin_levels         3              3 2015-01-05  
+       4 baseline_prealbumin_levels         8              1 2015-02-03  
+       5 baseline_prealbumin_levels         9              3 2015-02-08  
+       6 baseline_prealbumin_levels        12              3 2015-03-06  
+       7 baseline_prealbumin_levels        13              1 2015-03-15  
+       8 baseline_prealbumin_levels        14              1 2015-03-10  
+       9 baseline_prealbumin_levels        15              3 2015-03-03  
+      10 baseline_prealbumin_levels        16              2 2015-03-09  
+      # ... with 37 more rows
+
+---
+
+    Code
+      as.data.frame(ds_result_unnested)
+    Output
+                           check_name record_id data_collector consent_date
+      1    baseline_prealbumin_levels         1              1   2015-01-02
+      2    baseline_prealbumin_levels         2              2   2015-01-02
+      3    baseline_prealbumin_levels         3              3   2015-01-05
+      4    baseline_prealbumin_levels         8              1   2015-02-03
+      5    baseline_prealbumin_levels         9              3   2015-02-08
+      6    baseline_prealbumin_levels        12              3   2015-03-06
+      7    baseline_prealbumin_levels        13              1   2015-03-15
+      8    baseline_prealbumin_levels        14              1   2015-03-10
+      9    baseline_prealbumin_levels        15              3   2015-03-03
+      10   baseline_prealbumin_levels        16              2   2015-03-09
+      11   baseline_prealbumin_levels       100              1   2015-04-02
+      12   baseline_prealbumin_levels       220              1   2015-04-02
+      13  missing_serum_marker_levels         7              2   2015-01-27
+      14  missing_serum_marker_levels        10            255   2015-02-13
+      15  missing_serum_marker_levels        11              2   2015-02-19
+      16    serum_prealbumin_levels_1         1              1   2015-01-02
+      17    serum_prealbumin_levels_1         2              2   2015-01-02
+      18    serum_prealbumin_levels_1         3              3   2015-01-05
+      19    serum_prealbumin_levels_1         4            255   2015-01-10
+      20    serum_prealbumin_levels_1         5              1   2015-01-13
+      21    serum_prealbumin_levels_1         6              3   2015-01-16
+      22    serum_prealbumin_levels_1         8              1   2015-02-03
+      23    serum_prealbumin_levels_1         9              3   2015-02-08
+      24    serum_prealbumin_levels_1        12              3   2015-03-06
+      25    serum_prealbumin_levels_1        13              1   2015-03-15
+      26    serum_prealbumin_levels_1        14              1   2015-03-10
+      27    serum_prealbumin_levels_1        15              3   2015-03-03
+      28    serum_prealbumin_levels_1        16              2   2015-03-09
+      29    serum_prealbumin_levels_1       100              1   2015-04-02
+      30    serum_prealbumin_levels_1       220              1   2015-04-02
+      31       hospitalization_reason         8             NA         <NA>
+      32       hospitalization_reason        14             NA         <NA>
+      33 optimal_daily_protein_intake         3             NA         <NA>
+      34 optimal_daily_protein_intake         5             NA         <NA>
+      35 optimal_daily_protein_intake         6             NA         <NA>
+      36 optimal_daily_protein_intake         7             NA         <NA>
+      37 optimal_daily_protein_intake         8             NA         <NA>
+      38 optimal_daily_protein_intake         9             NA         <NA>
+      39 optimal_daily_protein_intake        11             NA         <NA>
+      40 optimal_daily_protein_intake        15             NA         <NA>
+      41 optimal_daily_protein_intake        16             NA         <NA>
+      42 optimal_daily_protein_intake       100             NA         <NA>
+      43       recommended_npcr_range         1             NA         <NA>
+      44       recommended_npcr_range        12             NA         <NA>
+      45                         npcr        10             NA         <NA>
+      46              npcr_comparison         1             NA         <NA>
+      47              npcr_comparison        12             NA         <NA>
 
diff --git a/tests/testthat/test-execute-checks.R b/tests/testthat/test-execute-checks.R
@@ -14,10 +14,26 @@ test_that("execute-checks-biochemical", {
   expect_snapshot(result$smell_status)
 
   expect_snapshot(result$rules)
-  expect_snapshot(as.data.frame(result$rules))
-  expect_snapshot(result$rule_results)
+  result$rules |>
+    dplyr::select(
+      !tidyselect::contains("results")
+    ) |>
+    as.data.frame() |>
+    expect_snapshot()
+
+  ds_result_unnested <-
+    result$rules |>
+    dplyr::select(
+      check_name,
+      results,
+    ) |>
+    tidyr::unnest(results)
+
+  expect_snapshot(ds_result_unnested)
+  expect_snapshot(as.data.frame(ds_result_unnested))
 })
 
+
 # result$ds_smell_result |>
 #   dplyr::group_by(check_name) |>
 #   tidyr::nest(