Skip to content

Commit

Permalink
Merge branch 'main' into brief-recycle
Browse files Browse the repository at this point in the history
  • Loading branch information
yjunechoe authored Dec 3, 2024
2 parents 83e7761 + b5b880b commit c872541
Show file tree
Hide file tree
Showing 13 changed files with 175 additions and 180 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Imports:
cli (>= 3.6.0),
DBI (>= 1.1.0),
digest (>= 0.6.27),
dplyr (>= 1.0.10),
dplyr (>= 1.1.0),
dbplyr (>= 2.3.0),
fs (>= 1.6.0),
glue (>= 1.6.2),
Expand All @@ -54,7 +54,7 @@ Suggests:
data.table,
duckdb,
ggforce,
ggplot2,
ggplot2 (>= 3.5.0),
jsonlite,
log4r,
lubridate,
Expand Down
10 changes: 4 additions & 6 deletions R/get_agent_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -1612,7 +1612,7 @@ get_agent_report <- function(
n_pass, f_pass, n_fail, f_fail, W, S, N, extract,
W_val, S_val, N_val, eval, active
) %>%
gt::gt(id = "pb_agent") %>%
gt::gt(id = "pb_agent", locale = locale) %>%
gt::tab_header(
title = title_text,
subtitle = gt::md(combined_subtitle)
Expand Down Expand Up @@ -1666,15 +1666,13 @@ get_agent_report <- function(
align = "right",
columns = "i"
) %>%
gt::fmt_number(
gt::fmt_integer(
columns = c("units", "n_pass", "n_fail", "f_pass", "f_fail"),
decimals = 0, drop_trailing_zeros = TRUE, suffixing = TRUE,
locale = locale
suffixing = TRUE
) %>%
gt::fmt_number(
columns = c("f_pass", "f_fail"),
decimals = 2,
locale = locale
decimals = 2
) %>%
gt::fmt_markdown(
columns = c(
Expand Down
2 changes: 1 addition & 1 deletion R/get_informant_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ get_informant_report <- function(
gsub("(\\(|\\)|\\[|\\]|\\||\\.|\\^|\\?|\\+|\\$|\\*)", "\\\\\\1", .)

row_idx <-
which(grepl(paste0("^<code.*?>", column_escaped, "<.*?"), tbl$item))
grep(paste0("^<code.*?>", column_escaped, "<.*?"), tbl$item)

if (length(miniheader_vec) > 0) {
tbl$item[[row_idx]] <-
Expand Down
4 changes: 2 additions & 2 deletions R/incorporate.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,10 @@ incorporate <- function(informant) {
# of the `lang` value (from `NULL` to the informant `lang`)

select_call_idx <-
which(grepl("select", snippet_f_rhs_str))
grep("select", snippet_f_rhs_str)

pb_str_catalog_call_idx <-
which(grepl("pb_str_catalog", snippet_f_rhs_str))
grep("pb_str_catalog", snippet_f_rhs_str)

snippet_f_rhs_str[pb_str_catalog_call_idx] <-
gsub(
Expand Down
17 changes: 7 additions & 10 deletions R/interrogate.R
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@ interrogate <- function(

if (any_1_unit && any_n_unit) {

col_is_idx <- which(grepl("col_is|col_exists", validation_fns))
col_vals_idx <- which(grepl("col_vals", validation_fns))
col_is_idx <- grep("col_is|col_exists", validation_fns)
col_vals_idx <- grep("col_vals", validation_fns)

validation_formulas <-
c(
Expand Down Expand Up @@ -427,10 +427,7 @@ interrogate <- function(
tbl_checked %>%
dplyr::mutate(pb_is_good_ = !!rlang::parse_expr(columns_str_add)) %>%
dplyr::select(-dplyr::all_of(columns_str_vec)) %>%
dplyr::mutate(pb_is_good_ = dplyr::case_when(
pb_is_good_ == validation_n ~ TRUE,
TRUE ~ FALSE
))
dplyr::mutate(pb_is_good_ = pb_is_good_ == validation_n)
}

# Perform rowwise validations for the column
Expand Down Expand Up @@ -1158,7 +1155,7 @@ tbl_val_comparison <- function(
table %>%
dplyr::mutate(pb_is_good_ = dplyr::case_when(
!!expression ~ 1,
TRUE ~ 0
.default = 0
))

} else {
Expand All @@ -1167,7 +1164,7 @@ tbl_val_comparison <- function(
dplyr::mutate(pb_is_good_ = !!expression) %>%
dplyr::mutate(pb_is_good_ = dplyr::case_when(
is.na(pb_is_good_) ~ na_pass,
TRUE ~ pb_is_good_
.default = pb_is_good_
))
}
}
Expand Down Expand Up @@ -1363,7 +1360,7 @@ tbl_vals_between <- function(
table %>%
dplyr::mutate(pb_is_good_ = dplyr::case_when(
is.na({{ column }}) ~ na_pass_bool,
TRUE ~ pb_is_good_
.default = pb_is_good_
))
}

Expand Down Expand Up @@ -2031,7 +2028,7 @@ interrogate_within_spec <- function(
check_vin_db(table, column = {{ column }}) %>%
dplyr::mutate(pb_is_good_ = dplyr::case_when(
is.na(pb_is_good_) ~ na_pass,
TRUE ~ pb_is_good_
.default = pb_is_good_
))
}

Expand Down
2 changes: 1 addition & 1 deletion R/remove_deactivate.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ remove_steps <- function(

# Renumber steps
agent$validation_set$i <-
as.integer(seq(from = 1, to = nrow(agent$validation_set), by = 1))
seq.int(from = 1L, to = nrow(agent$validation_set), by = 1L)

# Remove any data extracts
agent$extracts <- NULL
Expand Down
13 changes: 8 additions & 5 deletions R/scan_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ scan_data <- function(

if (any(c("interactions", "correlations") %in% sections)) {
rlang::check_installed(
c("ggforce", "ggplot2"),
c("ggforce", "ggplot2 (>= 3.5.0)"),
"to use the `interactions` and `correlations` sections."
)
}
Expand Down Expand Up @@ -1421,9 +1421,11 @@ get_corr_plot <- function(

corr_df <-
as.data.frame(as.table(mat)) %>%
dplyr::mutate(Freq = ifelse(Var1 == Var2, NA_real_, Freq)) %>%
dplyr::mutate(Var1 = factor(Var1, levels = names(labels_vec))) %>%
dplyr::mutate(Var2 = factor(Var2, levels = rev(names(labels_vec))))
dplyr::mutate(
Freq = ifelse(Var1 == Var2, NA_real_, Freq),
Var1 = factor(Var1, levels = names(labels_vec)),
Var2 = factor(Var2, levels = rev(names(labels_vec)))
)

plot_missing <-
corr_df %>%
Expand All @@ -1444,7 +1446,8 @@ get_corr_plot <- function(
panel.grid = ggplot2::element_blank(),
legend.direction = "horizontal",
legend.title = ggplot2::element_blank(),
legend.position = c(0.5, 1.03),
legend.position = "inside",
legend.position.inside = c(0.5, 1.03),
plot.margin = ggplot2::unit(c(1, 0.5, 0, 0), "cm"),
legend.key.width = ggplot2::unit(2.0, "cm"),
legend.key.height = ggplot2::unit(3.0, "mm")
Expand Down
1 change: 1 addition & 0 deletions R/steps_and_briefs.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ hash_validation_step <- function(assertion_type,
seg_val = as.character(seg_val %||% NA_character_)
)

# Maybe consider replacing with rlang::hash?
step_hash <- digest::sha1(step_chr)

paste(step_hash, hash_version, sep = "-")
Expand Down
13 changes: 8 additions & 5 deletions R/utils-profiling.R
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ get_table_column_histogram <- function(data_column, lang, locale) {
ggplot2::geom_col(fill = "steelblue", width = 0.5) +
ggplot2::geom_hline(yintercept = 0, color = "#B2B2B2") +
ggplot2::labs(x = x_label, y = y_label) +
ggplot2::scale_y_continuous(labels = scales::comma_format()) +
ggplot2::scale_y_continuous(labels = scales::label_comma()) +
ggplot2::theme_minimal()
)
}
Expand Down Expand Up @@ -423,8 +423,10 @@ get_tbl_dbi_missing_tbl <- function(data) {

frequency_list %>%
dplyr::bind_rows() %>%
dplyr::mutate(value = ifelse(value == 0, NA_real_, value)) %>%
dplyr::mutate(col_name = factor(col_name, levels = colnames(data)))
dplyr::mutate(
value = ifelse(value == 0, NA_real_, value),
col_name = factor(col_name, levels = colnames(data))
)
}

# nocov start
Expand Down Expand Up @@ -484,7 +486,7 @@ get_tbl_df_missing_tbl <- function(data) {
rowid < cuts_[18] ~ 18L,
rowid < cuts_[19] ~ 19L,
rowid < cuts_[20] ~ 20L,
TRUE ~ 20L
.default = 20L
))
data <- dplyr::select(data, -rowid)
data <- dplyr::group_by(data, `::cut_group::`)
Expand Down Expand Up @@ -604,7 +606,8 @@ get_missing_value_plot <- function(data, frequency_tbl, missing_by_column_tbl) {
panel.grid = ggplot2::element_blank(),
legend.direction = "horizontal",
legend.title = ggplot2::element_blank(),
legend.position = c(0.5, 1.0),
legend.position = "inside",
legend.position.inside = c(0.5, 1.0),
plot.margin = ggplot2::unit(c(1, 0.5, 0, 0), "cm"),
legend.key.width = ggplot2::unit(2.0, "cm"),
legend.key.height = ggplot2::unit(3.0, "mm")
Expand Down
134 changes: 65 additions & 69 deletions R/utils-specifications.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,88 +276,84 @@ check_vin_db <- function(table,

table <-
table %>%
dplyr::mutate(pb_vin_all_ = {{ column }}) %>%
dplyr::mutate(pb_vin_all_ = tolower(as.character((pb_vin_all_)))) %>%
dplyr::mutate(pb_vin_nch_ = ifelse(nchar(pb_vin_all_) == 17, TRUE, FALSE)) %>%
dplyr::mutate(pb_vin_001_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 1, 1), "8")) %>%
dplyr::mutate(pb_vin_002_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 2, 2), "8")) %>%
dplyr::mutate(pb_vin_003_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 3, 3), "8")) %>%
dplyr::mutate(pb_vin_004_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 4, 4), "8")) %>%
dplyr::mutate(pb_vin_005_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 5, 5), "8")) %>%
dplyr::mutate(pb_vin_006_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 6, 6), "8")) %>%
dplyr::mutate(pb_vin_007_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 7, 7), "8")) %>%
dplyr::mutate(pb_vin_008_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 8, 8), "8")) %>%
dplyr::mutate(pb_vin_009_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 9, 9), "8")) %>%
dplyr::mutate(pb_vin_010_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 10, 10), "8")) %>%
dplyr::mutate(pb_vin_011_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 11, 11), "8")) %>%
dplyr::mutate(pb_vin_012_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 12, 12), "8")) %>%
dplyr::mutate(pb_vin_013_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 13, 13), "8")) %>%
dplyr::mutate(pb_vin_014_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 14, 14), "8")) %>%
dplyr::mutate(pb_vin_015_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 15, 15), "8")) %>%
dplyr::mutate(pb_vin_016_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 16, 16), "8")) %>%
dplyr::mutate(pb_vin_017_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 17, 17), "8")) %>%
dplyr::mutate_at(
dplyr::vars(dplyr::matches("pb_vin_[0-9]{3}_")),
.funs = list(~ case_when(
. %in% c("a", "j") ~ "1",
. %in% c("b", "k", "s") ~ "2",
. %in% c("c", "l", "t") ~ "3",
. %in% c("d", "m", "u") ~ "4",
. %in% c("e", "n", "v") ~ "5",
. %in% c("f", "w") ~ "6",
. %in% c("g", "p", "x") ~ "7",
. %in% c("h", "y") ~ "8",
. %in% c("r", "z") ~ "9",
!(. %in% c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")) ~ "100000",
TRUE ~ .
)
)) %>%
dplyr::mutate(pb_vin_chk_ = pb_vin_009_) %>%
dplyr::mutate_at(
dplyr::vars(dplyr::matches("pb_vin_[0-9]{3}_")),
.funs = as.integer
) %>%
dplyr::mutate(pb_vin_001_w = pb_vin_001_ * 8L) %>%
dplyr::mutate(pb_vin_002_w = pb_vin_002_ * 7L) %>%
dplyr::mutate(pb_vin_003_w = pb_vin_003_ * 6L) %>%
dplyr::mutate(pb_vin_004_w = pb_vin_004_ * 5L) %>%
dplyr::mutate(pb_vin_005_w = pb_vin_005_ * 4L) %>%
dplyr::mutate(pb_vin_006_w = pb_vin_006_ * 3L) %>%
dplyr::mutate(pb_vin_007_w = pb_vin_007_ * 2L) %>%
dplyr::mutate(pb_vin_008_w = pb_vin_008_ * 10L) %>%
dplyr::mutate(pb_vin_009_w = pb_vin_009_ * 0L) %>%
dplyr::mutate(pb_vin_010_w = pb_vin_010_ * 9L) %>%
dplyr::mutate(pb_vin_011_w = pb_vin_011_ * 8L) %>%
dplyr::mutate(pb_vin_012_w = pb_vin_012_ * 7L) %>%
dplyr::mutate(pb_vin_013_w = pb_vin_013_ * 6L) %>%
dplyr::mutate(pb_vin_014_w = pb_vin_014_ * 5L) %>%
dplyr::mutate(pb_vin_015_w = pb_vin_015_ * 4L) %>%
dplyr::mutate(pb_vin_016_w = pb_vin_016_ * 3L) %>%
dplyr::mutate(pb_vin_017_w = pb_vin_017_ * 2L) %>%
dplyr::mutate(
pb_vin_all_ = {{ column }},
pb_vin_all_ = tolower(as.character((pb_vin_all_))),
pb_vin_nch_ = nchar(pb_vin_all_) == 17,
pb_vin_001_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 1, 1), "8"),
pb_vin_002_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 2, 2), "8"),
pb_vin_003_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 3, 3), "8"),
pb_vin_004_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 4, 4), "8"),
pb_vin_005_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 5, 5), "8"),
pb_vin_006_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 6, 6), "8"),
pb_vin_007_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 7, 7), "8"),
pb_vin_008_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 8, 8), "8"),
pb_vin_009_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 9, 9), "8"),
pb_vin_010_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 10, 10), "8"),
pb_vin_011_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 11, 11), "8"),
pb_vin_012_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 12, 12), "8"),
pb_vin_013_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 13, 13), "8"),
pb_vin_014_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 14, 14), "8"),
pb_vin_015_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 15, 15), "8"),
pb_vin_016_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 16, 16), "8"),
pb_vin_017_ = ifelse(pb_vin_nch_, substr(pb_vin_all_, 17, 17), "8"),
dplyr::across(
.cols = dplyr::matches("pb_vin_[0-9]{3}_"),
function(x) {
dplyr::case_match(x,
c("a", "j") ~ "1",
c("b", "k", "s") ~ "2",
c("c", "l", "t") ~ "3",
c("d", "m", "u") ~ "4",
c("e", "n", "v") ~ "5",
c("f", "w") ~ "6",
c("g", "p", "x") ~ "7",
c("h", "y") ~ "8",
c("r", "z") ~ "9",
c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") ~ "100000",
.default = x
)
}),
pb_vin_chk_ = pb_vin_009_,
dplyr::across(dplyr::matches("pb_vin_[0-9]{3}_"), as.integer),
pb_vin_001_w = pb_vin_001_ * 8L,
pb_vin_002_w = pb_vin_002_ * 7L,
pb_vin_003_w = pb_vin_003_ * 6L,
pb_vin_004_w = pb_vin_004_ * 5L,
pb_vin_005_w = pb_vin_005_ * 4L,
pb_vin_006_w = pb_vin_006_ * 3L,
pb_vin_007_w = pb_vin_007_ * 2L,
pb_vin_008_w = pb_vin_008_ * 10L,
pb_vin_009_w = pb_vin_009_ * 0L,
pb_vin_010_w = pb_vin_010_ * 9L,
pb_vin_011_w = pb_vin_011_ * 8L,
pb_vin_012_w = pb_vin_012_ * 7L,
pb_vin_013_w = pb_vin_013_ * 6L,
pb_vin_014_w = pb_vin_014_ * 5L,
pb_vin_015_w = pb_vin_015_ * 4L,
pb_vin_016_w = pb_vin_016_ * 3L,
pb_vin_017_w = pb_vin_017_ * 2L,
pb_vin_sum_uw =
pb_vin_001_ + pb_vin_002_ + pb_vin_003_ + pb_vin_004_ +
pb_vin_005_ + pb_vin_006_ + pb_vin_007_ + pb_vin_008_ +
pb_vin_009_ + pb_vin_010_ + pb_vin_011_ + pb_vin_012_ +
pb_vin_013_ + pb_vin_014_ + pb_vin_015_ + pb_vin_016_ +
pb_vin_017_
) %>%
dplyr::mutate(
pb_vin_017_,
pb_vin_sum_ =
pb_vin_001_w + pb_vin_002_w + pb_vin_003_w + pb_vin_004_w +
pb_vin_005_w + pb_vin_006_w + pb_vin_007_w + pb_vin_008_w +
pb_vin_009_w + pb_vin_010_w + pb_vin_011_w + pb_vin_012_w +
pb_vin_013_w + pb_vin_014_w + pb_vin_015_w + pb_vin_016_w +
pb_vin_017_w
) %>%
dplyr::mutate(pb_vin_mod_ = as.character(pb_vin_sum_ %% 11L)) %>%
dplyr::mutate(pb_vin_mod_ = ifelse(pb_vin_mod_ == "10", "x", pb_vin_mod_)) %>%
dplyr::mutate(pb_is_good_ = pb_vin_mod_ == pb_vin_chk_) %>%
dplyr::mutate(pb_is_good_ = ifelse(!pb_vin_nch_, FALSE, pb_vin_nch_)) %>%
dplyr::mutate(pb_is_good_ = ifelse(pb_vin_sum_uw >= 100000, FALSE, pb_is_good_))
pb_vin_017_w,
pb_vin_mod_ = as.character(pb_vin_sum_ %% 11L),
pb_vin_mod_ = ifelse(pb_vin_mod_ == "10", "x", pb_vin_mod_),
pb_is_good_ = pb_vin_mod_ == pb_vin_chk_,
pb_is_good_ = ifelse(!pb_vin_nch_, FALSE, pb_vin_nch_),
pb_is_good_ = ifelse(pb_vin_sum_uw >= 100000, FALSE, pb_is_good_)
)

table <-
table %>% dplyr::select(c(tbl_colnames, "pb_is_good_"))
table %>% dplyr::select(dplyr::all_of(c(tbl_colnames, "pb_is_good_")))

table
}
Expand Down
Loading

0 comments on commit c872541

Please sign in to comment.