diff --git a/ChangeLog.md b/ChangeLog.md index 301e9a5..05cf296 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -2,6 +2,19 @@ ChangeLog for SamsElves Package -------------------------------------------------------------------------------- +## 2024-09-30 v.0.3.4 + +- added: + - utils_io.R from ihme.covid package + - `make_new_output_dir` - create a version-incremented run-date folder based on a 'YYYY_MM_DD.VV' run-date folder structure + - supported by `get_latest_output_date_index` and `get_new_output_dir` + - `get_latest_output_dir` - get the latest output directory based on a 'YYYY_MM_DD.VV' run-date folder structure + - added tests, updated deprecated methods +- documented: + - some previously undocumented helper functions for various methods (not exported) + + + ## 2024-09-23 v.0.3.3 - updated: @@ -13,6 +26,7 @@ ChangeLog for SamsElves Package - retains original file extension + ## 2024-09-19 v.0.3.2 - updated: @@ -20,6 +34,8 @@ ChangeLog for SamsElves Package - now includes option for custom csv reading function since `data.table::fread` can have quotation-doubling issues - also includes `...` arg to pass additional user-desired args to the reader function (works for any underlying reader function) + + ## 2024-09-18 - deprecated: @@ -34,6 +50,7 @@ ChangeLog for SamsElves Package - now includes a selection of sessionInfo for R version, package versions, etc. 
for pipeline provenance + ## 2024-09-06 - added: @@ -42,6 +59,8 @@ ChangeLog for SamsElves Package - `submit_job` & `submit_job_array` - added console-style log option (combine stderr and stdout) + + ## 2023-12-04 - deprecated: diff --git a/DESCRIPTION b/DESCRIPTION index 316e474..639fcad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: SamsElves Title: Helper functions for the data science at IHME -Version: 0.3.3 +Version: 0.3.4 Author: Sam Byrne (ssbyrne@uw.edu) Description: Helper functions for the data science at IHME License: none diff --git a/NAMESPACE b/NAMESPACE index d1930ab..659171c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,10 +13,14 @@ export(datetime_stamp) export(extract_sessionInfo) export(extract_submission_commands) export(find_file_extension) +export(get_latest_output_date_index) +export(get_latest_output_dir) +export(get_new_output_dir) export(increment_file_version) export(is_empty) export(is_sequential_int_vec) export(make_directory) +export(make_new_output_dir) export(make_versioned_dir) export(msg_multiline) export(msg_prt) diff --git a/R/children_of_parents.R b/R/children_of_parents.R index 470c412..353df96 100644 --- a/R/children_of_parents.R +++ b/R/children_of_parents.R @@ -68,7 +68,13 @@ children_of_parents <- function( } -#' @description Helper function for children_of_parents. +#' Helper function for children_of_parents. +#' +#' @param parent_loc_ids [int] ihme location ids +#' @param output [character] output options +#' @param hierarchy [data.table] ihme location hierarchy +#' +#' @return [none] stop on failure validate_children_of_parents_inputs = function(parent_loc_ids, output, hierarchy){ # Check for valid parent_locs_ids type if (!is.vector(parent_loc_ids) | !is.numeric(parent_loc_ids)) { @@ -87,14 +93,15 @@ validate_children_of_parents_inputs = function(parent_loc_ids, output, hierarchy } } -#' @description Helper function for children_of_parents. +#' Helper function for children_of_parents. 
+#' #' Given a single parent_id and a path_to_top_parent, #' returns TRUE if that parent_id is in the path. #' #' @param parent_id [int] Location ID of parent to test #' @param path_to_top_parent [character] String of path to top parent from hierarchy #' -#' @return boolean +#' @return [lgl] TRUE if parent_id is in path_to_top_parent is_child_of_parent = function(parent_id, path_to_top_parent){ path_to_top_parent = as.integer(unlist(strsplit(path_to_top_parent, ","))) return(parent_id %in% path_to_top_parent) diff --git a/R/parents_of_children.R b/R/parents_of_children.R index b0a1fc7..be9db5e 100644 --- a/R/parents_of_children.R +++ b/R/parents_of_children.R @@ -53,7 +53,13 @@ parent_of_child <- function( parent_level %i", child_location_id, head(hierarchy), parent_level)) } -#' @description Helper function to validate inputs to function +#' Helper function to validate inputs to function +#' +#' @param child_location_id [int] ihme location_id +#' @param hierarchy [data.table] ihme location hierarchy +#' @param parent_level [int] ihme location level +#' +#' @return [none] stop on failure validate_parents_of_children_inputs <- function(child_location_id, hierarchy, parent_level){ # Check for valid parent_level if(length(parent_level) != 1){ diff --git a/R/utils_io.R b/R/utils_io.R index a74d8b5..bbca015 100644 --- a/R/utils_io.R +++ b/R/utils_io.R @@ -115,9 +115,9 @@ save_file <- function(object, f_path, forbid_overwrite = TRUE, verbose = FALSE){ #' Read a file of an arbitrary type #' -#' @param path_to_file [chr] full path with extenstion +#' @param path_to_file [chr] full path with extension #' @param verbose [lgl] noisy or quiet function? -#' @param csv_opt [chr] namespaced function call for csv reads (default `"data.table::fread"`) +#' @param csv_opt [chr] name spaced function call for csv reads (default `"data.table::fread"`) #' @param ... 
[any] additional arguments to pass to the reader function #' #' @return [file] an object of appropriate file type @@ -189,3 +189,129 @@ increment_file_version <- function(outpath){ return(outpath_new) }
+
+
+#' Get the latest version index for a given output dir and date
+#'
+#' Directories are assumed to be named in YYYY_MM_DD.VV format with sane
+#' year/month/date/version values.
+#'
+#' @param dir [chr] path to directory with versioned dirs
+#' @param date [chr] date in YYYY_MM_DD format
+#'
+#' @return [dbl] largest version in directory tree or 0 if there are no versions OR
+#' the directory tree does not exist
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # path is relative to the package source root
+#' get_latest_output_date_index("tests/testthat/fixtures/versioned-dirs/nested/1999_09_09", date = "1999_09_09") # expect 2
+#' }
+get_latest_output_date_index <- function(dir, date) {
+  # list.files() returns character(0) for a directory that does not exist,
+  # which falls through to the "no versions" branch below
+  current_entries <- list.files(dir)
+
+  # keep only entries named exactly <date>.VV (two-digit version)
+  pat <- sprintf("^%s[.]\\d{2}$", date)
+  date_dirs <- grep(pat, current_entries, value = TRUE)
+
+  if (length(date_dirs) == 0) {
+    return(0)
+  }
+
+  # the pattern guarantees a two-digit suffix after the last dot, so the
+  # numeric conversion cannot produce NA
+  versions <- as.numeric(sub("^.*[.]", "", date_dirs))
+
+  return(max(versions))
+}
+
+
+#' Find the latest output directory with format YYYY_MM_DD.VV
+#'
+#' @param root [chr] path to root of output results
+#'
+#' @return [chr] path to latest output directory
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # path is relative to the package source root; stops if it does not exist
+#' get_latest_output_dir("tests/testthat/fixtures/versioned-dirs/nested/1999_09_09") # expect "tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.02"
+#' }
+get_latest_output_dir <- function(root) {
+  if (!dir.exists(root)) {
+    stop(sprintf("root %s does not exist", root))
+  }
+  raw <- list.dirs(root, full.names = FALSE, recursive = FALSE)
+  valid <- grep("^\\d{4}_\\d{2}_\\d{2}[.]\\d{2}$", raw, value = TRUE)
+  if (length(valid) == 0) {
+    stop(sprintf("No YYYY_MM_DD.VV directories in %s", root))
+  }
+  # names are fixed-width digits, so the string maximum is also the latest
+  # date/version
+  return(file.path(root, max(valid)))
+}
+
+
+
+#' Increment a new output folder date-version
+#'
+#' Get a new directory path, but don't make it. Stops if all 99 versions for the date are used.
+#'
+#' @param root [chr] path to root of output results
+#' @param date [chr] character date in form of "YYYY_MM_DD" or "today". "today" will be interpreted as today's date.
+#'
+#' @return [chr] path to new output directory
+#' @export
+#'
+#' @examples
+#' get_new_output_dir(root = tempdir(), date = "today")
+get_new_output_dir <- function(root, date) {
+  if (date == "today") {
+    date <- format(Sys.Date(), "%Y_%m_%d")
+  }
+  cur.version <- get_latest_output_date_index(root, date = date)
+
+  # versions are two digits (VV): a three-digit version would not be matched by
+  # get_latest_output_date_index(), so the same path would be handed out again
+  if (cur.version >= 99) {
+    stop(sprintf("No versions left for %s in %s (maximum is 99)", date, root))
+  }
+
+  dir.name <- sprintf("%s.%02i", date, cur.version + 1)
+  dir.path <- file.path(root, dir.name)
+  return(dir.path)
+}
+
+
+#' Get output directory for results to save in
+#'
+#' Returns an appropriate path to save results in, creating it if necessary. Stops if the directory cannot be created.
+#'
+#' @param root [chr] path to root of output results
+#' @param date [chr] character date in form of "YYYY_MM_DD" or "today". "today" will be interpreted as today's date.
+#'
+#' @return [chr] path to new output directory
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' make_new_output_dir("my/root/folder", date = "today")
+#' }
+make_new_output_dir <- function(root, date) {
+  dir.path <- get_new_output_dir(root, date)
+  if (!dir.exists(dir.path)) {
+    # handle quirk with singularity image default umask; Sys.umask() returns the
+    # previous value, which is restored on exit even if dir.create() errors
+    old.umask <- Sys.umask("002")
+    on.exit(Sys.umask(old.umask), add = TRUE)
+    created <- dir.create(dir.path, showWarnings = FALSE, recursive = TRUE, mode = "0777")
+    if (!created && !dir.exists(dir.path)) {
+      stop(sprintf("Could not create output directory %s", dir.path))
+    }
+  }
+  return(dir.path)
+}
diff --git a/R/wait_on_slurm_job_id.R b/R/wait_on_slurm_job_id.R index e30a973..629fa7d 100644 --- a/R/wait_on_slurm_job_id.R +++ b/R/wait_on_slurm_job_id.R @@ -214,12 +214,14 @@ wait_on_slurm_job_id <- print(paste0("Job(s) ", job_id_msg, " no longer PENDING, RUNNING, or FAILED. Time elapsed: ", job.runtime, " seconds")) } +#' Helper function for wait_on_slurm_job_id - how do you want jobs to break and display user messages? 
+#' #' @param cmd_fail [chr] #' @param cmd_fail_feedback [chr] #' @param job_id_regex_raw [regex] #' @param filter_by [chr] #' -#' @description Helper function for wait_on_slurm_job_id - how do you want jobs to break and display user messages? +#' @return [none] stop on failure break_for_failed_jobs <- function( cmd_fail, diff --git a/man/break_for_failed_jobs.Rd b/man/break_for_failed_jobs.Rd new file mode 100644 index 0000000..6f8592a --- /dev/null +++ b/man/break_for_failed_jobs.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wait_on_slurm_job_id.R +\name{break_for_failed_jobs} +\alias{break_for_failed_jobs} +\title{Helper function for wait_on_slurm_job_id - how do you want jobs to break and display user messages?} +\usage{ +break_for_failed_jobs(cmd_fail, cmd_fail_feedback, job_id_regex_raw, filter_by) +} +\arguments{ +\item{cmd_fail}{[chr]} + +\item{cmd_fail_feedback}{[chr]} + +\item{job_id_regex_raw}{[regex]} + +\item{filter_by}{[chr]} +} +\value{ +[none] stop on failure +} +\description{ +Helper function for wait_on_slurm_job_id - how do you want jobs to break and display user messages? 
+} diff --git a/man/get_latest_output_date_index.Rd b/man/get_latest_output_date_index.Rd index ac1d8fa..5f63218 100644 --- a/man/get_latest_output_date_index.Rd +++ b/man/get_latest_output_date_index.Rd @@ -1,21 +1,32 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_versioned_dir.R +% Please edit documentation in R/make_versioned_dir.R, R/utils_io.R \name{get_latest_output_date_index} \alias{get_latest_output_date_index} \title{get the latest index for given an output dir and a date} \usage{ +get_latest_output_date_index(dir, date) + get_latest_output_date_index(dir, date) } \arguments{ -\item{dir}{path to directory with versioned dirs} +\item{dir}{[chr] path to directory with versioned dirs} -\item{date}{character in YYYY_MM_DD format} +\item{date}{[chr] character in be YYYY_MM_DD format} } \value{ largest version in directory tree or 0 if there are no version OR the directory tree does not exist + +[dbl] largest version in directory tree or 0 if there are no version OR +the directory tree does not exist } \description{ directories are assumed to be named in YYYY_MM_DD.VV format with sane year/month/date/version values. + +directories are assumed to be named in YYYY_MM_DD.VV format with sane +year/month/date/version values. 
+} +\examples{ +get_latest_output_date_index("tests/testthat/fixtures/versioned-dirs/nested/1999_09_09", date = "1999_09_09") # expect 2 } diff --git a/man/get_latest_output_dir.Rd b/man/get_latest_output_dir.Rd new file mode 100644 index 0000000..bf4668e --- /dev/null +++ b/man/get_latest_output_dir.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_io.R +\name{get_latest_output_dir} +\alias{get_latest_output_dir} +\title{Find the latest output directory with format YYYY_MM_DD.VV} +\usage{ +get_latest_output_dir(root) +} +\arguments{ +\item{root}{[chr] path to root of output results} +} +\value{ +[chr] path to latest output directory +} +\description{ +Find the latest output directory with format YYYY_MM_DD.VV +} +\examples{ +get_latest_output_dir("tests/testthat/fixtures/versioned-dirs/nested/1999_09_09") # expect "tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.02" +} diff --git a/man/get_new_output_dir.Rd b/man/get_new_output_dir.Rd new file mode 100644 index 0000000..9e7ca7f --- /dev/null +++ b/man/get_new_output_dir.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_io.R +\name{get_new_output_dir} +\alias{get_new_output_dir} +\title{Increment a new output folder date-version} +\usage{ +get_new_output_dir(root, date) +} +\arguments{ +\item{root}{[chr] path to root of output results} + +\item{date}{[chr] character date in form of "YYYY_MM_DD" or "today". 
"today" will be interpreted as today's date.} +} +\value{ +[chr] path to new output direcctory +} +\description{ +Get a new directory path, but don't make it +} +\examples{ +get_new_output_dir(root = tempdir(), date = "today") +} diff --git a/man/is_child_of_parent.Rd b/man/is_child_of_parent.Rd new file mode 100644 index 0000000..c249809 --- /dev/null +++ b/man/is_child_of_parent.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/children_of_parents.R +\name{is_child_of_parent} +\alias{is_child_of_parent} +\title{Helper function for children_of_parents.} +\usage{ +is_child_of_parent(parent_id, path_to_top_parent) +} +\arguments{ +\item{parent_id}{[int] Location ID of parent to test} + +\item{path_to_top_parent}{[character] String of path to top parent from hierarchy} +} +\value{ +[lgl] TRUE if parent_id is in path_to_top_parent +} +\description{ +Given a single parent_id and a path_to_top_parent, +returns TRUE if that parent_id is in the path. +} diff --git a/man/make_new_output_dir.Rd b/man/make_new_output_dir.Rd new file mode 100644 index 0000000..c27cdad --- /dev/null +++ b/man/make_new_output_dir.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_io.R +\name{make_new_output_dir} +\alias{make_new_output_dir} +\title{Get output directory for results to save in} +\usage{ +make_new_output_dir(root, date) +} +\arguments{ +\item{root}{[chr] path to root of output results} + +\item{date}{[chr] character date in form of "YYYY_MM_DD" or "today". "today" will be interpreted as today's date.} +} +\value{ +[chr] path to new output directory +} +\description{ +Returns an appropriate path to save results in, creating it if necessary. 
+} +\examples{ +\dontrun{ +make_new_output_dir("my/root/folder", date = "today") +} +} diff --git a/man/read_file.Rd b/man/read_file.Rd index fe0190e..3733ebc 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -7,11 +7,11 @@ read_file(path_to_file, verbose = FALSE, csv_opt = "data.table::fread", ...) } \arguments{ -\item{path_to_file}{[chr] full path with extenstion} +\item{path_to_file}{[chr] full path with extension} \item{verbose}{[lgl] noisy or quiet function?} -\item{csv_opt}{[chr] namespaced function call for csv reads (default `"data.table::fread"`)} +\item{csv_opt}{[chr] name spaced function call for csv reads (default `"data.table::fread"`)} \item{...}{[any] additional arguments to pass to the reader function} } diff --git a/man/validate_children_of_parents_inputs.Rd b/man/validate_children_of_parents_inputs.Rd new file mode 100644 index 0000000..2259a5f --- /dev/null +++ b/man/validate_children_of_parents_inputs.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/children_of_parents.R +\name{validate_children_of_parents_inputs} +\alias{validate_children_of_parents_inputs} +\title{Helper function for children_of_parents.} +\usage{ +validate_children_of_parents_inputs(parent_loc_ids, output, hierarchy) +} +\arguments{ +\item{parent_loc_ids}{[int] ihme location ids} + +\item{output}{[character] output options} + +\item{hierarchy}{[data.table] ihme location hierarchy} +} +\value{ +[none] stop on failure +} +\description{ +Helper function for children_of_parents. 
+} diff --git a/man/validate_parents_of_children_inputs.Rd b/man/validate_parents_of_children_inputs.Rd new file mode 100644 index 0000000..a2fb86d --- /dev/null +++ b/man/validate_parents_of_children_inputs.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parents_of_children.R +\name{validate_parents_of_children_inputs} +\alias{validate_parents_of_children_inputs} +\title{Helper function to validate inputs to function} +\usage{ +validate_parents_of_children_inputs(child_location_id, hierarchy, parent_level) +} +\arguments{ +\item{child_location_id}{[int] ihme location_id} + +\item{hierarchy}{[data.table] ihme location hierarchy} + +\item{parent_level}{[int] ihme location level} +} +\value{ +[none] stop on failure +} +\description{ +Helper function to validate inputs to function +} diff --git a/tests/testthat/fixtures/closure_criteria_sheet.xlsx b/tests/testthat/fixtures/closure_criteria_sheet.xlsx new file mode 100644 index 0000000..204abbd Binary files /dev/null and b/tests/testthat/fixtures/closure_criteria_sheet.xlsx differ diff --git a/tests/testthat/fixtures/data.csv b/tests/testthat/fixtures/data.csv new file mode 100644 index 0000000..0be8816 --- /dev/null +++ b/tests/testthat/fixtures/data.csv @@ -0,0 +1,3 @@ +name,rank +Kirk,Admiral +Picard,Captain diff --git a/tests/testthat/fixtures/model-inputs/2020_05_25.02/locations/ids.yaml b/tests/testthat/fixtures/model-inputs/2020_05_25.02/locations/ids.yaml new file mode 100644 index 0000000..9d62e6d --- /dev/null +++ b/tests/testthat/fixtures/model-inputs/2020_05_25.02/locations/ids.yaml @@ -0,0 +1,6 @@ +covariate: + location_set_id: 111 + location_set_version_id: 680 +modeling: + location_set_id: 111 + location_set_version_id: 674 diff --git a/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.01/data.txt b/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.01/data.txt new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.02/data.txt b/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.02/data.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.15blargfoobar/contents-to-populate-directory b/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.15blargfoobar/contents-to-populate-directory new file mode 100644 index 0000000..e69de29 diff --git a/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.NA/QC_1999_09_09.01 b/tests/testthat/fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.NA/QC_1999_09_09.01 new file mode 100644 index 0000000..e69de29 diff --git a/tests/testthat/test-utils_io.R b/tests/testthat/test-utils_io.R index 31cd8a2..d6b8155 100644 --- a/tests/testthat/test-utils_io.R +++ b/tests/testthat/test-utils_io.R @@ -169,7 +169,7 @@ test_that("read_file errors correctly", ) }) -test_that(".csv option errors and works properly with alternate functions", +test_that("read_file .csv option errors and works properly with alternate functions", { withr::local_file(dir_full) dir.create(dir_full) @@ -194,7 +194,7 @@ test_that(".csv option errors and works properly with alternate functions", }) -test_that("... works to pass extra args to reader function", +test_that("read_file ... works to pass extra args to reader function", { withr::local_file(dir_full) dir.create(dir_full) @@ -213,8 +213,70 @@ test_that("... 
works to pass extra args to reader function", })
+
+# cribbed from ihme.covid repo
+test_that("get_latest_output_date_index returns 0 if no dirs exist", {
+  # neither of these directories exist
+  expect_equal(get_latest_output_date_index("/does/not/exist", date = "2001_01_01"), 0)
+  expect_equal(get_latest_output_date_index("fixtures/versioned-dirs/2000_01_01", date = "2001_01_01"), 0)
+})
+
+test_that("get_latest_output_date_index returns correct value", {
+  expect_equal(get_latest_output_date_index("fixtures/versioned-dirs/nested/1999_09_09", date = "1999_09_09"), 2)
+})
+
+test_that("get_latest_output_dir works", {
+  latest_dir <- get_latest_output_dir(root = "fixtures/versioned-dirs/nested/1999_09_09")
+
+  expect_equal(latest_dir, "fixtures/versioned-dirs/nested/1999_09_09/1999_09_09.02")
+})
+
+test_that("get_latest_output_dir errors correctly", {
+  expect_error(
+    get_latest_output_dir(root = "fixtures/DOES-NOT-EXIST"),
+    "root fixtures/DOES-NOT-EXIST does not exist"
+  )
+
+  expect_error(
+    get_latest_output_dir(root = "fixtures/versioned-dirs"),
+    "No YYYY_MM_DD.VV directories in fixtures/versioned-dirs"
+  )
+})
+
+
+test_that("get_new_output_dir functionality works", {
+
+  # create random root directory with self-teardown (`teardown()` is deprecated)
+  withr::local_file(dir_full)
+  dir.create(dir_full)
+
+  # expect bootstrap to work: the path is returned (actual first, expected second) but not created
+  expect_equal(get_new_output_dir(root = dir_full, date = "1999_09_09"), file.path(dir_full, "1999_09_09.01"))
+  expect_false(dir.exists(file.path(dir_full, "1999_09_09.01")))
+})
+
+
+test_that("make_new_output_dir functionality works", {
+
+  # create random root directory with self-teardown (`teardown()` is deprecated)
+  withr::local_file(dir_full)
+  dir.create(dir_full)
+
+  # expect bootstrap to work: the path is returned and the directory now exists
+  expect_equal(make_new_output_dir(root = dir_full, date = "1999_09_09"), file.path(dir_full, "1999_09_09.01"))
+  expect_true(dir.exists(file.path(dir_full, "1999_09_09.01")))
+
+  # incrementing automatically happens
+  expect_equal(make_new_output_dir(root = dir_full, date = "1999_09_09"), file.path(dir_full, "1999_09_09.02"))
+
+  # handle convenience "today" value
+  today.v1 <- format(Sys.Date(), "%Y_%m_%d.01")
+  expect_equal(make_new_output_dir(root = dir_full, date = "today"), file.path(dir_full, today.v1))
+})
+
+
 # Last test -test_that("tempdir (dir_parent) exists and dir_full does not", +test_that("test cleanup works - tempdir (dir_parent) exists and dir_full does not", { expect_true(dir.exists(dir_parent)) expect_false(dir.exists(dir_full))