diff --git a/R/createHashFromColumns.R b/R/createHashFromColumns.R index 1a9a799..a62572c 100644 --- a/R/createHashFromColumns.R +++ b/R/createHashFromColumns.R @@ -1,18 +1,26 @@ # createHashFromColumns -------------------------------------------------------- -createHashFromColumns <- function(data, columns, n.chars = 8L) +createHashFromColumns <- function(data, columns, nchars = 8L, silent = FALSE) { duplicates <- kwb.utils::findPartialDuplicates(data, columns) if (!is.null(duplicates)) { - print(duplicates) - stop("There are duplicates in the key columns (see above)!") + + if (!silent) { + message( + "Cannot create unique hashes due to duplicates in the key columns (", + kwb.utils::stringList(columns), + ")! Returning -1L. Check attribute 'duplicates'." + ) + } + + return(structure(-1L, duplicates = duplicates)) } keys <- kwb.utils::pasteColumns(data, columns, "|") stopifnot(!anyDuplicated(keys)) - hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), n.chars) + hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), nchars) stopifnot(!anyDuplicated(hashes)) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 08c5644..dabf66b 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -12,6 +12,8 @@ #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. #' @param name.convention one of \code{c("norm", "camel", "snake")} +#' @param file optional. Path to file to which duplicates are are written (if +#' any). Default: \code{"setGlobalInspectionID_duplicates.txt"} #' @return list with the same elements as in \code{inspection.data} but with #' columns \code{inspid} being added to the data frames "inspections" and #' "observations" @@ -19,7 +21,8 @@ setGlobalInspectionID <- function( inspection.data, project = NULL, default.time = "22:22", - name.convention = "norm" + name.convention = "norm", + file = NULL ) { if (is.null(project)) { @@ -97,10 +100,16 @@ setGlobalInspectionID <- function( )) # Create the inspection IDs and store them in column "inspection_id" - inspections[["inspection_id"]] <- createHashFromColumns( - data = inspections, - columns = columns + hashes <- createHashFromColumns( + data = inspections, + columns = columns, + silent = TRUE ) + + # Check for duplicates in the hashes + stop_on_hash_duplicates(hashes, file = file) + + inspections[["inspection_id"]] <- hashes i <- kwb.utils::selectColumns(observations, "inspno") @@ -111,11 +120,33 @@ setGlobalInspectionID <- function( observations <- kwb.utils::removeColumns(observations, "inspno") # Just a shortcut - idFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id") + id_first <- function(x) kwb.utils::moveColumnsToFront(x, "inspection_id") list( header.info = get_elements(inspection.data, "header.info"), - inspections = idFirst(inspections), - observations = idFirst(observations) + inspections = id_first(inspections), + observations = id_first(observations) ) } + +# stop_on_hash_duplicates ------------------------------------------------------ +stop_on_hash_duplicates <- function(hashes, file = NULL) +{ + if (identical(kwb.utils::removeAttributes(hashes), -1L)) { + + duplicates <- kwb.utils::getAttribute(hashes, "duplicates") + + if (is.null(file)) { + print(duplicates) + } else { + writeLines(capture.output(print(duplicates)), file) + } + + stop( + "There were duplicates in the key columns (see ", + ifelse(is.null(file), "above", dQuote(file, '"')), + ").", + call. = FALSE + ) + } +} diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index fc68193..3691dae 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -8,7 +8,8 @@ setGlobalInspectionID( inspection.data, project = NULL, default.time = "22:22", - name.convention = "norm" + name.convention = "norm", + file = NULL ) } \arguments{ @@ -24,6 +25,9 @@ seconds, just to increase the chance that setting the time is enough to generate a unique key.} \item{name.convention}{one of \code{c("norm", "camel", "snake")}} + +\item{file}{optional. Path to file to which duplicates are are written (if +any). Default: \code{"setGlobalInspectionID_duplicates.txt"}} } \value{ list with the same elements as in \code{inspection.data} but with diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R index 5b40ac8..8c8996c 100644 --- a/tests/testthat/test-function-setGlobalInspectionID.R +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -1,7 +1,30 @@ +library(testthat) + test_that("setGlobalInspectionID() works", { f <- kwb.en13508.2:::setGlobalInspectionID expect_error(f()) + file <- file.path(tempdir(), "duplicates.txt") + + expect_error(f( + inspection.data = list( + inspections = data.frame( + inspection_date = "2024-01-03", + inspection_time = "22:22", + node_1_ref = "A", + node_2_ref = "B" + )[c(1, 1), ], + observations = data.frame( + inspno = 1L + ), + header.info = list() + ), + project = "Lausanne", + name.convention = "snake", + file = file + )) + + expect_true(file.exists(file)) })