Skip to content

Commit

Permalink
Allow to write duplicates to a file
Browse files Browse the repository at this point in the history
  • Loading branch information
hsonne committed Jan 4, 2024
1 parent edd32d3 commit d232432
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 12 deletions.
16 changes: 12 additions & 4 deletions R/createHashFromColumns.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
# createHashFromColumns --------------------------------------------------------
createHashFromColumns <- function(data, columns, n.chars = 8L)
createHashFromColumns <- function(data, columns, nchars = 8L, silent = FALSE)
{
duplicates <- kwb.utils::findPartialDuplicates(data, columns)

if (!is.null(duplicates)) {
print(duplicates)
stop("There are duplicates in the key columns (see above)!")

if (!silent) {
message(
"Cannot create unique hashes due to duplicates in the key columns (",
kwb.utils::stringList(columns),
")! Returning -1L. Check attribute 'duplicates'."
)
}

return(structure(-1L, duplicates = duplicates))
}

keys <- kwb.utils::pasteColumns(data, columns, "|")

stopifnot(!anyDuplicated(keys))

hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), n.chars)
hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), nchars)

stopifnot(!anyDuplicated(hashes))

Expand Down
45 changes: 38 additions & 7 deletions R/setGlobalInspectionID.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
#' seconds, just to increase the chance that setting the time is enough to
#' generate a unique key.
#' @param name.convention one of \code{c("norm", "camel", "snake")}
#' @param file optional. Path to file to which duplicates are are written (if
#' any). Default: \code{"setGlobalInspectionID_duplicates.txt"}
#' @return list with the same elements as in \code{inspection.data} but with
#' columns \code{inspid} being added to the data frames "inspections" and
#' "observations"
setGlobalInspectionID <- function(
inspection.data,
project = NULL,
default.time = "22:22",
name.convention = "norm"
name.convention = "norm",
file = NULL
)
{
if (is.null(project)) {
Expand Down Expand Up @@ -97,10 +100,16 @@ setGlobalInspectionID <- function(
))

# Create the inspection IDs and store them in column "inspection_id"
inspections[["inspection_id"]] <- createHashFromColumns(
data = inspections,
columns = columns
hashes <- createHashFromColumns(
data = inspections,
columns = columns,
silent = TRUE
)

# Check for duplicates in the hashes
stop_on_hash_duplicates(hashes, file = file)

inspections[["inspection_id"]] <- hashes

i <- kwb.utils::selectColumns(observations, "inspno")

Expand All @@ -111,11 +120,33 @@ setGlobalInspectionID <- function(
observations <- kwb.utils::removeColumns(observations, "inspno")

# Just a shortcut
idFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id")
id_first <- function(x) kwb.utils::moveColumnsToFront(x, "inspection_id")

list(
header.info = get_elements(inspection.data, "header.info"),
inspections = idFirst(inspections),
observations = idFirst(observations)
inspections = id_first(inspections),
observations = id_first(observations)
)
}

# stop_on_hash_duplicates ------------------------------------------------------
stop_on_hash_duplicates <- function(hashes, file = NULL)
{
if (identical(kwb.utils::removeAttributes(hashes), -1L)) {

duplicates <- kwb.utils::getAttribute(hashes, "duplicates")

if (is.null(file)) {
print(duplicates)
} else {
writeLines(capture.output(print(duplicates)), file)
}

stop(
"There were duplicates in the key columns (see ",
ifelse(is.null(file), "above", dQuote(file, '"')),
").",
call. = FALSE
)
}
}
6 changes: 5 additions & 1 deletion man/setGlobalInspectionID.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions tests/testthat/test-function-setGlobalInspectionID.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,30 @@
library(testthat)

test_that("setGlobalInspectionID() works", {

f <- kwb.en13508.2:::setGlobalInspectionID

expect_error(f())

file <- file.path(tempdir(), "duplicates.txt")

expect_error(f(
inspection.data = list(
inspections = data.frame(
inspection_date = "2024-01-03",
inspection_time = "22:22",
node_1_ref = "A",
node_2_ref = "B"
)[c(1, 1), ],
observations = data.frame(
inspno = 1L
),
header.info = list()
),
project = "Lausanne",
name.convention = "snake",
file = file
))

expect_true(file.exists(file))
})

0 comments on commit d232432

Please sign in to comment.