From 70313f2722261f10ec8418bf526131e2df7c6c0a Mon Sep 17 00:00:00 2001 From: Robert Ashton Date: Wed, 22 Dec 2021 09:20:58 +0000 Subject: [PATCH 1/6] Return more informative error from deduplication --- DESCRIPTION | 2 +- R/deduplicate.R | 7 +++++++ tests/testthat/test-deduplicate.R | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f61ca2fd2..03fce2c17 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: orderly Title: Lightweight Reproducible Reporting -Version: 1.4.4 +Version: 1.4.5 Description: Order, create and store reports from R. By defining a lightweight interface around the inputs and outputs of an analysis, a lot of the repetitive work for reproducible research diff --git a/R/deduplicate.R b/R/deduplicate.R index 2b5ba76af..28efa0d84 100644 --- a/R/deduplicate.R +++ b/R/deduplicate.R @@ -79,6 +79,13 @@ orderly_deduplicate_info <- function(config) { fsep = "/") paths <- file.path(config$root, "archive", files$path) + exists <- vlapply(paths, file.exists) + if (any(!exists)) { + stop(paste0("Cannot deduplicate archive as database references files ", + "which don't exist, this could be because they have been ", + "pulled from an archive with recursive = FALSE")) + } + ## Information about the physical files, so we can work out which ## files are already hardlinked files <- cbind( diff --git a/tests/testthat/test-deduplicate.R b/tests/testthat/test-deduplicate.R index 4815b9908..7699fce61 100644 --- a/tests/testthat/test-deduplicate.R +++ b/tests/testthat/test-deduplicate.R @@ -172,3 +172,20 @@ test_that("relink error handling", { expect_error(relink(from, to), "Some error linking") expect_true(all(fs::file_info(c(from, to))$inode == info)) }) + + +test_that("deduplicate fails if file is missing", { + skip_on_cran() + path <- orderly_example("demo") + id1 <- orderly_run("minimal", root = path, echo = FALSE) + id2 <- orderly_run("minimal", root = path, echo = FALSE) + orderly_commit(id1, root = path) + orderly_commit(id2, root = path) + + unlink(file.path(path, "archive", "minimal", id1, "script.R")) + + expect_error(orderly_deduplicate_info(orderly_config(path), paste0( + "Cannot deduplicate archive as database references files ", + "which don't exist, this could be because they have been ", + "pulled from an archive with recursive = FALSE "))) +}) From ae0c71b19b1e9ce39340819e5a922ad099a017cc Mon Sep 17 00:00:00 2001 From: Robert Ashton Date: Wed, 22 Dec 2021 12:28:53 +0000 Subject: [PATCH 2/6] Return specific error if deduplicate fails because archive pulled with recursive FALSE --- R/deduplicate.R | 22 ++++++++++++---------- tests/testthat/test-deduplicate.R | 20 ++++++++++++++++---- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/R/deduplicate.R b/R/deduplicate.R index 28efa0d84..b2e0dab11 100644 --- a/R/deduplicate.R +++ b/R/deduplicate.R @@ -68,6 +68,11 @@ orderly_deduplicate <- function(root = NULL, locate = TRUE, dry_run = TRUE, orderly_deduplicate_info <- function(config) { + report_metadata <- list.files(path_metadata(config$root)) + if (length(report_metadata) > 0) { + stop(paste("Cannot deduplicate archive reports have been pulled from", + "remote with recursive = FALSE.")) + } con <- orderly_db("destination", config) on.exit(DBI::dbDisconnect(con)) @@ -78,14 +83,6 @@ orderly_deduplicate_info <- function(config) { files$filename, fsep = "/") - paths <- file.path(config$root, "archive", files$path) - exists <- vlapply(paths, file.exists) - if (any(!exists)) { - stop(paste0("Cannot deduplicate archive as database references files ", - "which don't exist, this could be because they have been ", - "pulled from an archive with recursive = FALSE")) - } - ## Information about the physical files, so we can work out which ## files are already hardlinked files <- cbind( @@ -105,8 +102,13 @@ orderly_deduplicate_info <- function(config) { unname(tapply(files$inode, files$hash, function(x) x[[1L]]))[i] ## Quick check: - stopifnot(all(vlapply(split(files, files$hash), function(x) - all(x$inode_first == x$inode[[1]])))) + cannot_deduplicate <- all(vlapply(split(files, files$hash), function(x) { + all(x$inode_first == x$inode[[1]]) + })) + if (cannot_deduplicate) { + stop(paste("Cannot deduplicate archive as database references files", + "which don't exist.")) + } ## Classify the files into different states files$state <- rep("distinct", nrow(files)) diff --git a/tests/testthat/test-deduplicate.R b/tests/testthat/test-deduplicate.R index 7699fce61..05877d11a 100644 --- a/tests/testthat/test-deduplicate.R +++ b/tests/testthat/test-deduplicate.R @@ -184,8 +184,20 @@ test_that("deduplicate fails if file is missing", { unlink(file.path(path, "archive", "minimal", id1, "script.R")) - expect_error(orderly_deduplicate_info(orderly_config(path), paste0( - "Cannot deduplicate archive as database references files ", - "which don't exist, this could be because they have been ", - "pulled from an archive with recursive = FALSE "))) + expect_error(orderly_deduplicate_info(orderly_config(path), paste( + "Cannot deduplicate archive as database references files", + "which don't exist."))) +}) + +test_that("deduplicate fails if report pulled from remote recursive FALSE", { + skip_on_cran() + dat <- prepare_orderly_remote_example() + id3 <- orderly_run("depend", root = dat$path_remote, echo = FALSE) + orderly_commit(id3, root = dat$path_remote) + + orderly_pull_archive("depend", root = dat$config, remote = dat$remote, + recursive = FALSE) + expect_error(orderly_deduplicate_info(dat$config), paste( + "Cannot deduplicate archive reports have been pulled from", + "remote with recursive = FALSE.")) }) From 8fc46963afd42ff9e8f0bdd21bae1b2fe1feed48 Mon Sep 17 00:00:00 2001 From: Robert Ashton Date: Thu, 23 Dec 2021 12:40:53 +0000 Subject: [PATCH 3/6] Add back mistakenly removed line --- R/deduplicate.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/deduplicate.R b/R/deduplicate.R index b2e0dab11..e909a369a 100644 --- a/R/deduplicate.R +++ b/R/deduplicate.R @@ -83,6 +83,7 @@ orderly_deduplicate_info <- function(config) { files$filename, fsep = "/") + paths <- file.path(config$root, "archive", files$path) ## Information about the physical files, so we can work out which ## files are already hardlinked files <- cbind( From 6dc1f33decfc4096c728290ec1a3b52285e804a8 Mon Sep 17 00:00:00 2001 From: Robert Ashton Date: Thu, 23 Dec 2021 15:08:04 +0000 Subject: [PATCH 4/6] Fix deduplicate checking logic --- R/deduplicate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/deduplicate.R b/R/deduplicate.R index e909a369a..6fa3ca6d1 100644 --- a/R/deduplicate.R +++ b/R/deduplicate.R @@ -103,10 +103,10 @@ orderly_deduplicate_info <- function(config) { unname(tapply(files$inode, files$hash, function(x) x[[1L]]))[i] ## Quick check: - cannot_deduplicate <- all(vlapply(split(files, files$hash), function(x) { + can_deduplicate <- all(vlapply(split(files, files$hash), function(x) { all(x$inode_first == x$inode[[1]]) })) - if (cannot_deduplicate) { + if (!can_deduplicate) { stop(paste("Cannot deduplicate archive as database references files", "which don't exist.")) } From 6c4530e4db4e30a66bbeeff6ef7a6a604a23f0ea Mon Sep 17 00:00:00 2001 From: Robert Ashton Date: Thu, 23 Dec 2021 16:51:08 +0000 Subject: [PATCH 5/6] Try mount vignette dir into build --- docker/build_website | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/build_website b/docker/build_website index c9192c4ce..86441e251 100755 --- a/docker/build_website +++ b/docker/build_website @@ -22,6 +22,7 @@ DB_NAME=orderly SCHEMASPY_NAME=vimc/orderly-schemaspy DOCS_DIR=$PACKAGE_ROOT/docs +VIGNETTES_DIR=$PACKAGE_ROOT/vignettes docker build \ --build-arg ORDERLY_BASE=$ORDERLY_BASE \ @@ -65,6 +66,7 @@ docker run --rm \ --network=$DB_NW \ -w /orderly \ -v $DOCS_DIR:/orderly/docs \ + -v $VIGNETTES_DIR:/orderly/vignettes \ --user "$USER_STR" \ $ORDERLY_DEV \ Rscript -e 'pkgdown::build_site(document = FALSE)' From 9fd642d32e994ef5a01600220738ffd95e8ad6df Mon Sep 17 00:00:00 2001 From: Robert Ashton Date: Thu, 23 Dec 2021 17:27:36 +0000 Subject: [PATCH 6/6] Fix test --- R/deduplicate.R | 2 +- tests/testthat/test-deduplicate.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/deduplicate.R b/R/deduplicate.R index 6fa3ca6d1..acb478dee 100644 --- a/R/deduplicate.R +++ b/R/deduplicate.R @@ -104,7 +104,7 @@ orderly_deduplicate_info <- function(config) { ## Quick check: can_deduplicate <- all(vlapply(split(files, files$hash), function(x) { - all(x$inode_first == x$inode[[1]]) + isTRUE(all(x$inode_first == x$inode[[1]])) })) if (!can_deduplicate) { stop(paste("Cannot deduplicate archive as database references files", diff --git a/tests/testthat/test-deduplicate.R b/tests/testthat/test-deduplicate.R index 05877d11a..1e63dad2a 100644 --- a/tests/testthat/test-deduplicate.R +++ b/tests/testthat/test-deduplicate.R @@ -184,9 +184,9 @@ test_that("deduplicate fails if file is missing", { unlink(file.path(path, "archive", "minimal", id1, "script.R")) - expect_error(orderly_deduplicate_info(orderly_config(path), paste( + expect_error(orderly_deduplicate_info(orderly_config(path)), paste( "Cannot deduplicate archive as database references files", - "which don't exist."))) + "which don't exist.")) }) test_that("deduplicate fails if report pulled from remote recursive FALSE", {