diff --git a/DESCRIPTION b/DESCRIPTION index 74c440e4..eb515715 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,8 @@ Imports: rlang, rstudioapi, withr, - yaml + yaml, + zip Suggests: DBI, RSQLite, diff --git a/NAMESPACE b/NAMESPACE index 4c96b240..8694a828 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,4 +46,6 @@ export(orderly_search_options) export(orderly_shared_resource) export(orderly_strict_mode) export(orderly_validate_archive) +export(orderly_zip_export) +export(orderly_zip_import) importFrom(R6,R6Class) diff --git a/R/location.R b/R/location.R index d0223dd2..bc32b418 100644 --- a/R/location.R +++ b/R/location.R @@ -695,10 +695,8 @@ location_build_push_plan <- function(packet_id, location_name, root) { files_msg <- character(0) } else { packet_id_msg <- sort(packet_id_msg) - metadata <- metadata ## All files across all missing ids: - files <- unique(unlist( - lapply(packet_id_msg, function(i) metadata[[i]]$files$hash))) + files <- find_all_files(packet_id_msg, metadata) ## Which of these does the server not know about: files_msg <- driver$list_unknown_files(files) diff --git a/R/outpack_misc.R b/R/outpack_misc.R index eb669a91..e07295b3 100644 --- a/R/outpack_misc.R +++ b/R/outpack_misc.R @@ -63,6 +63,10 @@ find_all_dependencies <- function(id, metadata) { sort(ret) } +## Get all the files for a set of packets, filtering any overlap. +find_all_files <- function(id, metadata) { + unique(unlist(lapply(id, function(i) metadata[[i]]$files$hash))) +} validate_parameters <- function(parameters, call) { if (is.null(parameters) || length(parameters) == 0) { diff --git a/R/zip.R b/R/zip.R new file mode 100644 index 00000000..d1060a9b --- /dev/null +++ b/R/zip.R @@ -0,0 +1,148 @@ +##' Export packets as a zip file. +##' +##' The packets can be imported into a different repository using the +##' [orderly2::orderly_zip_import] function. +##' +##' This is useful as one-time way to publish your results, for example as an +##' artefact accompanying a paper. For back-and-forth collaboration, a shared +##' location should be preferred, as this offers more flexibility. +##' +##' @param path the path where the zip file will be created +##' +##' @param packets One or more packets to export +##' +##' @inheritParams orderly_metadata +##' +##' @return Invisibly, the path to the zip file +##' @export +orderly_zip_export <- function(path, packets, root = NULL, locate = TRUE) { + root <- root_open(root, locate = locate, require_orderly = FALSE, + call = environment()) + + index <- root$index$data() + packets <- find_all_dependencies(packets, index$metadata) + files <- find_all_files(packets, index$metadata) + + dest <- withr::local_tempfile() + fs::dir_create(dest) + fs::dir_create(file.path(dest, "metadata")) + store <- file_store$new(file.path(dest, "files")) + + fs::file_copy( + file.path(root$path, ".outpack", "metadata", packets), + file.path(dest, "metadata", packets)) + + if (root$config$core$use_file_store) { + for (hash in files) { + store$put(root$files$filename(hash), hash) + } + } else { + for (hash in files) { + store$put(find_file_by_hash(root, hash), hash) + } + } + + packet_list <- index$location[ + match(index$location$packet, packets), c("packet", "hash")] + contents <- list(packets = packet_list) + + writeLines(to_json(contents, "orderly/export.json"), + file.path(dest, "outpack.json")) + + zip::zip(fs::path_abs(path), root = dest, + files = c("outpack.json", "metadata", "files")) + + invisible(path) +} + +##' Import packets from a zip file. +##' +##' @param path the path to the zip file to be imported. +##' +##' @inheritParams orderly_metadata +##' +##' @return Invisibly, the IDs of the imported packets +##' @export +orderly_zip_import <- function(path, root = NULL, locate = TRUE) { + root <- root_open(root, locate = locate, require_orderly = FALSE, + call = environment()) + + if (!("outpack.json" %in% zip::zip_list(path)$filename)) { + cli::cli_abort( + c("Zip file does not contain an 'outpack.json' file at its root", + i = paste("Are you sure this file was produced by", + "orderly2::orderly_zip_export?"))) + } + + src <- withr::local_tempfile() + zip::unzip(path, exdir = src) + + contents <- jsonlite::read_json(file.path(src, "outpack.json"), + simplifyVector = TRUE) + + zip_import_metadata(root, src, contents$packets, call = environment()) + zip_import_packets(root, src, contents$packets) + + invisible(contents$packets$packet) +} + +zip_import_metadata <- function(root, src, packets, call) { + index <- root$index$data() + new_packets <- !(packets$packet %in% names(index$metadata)) + + ids <- packets$packet[new_packets] + src_paths <- file.path(src, "metadata", ids) + dst_paths <- file.path(root$path, ".outpack", "metadata", ids) + expected_hash <- packets$hash[new_packets] + + for (i in seq_along(src_paths)) { + metadata <- read_string(src_paths[[i]]) + + hash_validate_data(metadata, expected_hash[[i]], + sprintf("metadata for '%s'", id), call = call) + + writeLines(metadata, dst_paths[[i]]) + } + + seen_before <- intersect(packets$packet, index$location$packet) + hash_there <- packets$hash[match(seen_before, packets$packet)] + hash_here <- index$location$hash[match(seen_before, index$location$packet)] + err <- hash_there != hash_here + if (any(err)) { + cli::cli_abort( + c("Imported file has conflicting metadata", + x = paste("This is {.strong really} bad news. The zip file contains", + "packets with a different hash than the metadata already in", + "this repository. I'm not going to import this new metadata", + "but there's no guarantee that the older metadata is", + "actually what you want!"), + i = "Conflicts for: {squote(ids[err])}", + i = "We would be interested in this case, please let us know"), + call = call) + } + + invisible() +} + +zip_import_packets <- function(root, src, packets) { + store <- file_store$new(file.path(src, "files")) + index <- root$index$data() + missing_packets <- packets[!(packets$packet %in% index$unpacked), ] + + if (root$config$core$use_file_store) { + files <- find_all_files(missing_packets$packet, index$metadata) + files <- files[!root$files$exists(files)] + for (hash in files) { + file_path <- store$get(hash, root$files$tmp(), overwrite = FALSE) + root$files$put(file_path, hash, move = TRUE) + } + } + + for (i in seq_along(missing_packets$packet)) { + if (!is.null(root$config$core$path_archive)) { + location_pull_files_archive(missing_packets$packet[[i]], store, root) + } + mark_packet_known(missing_packets$packet[[i]], local, + missing_packets$hash[[i]], Sys.time(), root) + } +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 72bbe997..42fbe877 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -66,6 +66,10 @@ reference: - orderly_location_list - orderly_location_remove - orderly_location_rename + - title: Exporting packets + contents: + - orderly_zip_export + - orderly_zip_import - title: Help for developing contents: - orderly_new diff --git a/inst/schema/orderly/export.json b/inst/schema/orderly/export.json new file mode 100644 index 00000000..36d78594 --- /dev/null +++ b/inst/schema/orderly/export.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "index of a exported zip file", + "version": "0.0.1", + + "type": "object", + "properties": { + "packets": { + "type": "array", + "items": { + "packet": { + "$ref": "../outpack/packet-id.json" + }, + + "hash": { + "$ref": "../outpack/hash.json" + } + } + } + } +} diff --git a/man/orderly_zip_export.Rd b/man/orderly_zip_export.Rd new file mode 100644 index 00000000..a1e418d4 --- /dev/null +++ b/man/orderly_zip_export.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zip.R +\name{orderly_zip_export} +\alias{orderly_zip_export} +\title{Export packets as a zip file.} +\usage{ +orderly_zip_export(path, packets, root = NULL, locate = TRUE) +} +\arguments{ +\item{path}{the path where the zip file will be created} + +\item{packets}{One or more packets to export} + +\item{root}{The path to the root directory, or \code{NULL} (the +default) to search for one from the current working directory if +\code{locate} is \code{TRUE}. This function does not require that the +directory is configured for orderly, and can be any \code{outpack} +root (see \link{orderly_init} for details).} + +\item{locate}{Logical, indicating if the root should be searched +for. If \code{TRUE}, then we looks in the directory given for \code{root} +(or the working directory if \code{NULL}) and then up through its +parents until it finds an \code{.outpack} directory or +\code{orderly_config.yml}} +} +\value{ +Invisibly, the path to the zip file +} +\description{ +The packets can be imported into a different repository using the +\link{orderly_zip_import} function. +} +\details{ +This is useful as one-time way to publish your results, for example as an +artefact accompanying a paper. For back-and-forth collaboration, a shared +location should be preferred, as this offers more flexibility. +} diff --git a/man/orderly_zip_import.Rd b/man/orderly_zip_import.Rd new file mode 100644 index 00000000..140a3838 --- /dev/null +++ b/man/orderly_zip_import.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zip.R +\name{orderly_zip_import} +\alias{orderly_zip_import} +\title{Import packets from a zip file.} +\usage{ +orderly_zip_import(path, root = NULL, locate = TRUE) +} +\arguments{ +\item{path}{the path to the zip file to be imported.} + +\item{root}{The path to the root directory, or \code{NULL} (the +default) to search for one from the current working directory if +\code{locate} is \code{TRUE}. This function does not require that the +directory is configured for orderly, and can be any \code{outpack} +root (see \link{orderly_init} for details).} + +\item{locate}{Logical, indicating if the root should be searched +for. If \code{TRUE}, then we looks in the directory given for \code{root} +(or the working directory if \code{NULL}) and then up through its +parents until it finds an \code{.outpack} directory or +\code{orderly_config.yml}} +} +\value{ +Invisibly, the IDs of the imported packets +} +\description{ +Import packets from a zip file. +} diff --git a/tests/testthat/test-zip.R b/tests/testthat/test-zip.R new file mode 100644 index 00000000..760c265e --- /dev/null +++ b/tests/testthat/test-zip.R @@ -0,0 +1,213 @@ +export_info <- function(path) { + listing <- zip::zip_list(path)$filename + + metadata <- grep("^metadata/.*[^/]$", listing, value = TRUE) + metadata <- sub("^metadata/", "", metadata) + + files <- grep("^files/.*[^/]$", listing, value = TRUE) + files <- sub("^files/", "", files) + + list(metadata = metadata, files = files) +} + +test_that("Exporting a packet includes its transitive dependencies", { + root <- create_temporary_root() + ids <- create_random_packet_chain(root, 3) + other <- create_random_packet(root) + + path <- withr::local_tempfile() + orderly_zip_export(path, ids[[3]], root = root) + + info <- export_info(path) + expect_setequal(info$metadata, ids) + + # The root packet has one file, and each downstream one has 2 (one source file + # and one data). The downstreams actually have three, but one of them is a + # copy of the parent packet's data, hence is deduplicated and doesn't count. + expect_equal(length(info$files), 5) +}) + +test_that("Can export multiple packets", { + root <- create_temporary_root() + first <- create_random_packet(root) + second <- create_random_packet(root) + ids <- c(first, second) + + path <- withr::local_tempfile() + orderly_zip_export(path, ids, root = root) + + info <- export_info(path) + expect_setequal(info$metadata, ids) + expect_equal(length(info$files), 2) +}) + +test_that("Can export from a file store", { + root <- create_temporary_root(use_file_store = TRUE, path_archive = NULL) + ids <- create_random_packet_chain(root, 3) + + path <- withr::local_tempfile() + orderly_zip_export(path, ids[[3]], root = root) + + info <- export_info(path) + expect_setequal(info$metadata, ids) + expect_equal(length(info$files), 5) +}) + +test_that("Packet files are de-duplicated when exported", { + root <- create_temporary_root() + ids <- c(create_deterministic_packet(root), create_deterministic_packet(root)) + + path <- withr::local_tempfile() + orderly_zip_export(path, ids, root = root) + + info <- export_info(path) + expect_setequal(info$metadata, ids) + expect_equal(length(info$files), 1) +}) + +test_that("Importing an invalid zip fails", { + dir <- withr::local_tempfile() + fs::dir_create(dir) + fs::file_create(file.path(dir, "hello.txt")) + + zipfile <- withr::local_tempfile() + zip::zip(zipfile, files = c("hello.txt"), root = dir) + + root <- create_temporary_root() + expect_error( + orderly_zip_import(zipfile, root = root), + "Zip file does not contain an 'outpack.json' file at its root") +}) + +test_that("Can import a zip file", { + upstream <- create_temporary_root() + downstream <- create_temporary_root() + + id <- create_random_packet(upstream) + + path <- withr::local_tempfile() + orderly_zip_export(path, id, root = upstream) + + imported <- orderly_zip_import(path, root = downstream) + expect_equal(imported, id) + + index <- downstream$index$data() + expect_setequal(names(index$metadata), id) + expect_mapequal(index$metadata, upstream$index$data()$metadata) + expect_setequal(index$unpacked, id) + + files <- upstream$index$metadata(id)$files + file_paths <- file.path(downstream$path, downstream$config$core$path_archive, + upstream$index$metadata(id)$name, id, files$path) + + for (i in seq_along(file_paths)) { + expect_no_error(hash_validate_file(file_paths[[i]], files$hash[[i]])) + } +}) + +test_that("Can import a zip file to a file store", { + upstream <- create_temporary_root() + downstream <- create_temporary_root(use_file_store = TRUE) + + ids <- create_random_packet_chain(upstream, 3) + + path <- withr::local_tempfile() + orderly_zip_export(path, ids[[3]], root = upstream) + orderly_zip_import(path, root = downstream) + + index <- downstream$index$data() + expect_setequal(names(index$metadata), ids) + expect_mapequal(index$metadata, upstream$index$data()$metadata) + expect_setequal(index$unpacked, ids) + + for (id in ids) { + files <- upstream$index$metadata(id)$files + expect_true(all(downstream$files$exists(files$hash))) + } +}) + +test_that("Importing a zip file is idempotent", { + upstream <- create_temporary_root() + downstream <- create_temporary_root() + + id <- create_random_packet(upstream) + + path <- withr::local_tempfile() + orderly_zip_export(path, id, root = upstream) + imported_once <- orderly_zip_import(path, root = downstream) + imported_twice <- orderly_zip_import(path, root = downstream) + + expect_equal(imported_once, id) + expect_equal(imported_twice, id) + + index <- downstream$index$data() + expect_setequal(names(index$metadata), id) + expect_mapequal(index$metadata, upstream$index$data()$metadata) + expect_setequal(index$unpacked, id) +}) + +test_that("New packets are imported", { + upstream <- create_temporary_root() + + first_id <- create_random_packet(upstream) + first_zip <- withr::local_tempfile() + orderly_zip_export(first_zip, first_id, root = upstream) + + second_id <- create_random_packet(upstream) + second_zip <- withr::local_tempfile() + orderly_zip_export(second_zip, c(first_id, second_id), root = upstream) + + downstream <- create_temporary_root() + + orderly_zip_import(first_zip, root = downstream) + index <- downstream$index$data() + expect_setequal(names(index$metadata), first_id) + expect_setequal(index$unpacked, first_id) + + orderly_zip_import(second_zip, root = downstream) + index <- downstream$index$data() + expect_setequal(names(index$metadata), c(first_id, second_id)) + expect_mapequal(index$metadata, upstream$index$data()$metadata) + expect_setequal(index$unpacked, c(first_id, second_id)) +}) + +test_that("Can import packet with existing metadata", { + upstream <- create_temporary_root(use_file_store = TRUE) + id <- create_random_packet(upstream) + + # We want to bring in the packets metadata into the downstream repository, + # but not copy any of the actual files (yet). We do this by adding a path + # location and pulling the metadata from it. + downstream <- create_temporary_root() + orderly_location_add("upstream", "path", list(path = upstream$path), + root = downstream) + orderly_location_pull_metadata(root = downstream) + + index <- downstream$index$data() + expect_setequal(names(index$metadata), id) + expect_equal(length(index$unpacked), 0) + + path <- withr::local_tempfile() + orderly_zip_export(path, id, root = upstream) + orderly_zip_import(path, root = downstream) + + index <- downstream$index$data() + expect_setequal(names(index$metadata), id) + expect_setequal(index$unpacked, id) +}) + +test_that("Importing a zip file with mismatching metadata fails", { + upstream <- create_temporary_root() + downstream <- create_temporary_root() + + id <- outpack_id() + create_random_packet(upstream, id = id) + create_random_packet(downstream, id = id) + + path <- withr::local_tempfile() + orderly_zip_export(path, id, root = upstream) + + expect_error( + orderly_zip_import(path, root = downstream), + "Imported file has conflicting metadata") +})