Skip to content

Commit

Permalink
Handle version semantics for collections (#115)
Browse files Browse the repository at this point in the history
* Handling of latest semantics

* Fix docs

* Run pkg check, bump version
  • Loading branch information
anngvu authored Aug 17, 2023
1 parent c1896e9 commit 7c8804d
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 24 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: nfportalutils
Title: NF Portal Utilities
Version: 0.0.0.9310
Version: 0.0.0.9320
Authors@R: c(
person(given = "Robert", family = "Allaway", role = c("aut", "cre"),
email = "[email protected]",
Expand Down
67 changes: 53 additions & 14 deletions R/datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,27 @@
# and dataset collections (collection of datasets).


#' As collection items
#' Structure as collection items
#'
#' Helper taking entity ids to create records used for dataset items or dataset collection items.
#' Helper taking entity ids to create records used for dataset items *or* dataset collection items.
#' Collection items have the form `list(entityId = id, versionNumber = x)`.
#'
#' Note: For item version, dataset items allow two meanings of "latest": the absolute latest ("abs")
#' vs. the latest stable version ("stable"). With files, either one can be used to mean the same thing,
#' since the correct interpretation is done under the hood.
#' See implementation in `latest_version`.
#'
#' @param ids Ids of entities to make into dataset items.
#' @param item_version Integer for version that will be used for all items, e.g. 1.
#' If NULL, this will look up the latest version for each id and use that.
#' Otherwise, "abs" (absolute latest) or "stable" (latest stable version). See details.
#' @keywords internal
as_coll_items <- function(ids, item_version = NULL) {
if(is.null(item_version)) {
item_version <- lapply(ids, function(id) .syn$get(id, downloadFile = FALSE)$properties$versionNumber)
as_coll_items <- function(ids, item_version = c("abs", "stable")) {

if(!is.integer(item_version)) {
version_semantics <- match.arg(item_version)
item_version <- lapply(ids, function(id) latest_version(id, version_semantics))
}

items <- Map(function(id, version) list(entityId = id, versionNumber = version), ids, item_version)
names(items) <- NULL # need to unname list for API
items
Expand Down Expand Up @@ -44,24 +52,25 @@ update_items <- function(current_coll, update_coll) {
# reconversion; using pure apply as.list coerces versionNumber into char
updated <- apply(updated, 1, function(i) list(entityId = unname(i[1]), versionNumber = as.integer(i[2])))
updated
}
}


#' Update item versions to "latest" in a collection
#'
#' Update an _existing_ collection so that all items or a subset of items reference their latest version.
#' This should work for both datasets (collection of files) and dataset collections (collection of datasets).
#' Should work for both datasets (collection of files) and dataset collections (collection of datasets).
#'
#' @inheritParams latest_version
#' @param collection_id Collection id.
#' @param items Vector of dataset ids for which to update reference to latest version,
#' or "all" (default) to update all in the dataset collection.
#' @param items Vector of dataset ids for which to update reference to latest version, or "all" (default) to update all.
#' @export
use_latest_in_collection <- function(collection_id, items = "all") {
use_latest_in_collection <- function(collection_id, items = "all", version_semantics = "abs") {

coll <- .syn$restGET(glue::glue("https://repo-prod.prod.sagebase.org/repo/v1/entity/{collection_id}"))
current_items <- sapply(coll$items, function(i) i$entityId)

if((length(items) == 1) && (items == "all")) {
coll$items <- as_coll_items(current_items)
coll$items <- as_coll_items(current_items, item_version = version_semantics)
} else {

# Check subset; if no check, this becomes `add_to_collection`
Expand All @@ -73,7 +82,7 @@ use_latest_in_collection <- function(collection_id, items = "all") {
return(coll)
}
}
updated_items <- update_items(coll$items, as_coll_items(items))
updated_items <- update_items(coll$items, as_coll_items(items, item_version = version_semantics))
coll$items <- updated_items
}
.syn$restPUT(glue::glue("https://repo-prod.prod.sagebase.org/repo/v1/entity/{collection_id}"), body = jsonlite::toJSON(coll, auto_unbox = TRUE))
Expand Down Expand Up @@ -149,6 +158,36 @@ new_dataset <- function(name, parent, items, item_version = NULL, dry_run = TRUE
}


#' Get the latest version
#'
#' Get latest version, with special handling for semantics of "latest" regarding new collection types.
#' Datasets and dataset collections always start out as draft so unlike other entities
#' there is a concept of a stable version which is the "real" latest, but which might not always exist.
#' For datasets/dataset collections the latest version refers to a DRAFT, so latest stable version is `versionNumber` - 1
#' under the condition that the `versionNumber` is greater or equal to 2.
#' When `versionNumber` = 1 and `isLatestVersion` is TRUE, this means there is not yet a stable version.
#' When using stable version semantics, if a stable version does not exist an error will be thrown.
#'
#' The parameter `version_semantics` allows user to specify "what type of *latest* do you mean?".
#'
#' Note: Do not use with versioned ids of the form "syn12345678.3"
#'
#' @param id Entity id. See details.
#' @param version_semantics Use "abs" for absolute latest version or "stable".
#' Only used for collection entities (datasets and dataset collections); for any
#' other entity type the entity's `versionNumber` is returned as-is. See details.
#' The legacy spellings "latest" and "stable_latest" are accepted as aliases of
#' "abs" and "stable", respectively.
#' @return The version number selected according to `version_semantics`.
latest_version <- function(id, version_semantics = c("abs", "stable")) {

  # Accept the older spellings so existing callers keep working.
  legacy_aliases <- c(latest = "abs", stable_latest = "stable")
  if (length(version_semantics) == 1L && version_semantics %in% names(legacy_aliases)) {
    version_semantics <- legacy_aliases[[version_semantics]]
  }
  # Resolve to a single value; the untouched default vector resolves to "abs".
  version_semantics <- match.arg(version_semantics, c("abs", "stable"))

  collection_types <- c(
    "org.sagebionetworks.repo.model.table.Dataset",
    "org.sagebionetworks.repo.model.table.DatasetCollection"
  )

  entity <- .syn$get(id, downloadFile = FALSE)
  version <- entity$properties$versionNumber
  is_collection <- isTRUE(entity$properties$concreteType %in% collection_types)

  if (is_collection && version_semantics == "stable") {
    # The current version of a collection is the draft; the stable one precedes it.
    version <- version - 1L
    if (version < 1) stop("No stable version exists for ", id)
  }

  version
}


#' Create Sarek-processed datasets
#'
#' Organize variant call files from Nextflow Sarek into 3-4 datasets,
Expand Down
28 changes: 28 additions & 0 deletions R/views.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#' Create a view
#'
#' This creates a generic view, including by default just file entities and the default columns
#' (i.e. defaults to a generic fileview).
#' This is often useful to get ids of files for a large number of nested files by creating a temp fileview
#' (the alternative is to use `walk`, but if the tree structure is not regular it can be messy to parse the output).
#'
#' @param scope Character id(s) of project or folder container(s) in scope.
#' @param project Parent project id to create the view in.
#' @param name Name of view.
#' @param include Which entity type(s) to include in scope, given as name(s) of
#' `synapseclient$EntityViewType` members: one or more of "FILE", "PROJECT",
#' "TABLE", "FOLDER", "VIEW", "DOCKER". Defaults to files.
#' @return The stored view object, invisibly.
#'
new_view <- function(scope,
                     project,
                     name = "New View",
                     include = "FILE") {

  # Explicit choices: deriving them from the default would only ever allow "FILE".
  entity_types <- c("FILE", "PROJECT", "TABLE", "FOLDER", "VIEW", "DOCKER")
  included <- match.arg(include, choices = entity_types, several.ok = TRUE)
  # With several.ok = TRUE, match.arg silently drops unmatched entries; fail instead.
  if (length(included) != length(include)) {
    stop("`include` must only contain: ", paste(entity_types, collapse = ", "))
  }

  # Translate the validated names into the corresponding enum members.
  include_types <- lapply(included, function(type) synapseclient$EntityViewType[[type]])

  view <- synapseclient$EntityViewSchema(
    name = name,
    columns = list(),
    parent = project,
    scopes = scope,
    includeEntityTypes = include_types,
    add_default_columns = TRUE)
  view <- .syn$store(view)
  invisible(view)
}
14 changes: 10 additions & 4 deletions man/as_coll_items.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/latest_version.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/new_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/new_view.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions man/use_latest_in_collection.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7c8804d

Please sign in to comment.