From 9a4c5ce87c64155cfacabf9353f50b6195388513 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 24 Aug 2023 21:28:12 -0600 Subject: [PATCH 1/3] Add find helpers --- R/find.R | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 R/find.R diff --git a/R/find.R b/R/find.R new file mode 100644 index 00000000..44ffa8ba --- /dev/null +++ b/R/find.R @@ -0,0 +1,87 @@ +# Utils to help overcome nested folders + +#' Find in path +#' +#' Get the Synapse id of an entity nested several folder layers deep without +#' having to click through the UI or create a fileview as long as the structure/path is known. +#' +#' @param scope Id of the container (project or folder) to begin search. +#' @param path Path string in format "subdir1/subdir2/file.txt", where the last-level element will be the id returned. +find_in <- function(scope, path) { + + path_list <- strsplit(path, split = "/", fixed = TRUE)[[1]] + here <- scope + id <- NULL + while(length(path_list)) { + child <- path_list[[1]] + path_list <- path_list[-1] + id <- here <- find_child(child_name = child, parent = here) + if(is.null(here)) break # path element not found: return NULL, never search with a NULL parent + } + id +} + +#' Find id of a child entity in a container +#' +#' @param parent Parent container (project or folder). +#' @param child_name Name of child entity. +find_child <- function(child_name, parent) { + + q <- .syn$getChildren(parent) + child_id <- NULL + repeat { + x <- reticulate::iter_next(q) + if(is.null(x) || x$name == child_name) { + child_id <- x$id + break + } + } + child_id +} + +# Find nextflow assets --------------------------------------------------------- # + +# Convenience functions for getting Synapse ids of nextflow assets + +#' Find a standard nextflow workflow output asset +#' +#' Note that samplesheets became part of the output only for newer versions of nf-core/rna-seq; +#' older runs may not find samplesheets. 
+#' Paths default to known working paths corresponding to the latest major workflow version, +#' but this may change and may need to be updated as part of util maintenance. +#' +#' @param syn_out Id of top-level folder that corresponds to `publishDir` in a nextflow workflow. +#' @param asset Name of asset to find. +#' @returns Synapse id of the requested asset. +#' @export +nf_find_asset <- function(syn_out, + asset = c("software_versions", "multiqc_report", "samplesheet", "samtools_stats")) { + + asset <- match.arg(asset) + path <- switch(asset, + software_versions = "pipeline_info/software_versions.yml", + multiqc_report = "multiqc/star_salmon/multiqc_report.html", + samplesheet = "pipeline_info/samplesheet.valid.csv", + samtools_stats = "multiqc/star_salmon/multiqc_data/multiqc_samtools_stats.txt" + ) + + id <- find_in(syn_out, path) + if(is.null(id)) stop("File not found. Is this the right output directory/path?") + id +} + + +#' Return workflow version according to workflow meta +#' +#' @returns Version string. +#' @export +nf_workflow_version <- function(syn_out) { + + version_meta <- nf_find_asset(syn_out, asset = "software_versions") + file <- .syn$get(version_meta, downloadFile = TRUE) + yml <- yaml::read_yaml(file$path) + workflow <- grep("nf-core", names(yml$Workflow)) + yml$Workflow[[workflow]] + +} + + From 97e316bc2ebabb9315a7397f7948172cae4cd002 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 25 Aug 2023 07:54:51 -0600 Subject: [PATCH 2/3] Export --- R/find.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/find.R b/R/find.R index 44ffa8ba..6889ac2d 100644 --- a/R/find.R +++ b/R/find.R @@ -7,6 +7,7 @@ #' #' @param scope Id of the container (project or folder) to begin search. #' @param path Path string in format "subdir1/subdir2/file.txt", where the last-level element will be the id returned. 
+#' @export find_in <- function(scope, path) { path_list <- strsplit(path, split = "/", fixed = TRUE)[[1]] @@ -25,6 +26,7 @@ find_in <- function(scope, path) { #' #' @param parent Parent container (project or folder). #' @param child_name Name of child entity. +#' @export find_child <- function(child_name, parent) { q <- .syn$getChildren(parent) From bc9ddd60ecd51a8320a22c9a52a2ca9624a6d955 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 25 Aug 2023 09:01:15 -0600 Subject: [PATCH 3/3] Update pkgdown and docs --- NAMESPACE | 4 ++++ R/find.R | 1 + _pkgdown.yml | 7 +++++++ man/find_child.Rd | 16 ++++++++++++++++ man/find_in.Rd | 17 +++++++++++++++++ man/nf_find_asset.Rd | 25 +++++++++++++++++++++++++ man/nf_workflow_version.Rd | 17 +++++++++++++++++ 7 files changed, 87 insertions(+) create mode 100644 man/find_child.Rd create mode 100644 man/find_in.Rd create mode 100644 man/nf_find_asset.Rd create mode 100644 man/nf_workflow_version.Rd diff --git a/NAMESPACE b/NAMESPACE index dc3f3a8a..6b5202f1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,8 @@ export(convert_to_stringlist) export(copy_annotations) export(data_curator_app_subpage) export(delete_provenance) +export(find_child) +export(find_in) export(from_pubmed) export(get_by_prop_from_json_schema) export(get_dependency_from_json_schema) @@ -57,8 +59,10 @@ export(missing_annotation_email) export(new_dataset) export(new_project) export(nf_cnv_dataset) +export(nf_find_asset) export(nf_sarek_datasets) export(nf_star_salmon_datasets) +export(nf_workflow_version) export(processing_flowchart) export(qc_manifest) export(register_study) diff --git a/R/find.R b/R/find.R index 6889ac2d..eb1d6269 100644 --- a/R/find.R +++ b/R/find.R @@ -75,6 +75,7 @@ nf_find_asset <- function(syn_out, #' Return workflow version according to workflow meta #' +#' @inheritParams nf_find_asset #' @returns Version string. 
#' @export nf_workflow_version <- function(syn_out) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 77688b17..47174bda 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,6 +44,7 @@ reference: - annotate_cnv - annotate_reports_sarek - annotate_with_tool_stats + - nf_workflow_version - title: Dataset Creation and Management - subtitle: General dataset creation and citation @@ -80,6 +81,12 @@ reference: - summarize_file_access - grant_specific_file_access +- title: Search Utils + desc: Help locate Synapse accessions, etc. +- contents: + - contains("find") + - nf_find_asset + - title: Provenance Utils desc: Manage provenance metadata - contents: diff --git a/man/find_child.Rd b/man/find_child.Rd new file mode 100644 index 00000000..6bc227e8 --- /dev/null +++ b/man/find_child.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find.R +\name{find_child} +\alias{find_child} +\title{Find id of a child entity in a container} +\usage{ +find_child(child_name, parent) +} +\arguments{ +\item{child_name}{Name of child entity.} + +\item{parent}{Parent container (project or folder).} +} +\description{ +Find id of a child entity in a container +} diff --git a/man/find_in.Rd b/man/find_in.Rd new file mode 100644 index 00000000..2c9dce91 --- /dev/null +++ b/man/find_in.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find.R +\name{find_in} +\alias{find_in} +\title{Find in path} +\usage{ +find_in(scope, path) +} +\arguments{ +\item{scope}{Id of the container (project or folder) to begin search.} + +\item{path}{Path string in format "subdir1/subdir2/file.txt", where the last-level element will be the id returned.} +} +\description{ +Get the Synapse id of an entity nested several folder layers deep without +having to click through the UI or create a fileview as long as the structure/path is known. 
+} diff --git a/man/nf_find_asset.Rd b/man/nf_find_asset.Rd new file mode 100644 index 00000000..66da1d63 --- /dev/null +++ b/man/nf_find_asset.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find.R +\name{nf_find_asset} +\alias{nf_find_asset} +\title{Find a standard nextflow workflow output asset} +\usage{ +nf_find_asset( + syn_out, + asset = c("software_versions", "multiqc_report", "samplesheet", "samtools_stats") +) +} +\arguments{ +\item{syn_out}{Id of top-level folder that corresponds to \code{publishDir} in a nextflow workflow.} + +\item{asset}{Name of asset to find.} +} +\value{ +Id of samplesheet. +} +\description{ +Note that samplesheets became part of the output only for newer versions of nf-core/rna-seq; +older runs may not find samplesheets. +Paths default to known working paths corresponding to the latest major workflow version, +but this may change and may need to be updated as part of util maintenance. +} diff --git a/man/nf_workflow_version.Rd b/man/nf_workflow_version.Rd new file mode 100644 index 00000000..a6960515 --- /dev/null +++ b/man/nf_workflow_version.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find.R +\name{nf_workflow_version} +\alias{nf_workflow_version} +\title{Return workflow version according to workflow meta} +\usage{ +nf_workflow_version(syn_out) +} +\arguments{ +\item{syn_out}{Id of top-level folder that corresponds to \code{publishDir} in a nextflow workflow.} +} +\value{ +Version string. +} +\description{ +Return workflow version according to workflow meta +}