Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/pathfinder #121

Merged
merged 3 commits into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ export(convert_to_stringlist)
export(copy_annotations)
export(data_curator_app_subpage)
export(delete_provenance)
export(find_child)
export(find_in)
export(from_pubmed)
export(get_by_prop_from_json_schema)
export(get_dependency_from_json_schema)
Expand All @@ -57,8 +59,10 @@ export(missing_annotation_email)
export(new_dataset)
export(new_project)
export(nf_cnv_dataset)
export(nf_find_asset)
export(nf_sarek_datasets)
export(nf_star_salmon_datasets)
export(nf_workflow_version)
export(processing_flowchart)
export(qc_manifest)
export(register_study)
Expand Down
90 changes: 90 additions & 0 deletions R/find.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Utils to help overcome nested folders

#' Find in path
#'
#' Get the Synapse id of an entity nested several folder layers deep without
#' having to click through the UI or create a fileview, as long as the
#' structure/path is known.
#'
#' @param scope Id of the container (project or folder) to begin search.
#' @param path Path string in format "subdir1/subdir2/file.txt", where the
#'   last-level element will be the id returned. Empty components (e.g. from a
#'   leading or doubled "/") are ignored.
#' @returns Id of the last element in `path`, or `NULL` if the path is empty
#'   or any element along the path cannot be found.
#' @export
find_in <- function(scope, path) {

  path_list <- strsplit(path, split = "/", fixed = TRUE)[[1]]
  # Leading or doubled slashes produce "" components, which can never match
  # a real entity name, so drop them instead of searching for them.
  path_list <- path_list[nzchar(path_list)]

  id <- NULL
  here <- scope
  for (child in path_list) {
    id <- find_child(child_name = child, parent = here)
    # Stop descending as soon as a level is missing; otherwise the next
    # lookup would be issued with a NULL parent.
    if (is.null(id)) {
      break
    }
    here <- id
  }
  id
}

#' Find id of a child entity in a container
#'
#' Walks the children of `parent` one at a time and stops at the first child
#' whose name equals `child_name`.
#'
#' @param child_name Name of child entity.
#' @param parent Parent container (project or folder).
#' @returns Id of the first matching child, or `NULL` if no child matches.
#' @export
find_child <- function(child_name, parent) {

  children <- .syn$getChildren(parent)
  entity <- reticulate::iter_next(children)
  # Keep advancing until the iterator is exhausted (NULL) or the name matches.
  while (!is.null(entity) && entity$name != child_name) {
    entity <- reticulate::iter_next(children)
  }
  # `entity` is NULL when nothing matched, and NULL$id is NULL.
  entity$id
}

# Find nextflow assets --------------------------------------------------------- #

# Convenience functions for getting Synapse ids of nextflow assets

#' Find a standard nextflow workflow output asset
#'
#' Resolves a named asset to its expected relative path inside the workflow
#' output folder and looks up the Synapse id at that path.
#'
#' Note that samplesheets became part of the output only for newer versions of
#' nf-core/rna-seq; older runs may not find samplesheets.
#' Paths default to known working paths corresponding to the latest major
#' workflow version, but this may change and may need to be updated as part of
#' util maintenance.
#'
#' @param syn_out Id of top-level folder that corresponds to `publishDir` in a
#'   nextflow workflow.
#' @param asset Name of asset to find.
#' @returns Id of the requested asset. An error is raised if the asset is not
#'   found at its expected path.
#' @export
nf_find_asset <- function(syn_out,
                          asset = c("software_versions", "multiqc_report", "samplesheet", "samtools_stats")) {

  asset <- match.arg(asset)

  # Expected location of each asset, relative to `syn_out`.
  known_paths <- c(
    software_versions = "pipeline_info/software_versions.yml",
    multiqc_report = "multiqc/star_salmon/multiqc_report.html",
    samplesheet = "pipeline_info/samplesheet.valid.csv",
    samtools_stats = "multiqc/star_salmon/multiqc_data/multiqc_samtools_stats.txt"
  )

  asset_id <- find_in(syn_out, known_paths[[asset]])
  if (is.null(asset_id)) {
    stop("File not found. Is this the right output directory/path?")
  }
  asset_id
}


#' Return workflow version according to workflow meta
#'
#' Locates the `software_versions` asset under `syn_out`, downloads it, and
#' reads the version recorded for the nf-core workflow in its `Workflow`
#' section.
#'
#' @inheritParams nf_find_asset
#' @returns Version string.
#' @export
nf_workflow_version <- function(syn_out) {

  version_meta <- nf_find_asset(syn_out, asset = "software_versions")
  file <- .syn$get(version_meta, downloadFile = TRUE)
  yml <- yaml::read_yaml(file$path)

  # The workflow entry is keyed by the pipeline name (contains "nf-core").
  workflow <- grep("nf-core", names(yml$Workflow), fixed = TRUE)
  if (length(workflow) == 0) {
    stop("No nf-core workflow entry found in the software versions file.")
  }
  # Read from the parsed object `yml`; take the first match so that `[[`
  # always receives a single index.
  yml$Workflow[[workflow[1]]]

}

7 changes: 7 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ reference:
- annotate_cnv
- annotate_reports_sarek
- annotate_with_tool_stats
- nf_workflow_version

- title: Dataset Creation and Management
- subtitle: General dataset creation and citation
Expand Down Expand Up @@ -80,6 +81,12 @@ reference:
- summarize_file_access
- grant_specific_file_access

- title: Search Utils
desc: Help locate Synapse accessions, etc.
- contents:
- contains("find")
- nf_find_asset

- title: Provenance Utils
desc: Manage provenance metadata
- contents:
Expand Down
16 changes: 16 additions & 0 deletions man/find_child.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/find_in.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/nf_find_asset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/nf_workflow_version.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading