From 04fd074042041dd400c253c76e03d5b9d15d7a90 Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Fri, 20 Oct 2023 07:33:44 -0500
Subject: [PATCH] Add Workflow Run RO-crate format

Signed-off-by: Ben Sherman
---
 README.md                                     |   4 +-
 nextflow.config                               |   4 +
 .../main/nextflow/prov/ProvObserver.groovy    |   5 +-
 .../main/nextflow/prov/WrrocRenderer.groovy   | 279 ++++++++++++++++++
 4 files changed, 290 insertions(+), 2 deletions(-)
 create mode 100644 plugins/nf-prov/src/main/nextflow/prov/WrrocRenderer.groovy

diff --git a/README.md b/README.md
index e4da46a..6bf3bc9 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ prov {
 }
 ```
 
-Finally, run your Nextflow pipeline. You do not need to modify your pipeline script in order to use the `nf-prov` plugin. The plugin will automatically generate a JSON file with provenance information.
+Finally, run your Nextflow pipeline. You do not need to modify your pipeline script in order to use the `nf-prov` plugin. The plugin will automatically produce the specified provenance reports at the end of the workflow run.
 
 ## Configuration
 
@@ -48,6 +48,8 @@ Configuration scope for the desired output formats. The following formats are av
 
 - `legacy`: Render the legacy format originally defined in this plugin (default). Supports the `file` and `overwrite` options.
 
+- `wrroc`: Render a [Workflow Run RO-Crate](https://www.researchobject.org/workflow-run-crate/). Includes all three profiles (Process, Workflow, and Provenance).
+
 Any number of formats can be specified, for example:
 
 ```groovy
diff --git a/nextflow.config b/nextflow.config
index 6219b1b..0baf57e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,5 +20,9 @@ prov {
             file = "${params.outdir}/manifest.json"
             overwrite = true
         }
+        wrroc {
+            file = "${params.outdir}/ro-crate-metadata.json"
+            overwrite = true
+        }
     }
 }
diff --git a/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy b/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy
index 658de6d..4a6aa08 100644
--- a/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy
+++ b/plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy
@@ -38,7 +38,7 @@ import nextflow.trace.TraceRecord
 @CompileStatic
 class ProvObserver implements TraceObserver {
 
-    public static final List VALID_FORMATS = ['bco', 'dag', 'legacy']
+    public static final List VALID_FORMATS = ['bco', 'dag', 'legacy', 'wrroc']
 
     private Session session
 
@@ -67,6 +67,9 @@ class ProvObserver implements TraceObserver {
         if( name == 'legacy' )
             return new LegacyRenderer(opts)
 
+        if( name == 'wrroc' )
+            return new WrrocRenderer(opts)
+
         throw new IllegalArgumentException("Invalid provenance format -- valid formats are ${VALID_FORMATS.join(', ')}")
     }
 
diff --git a/plugins/nf-prov/src/main/nextflow/prov/WrrocRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/WrrocRenderer.groovy
new file mode 100644
index 0000000..953c124
--- /dev/null
+++ b/plugins/nf-prov/src/main/nextflow/prov/WrrocRenderer.groovy
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2023, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package nextflow.prov
+
+import java.nio.file.Files
+import java.nio.file.Path
+import java.time.format.DateTimeFormatter
+
+import groovy.json.JsonOutput
+import groovy.transform.CompileStatic
+import nextflow.Session
+import nextflow.exception.AbortOperationException
+import nextflow.processor.TaskRun
+
+/**
+ * Renderer for the Workflow Run RO-Crate (WRROC) format.
+ *
+ * Writes a single RO-Crate metadata document (JSON-LD) that declares
+ * conformance to the Process, Workflow, and Provenance Run Crate profiles.
+ *
+ * @author Ben Sherman
+ */
+@CompileStatic
+class WrrocRenderer implements Renderer {
+
+    // destination of the rendered JSON document (the `file` option)
+    private Path path
+
+    // the `overwrite` option, checked once in the constructor
+    private boolean overwrite
+
+    // created in render(), once the workflow metadata is available
+    @Delegate
+    private PathNormalizer normalizer
+
+    /**
+     * @param opts format options; reads `file` (output path) and `overwrite`
+     */
+    WrrocRenderer(Map opts) {
+        path = opts.file as Path
+        overwrite = opts.overwrite as Boolean
+
+        ProvHelper.checkFileOverwrite(path, overwrite)
+    }
+
+    /**
+     * Build the RO-Crate graph for the workflow run and write it to `path`
+     * as pretty-printed JSON.
+     *
+     * @param session          session whose workflow metadata and params are reported
+     * @param tasks            tasks from which the workflow input files are derived
+     * @param workflowOutputs  output files; only the keys are reported
+     */
+    @Override
+    void render(Session session, Set tasks, Map workflowOutputs) {
+        // get workflow inputs
+        final taskLookup = ProvHelper.getTaskLookup(tasks)
+        final workflowInputs = ProvHelper.getWorkflowInputs(tasks, taskLookup)
+
+        // get workflow metadata
+        final metadata = session.workflowMetadata
+        this.normalizer = new PathNormalizer(metadata)
+
+        final manifest = metadata.manifest
+        final nextflowMeta = metadata.nextflow
+
+        final formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME
+        final dateStarted = formatter.format(metadata.start)
+        final dateCompleted = formatter.format(metadata.complete)
+        final nextflowVersion = nextflowMeta.version.toString()
+        final params = session.config.params as Map
+
+        // create manifest
+        final softwareApplicationId = UUID.randomUUID()
+        final organizeActionId = UUID.randomUUID()
+
+        // one Person entity per comma-separated name in manifest.author
+        final authors = (manifest.author ?: '')
+            .tokenize(',')
+            .withIndex()
+            .collect { String name, int i -> [
+                "@id": "author-${i + 1}",
+                "@type": "Person",
+                "name": name.trim()
+            ] }
+
+        // one FormalParameter entity per flattened pipeline parameter
+        final formalParameters = params
+            .toConfigObject()
+            .flatten()
+            .collect { name, value -> [
+                "@id": "#${name}",
+                "@type": "FormalParameter",
+                // TODO: infer type from value at runtime
+                // "additionalType": "File",
+                // "defaultValue": "",
+                "conformsTo": ["@id": "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE"],
+                "description": "",
+                // TODO: apply only if type is Path
+                // "encodingFormat": "text/plain",
+                // TODO: match to output if type is Path
+                // "workExample": ["@id": outputId],
+                "name": name,
+                // "valueRequired": "True"
+            ] }
+
+        final inputFiles = workflowInputs
+            .collect { source -> [
+                "@id": normalizePath(source),
+                "@type": "File",
+                "description": "",
+                "encodingFormat": Files.probeContentType(source) ?: "",
+                // TODO: apply if matching param is found
+                // "exampleOfWork": ["@id": paramId]
+            ] }
+
+        // TODO: create PropertyValue for each non-file FormalParameter output
+        final propertyValues = [:]
+            .collect { name, value -> [
+                "@id": "#${name}",
+                "@type": "PropertyValue",
+                // TODO: match to param
+                // "exampleOfWork": ["@id": "#verbose-param"],
+                "name": name,
+                "value": value
+            ] }
+
+        final outputFiles = workflowOutputs
+            .collect { source, target -> [
+                "@id": normalizePath(source),
+                "@type": "File",
+                "name": source.name,
+                "description": "",
+                "encodingFormat": Files.probeContentType(source) ?: "",
+                // TODO: create FormalParameter for each output file?
+                // "exampleOfWork": ["@id": "#reversed"]
+            ] }
+
+        final wrroc = [
+            "@context": "https://w3id.org/ro/crate/1.1/context",
+            "@graph": [
+                [
+                    "@id": path.name,
+                    "@type": "CreativeWork",
+                    "about": ["@id": "./"],
+                    "conformsTo": [
+                        ["@id": "https://w3id.org/ro/crate/1.1"],
+                        ["@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"]
+                    ]
+                ],
+                [
+                    "@id": "./",
+                    "@type": "Dataset",
+                    "conformsTo": [
+                        ["@id": "https://w3id.org/ro/wfrun/process/0.1"],
+                        ["@id": "https://w3id.org/ro/wfrun/workflow/0.1"],
+                        ["@id": "https://w3id.org/ro/wfrun/provenance/0.1"],
+                        ["@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"]
+                    ],
+                    "name": "Workflow run of ${metadata.projectName}",
+                    "description": manifest.description ?: "",
+                    "hasPart": [
+                        ["@id": metadata.projectName],
+                        *inputFiles.collect( file -> ["@id": file["@id"]] ),
+                        *outputFiles.collect( file -> ["@id": file["@id"]] )
+                    ],
+                    "mainEntity": ["@id": metadata.projectName],
+                    "mentions": ["@id": "#${session.uniqueId}"]
+                ],
+                [
+                    "@id": "https://w3id.org/ro/wfrun/process/0.1",
+                    "@type": "CreativeWork",
+                    "name": "Process Run Crate",
+                    "version": "0.1"
+                ],
+                [
+                    "@id": "https://w3id.org/ro/wfrun/workflow/0.1",
+                    "@type": "CreativeWork",
+                    "name": "Workflow Run Crate",
+                    "version": "0.1"
+                ],
+                [
+                    "@id": "https://w3id.org/ro/wfrun/provenance/0.1",
+                    "@type": "CreativeWork",
+                    "name": "Provenance Run Crate",
+                    "version": "0.1"
+                ],
+                [
+                    "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0",
+                    "@type": "CreativeWork",
+                    "name": "Workflow RO-Crate",
+                    "version": "1.0"
+                ],
+                [
+                    "@id": metadata.projectName,
+                    "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow", "HowTo"],
+                    "name": metadata.projectName,
+                    "programmingLanguage": ["@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"],
+                    "hasPart": [
+                        // TODO: module files? processes?
+                    ],
+                    "input": formalParameters.collect( fp ->
+                        ["@id": fp["@id"]]
+                    ),
+                    "output": [
+                        // TODO: id of FormalParameter for each output file
+                    ],
+                    "step": [
+                        // TODO: processes?
+                    ]
+                ],
+                [
+                    "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
+                    "@type": "ComputerLanguage",
+                    "name": "Nextflow",
+                    "identifier": "https://www.nextflow.io/",
+                    "url": "https://www.nextflow.io/",
+                    "version": nextflowVersion
+                ],
+                // TODO: SoftwareApplication for each process w/ formal parameters
+                *formalParameters,
+                [
+                    "@id": "#${softwareApplicationId}",
+                    "@type": "SoftwareApplication",
+                    "name": "Nextflow ${nextflowVersion}"
+                ],
+                [
+                    "@id": "#${organizeActionId}",
+                    "@type": "OrganizeAction",
+                    "agent": authors ? ["@id": "author-1"] : null,
+                    "instrument": ["@id": "#${softwareApplicationId}"],
+                    "name": "Run of Nextflow ${nextflowVersion}",
+                    "object": [
+                        // TODO: list a ControlAction per step; empty for now so no @id dangles
+                    ],
+                    "result": ["@id": "#${session.uniqueId}"],
+                    "startTime": dateStarted
+                ],
+                *authors,
+                [
+                    "@id": "#${session.uniqueId}",
+                    "@type": "CreateAction",
+                    "name": "Nextflow workflow run ${session.uniqueId}",
+                    "startTime": dateStarted,
+                    "endTime": dateCompleted,
+                    "instrument": ["@id": metadata.projectName],
+                    "object": [
+                        *inputFiles.collect( file -> ["@id": file["@id"]] ),
+                        *propertyValues.collect( pv -> ["@id": pv["@id"]] )
+                    ],
+                    "result": outputFiles.collect( file ->
+                        ["@id": file["@id"]]
+                    )
+                ],
+                *inputFiles,
+                *propertyValues,
+                *outputFiles
+            ]
+        ]
+
+        // render manifest to JSON file
+        path.text = JsonOutput.prettyPrint(JsonOutput.toJson(wrroc))
+    }
+
+}