Skip to content

Commit

Permalink
Support multiple formats in the same run
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Sherman <[email protected]>
  • Loading branch information
bentsherman committed Oct 9, 2023
1 parent a0f0dd4 commit 0cfa1b1
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 60 deletions.
43 changes: 29 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,56 @@ plugins {
}
prov {
enabled = true
overwrite = true
file = "${params.outdir}/manifest.json"
enabled = true
formats {
legacy {
file = 'manifest.json'
overwrite = true
}
}
}
```

Finally, run your Nextflow pipeline. You do not need to modify your pipeline script in order to use the `nf-prov` plugin. The plugin will automatically generate a JSON file with provenance information.

## Configuration

*The top-level `prov.file`, `prov.format`, and `prov.overwrite` options have been deprecated since version 1.2.0. Use `prov.formats` instead, and set `file` and `overwrite` for each format.*

The following options are available:

`prov.enabled`

Create the provenance report (default: `true` if plugin is loaded).

`prov.file`

The path of the provenance report (default: `manifest.json`).
`prov.formats`

`prov.format`
Configuration scope for the desired output formats. The following formats are available:

The report format. The following formats are available:

- `bco`: Render a [BioCompute Object](https://biocomputeobject.org/).
- `bco`: Render a [BioCompute Object](https://biocomputeobject.org/). Supports the `file` and `overwrite` options.

Visit the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) to learn more about this format and how to extend it with information that isn't available to Nextflow.

- `dag`: Render the task graph as a Mermaid diagram embedded in an HTML document.
- `dag`: Render the task graph as a Mermaid diagram embedded in an HTML document. Supports the `file` and `overwrite` options.

- `legacy`: Render the legacy format originally defined in this plugin (default).
- `legacy`: Render the legacy format originally defined in this plugin. Supports the `file` and `overwrite` options.

`prov.overwrite`
Any number of formats can be specified, for example:

Overwrite any existing provenance report with the same name (default: `false`).
```groovy
prov {
formats {
bco {
file = 'bco.json'
overwrite = true
}
legacy {
file = 'manifest.json'
overwrite = true
}
}
}
```

`prov.patterns`

Expand Down
17 changes: 14 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,18 @@ params {
}

prov {
overwrite = true
file = "${params.outdir}/bco.json"
format = 'bco'
formats {
bco {
file = "${params.outdir}/bco.json"
overwrite = true
}
dag {
file = "${params.outdir}/dag.html"
overwrite = true
}
legacy {
file = "${params.outdir}/manifest.json"
overwrite = true
}
}
}
13 changes: 12 additions & 1 deletion plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,22 @@ import nextflow.util.CacheHelper
@CompileStatic
class BcoRenderer implements Renderer {

private Path path

private boolean overwrite

@Delegate
private PathNormalizer normalizer

/**
 * Create a BCO renderer from its format options.
 *
 * @param opts  format options; the `file` entry is read as the output path
 *              and the `overwrite` entry as the overwrite flag
 */
BcoRenderer(Map opts) {
    this.path = opts.get('file') as Path
    this.overwrite = opts.get('overwrite') as Boolean

    // validate the output location up front: this aborts when the file
    // already exists and cannot (or may not) be replaced
    ProvHelper.checkFileOverwrite(this.path, this.overwrite)
}

@Override
void render(Session session, Set<TaskRun> tasks, Map<Path,Path> workflowOutputs, Path path) {
void render(Session session, Set<TaskRun> tasks, Map<Path,Path> workflowOutputs) {
// get workflow inputs
final taskLookup = ProvHelper.getTaskLookup(tasks)
final workflowInputs = ProvHelper.getWorkflowInputs(tasks, taskLookup)
Expand Down
13 changes: 12 additions & 1 deletion plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,22 @@ import nextflow.util.StringUtils
@CompileStatic
class DagRenderer implements Renderer {

private Path path

private boolean overwrite

@Delegate
private PathNormalizer normalizer

/**
 * Create a DAG renderer from its format options.
 *
 * @param opts  format options; the `file` entry is read as the output path
 *              and the `overwrite` entry as the overwrite flag
 */
DagRenderer(Map opts) {
    this.path = opts.get('file') as Path
    this.overwrite = opts.get('overwrite') as Boolean

    // validate the output location up front: this aborts when the file
    // already exists and cannot (or may not) be replaced
    ProvHelper.checkFileOverwrite(this.path, this.overwrite)
}

@Override
void render(Session session, Set<TaskRun> tasks, Map<Path,Path> workflowOutputs, Path path) {
void render(Session session, Set<TaskRun> tasks, Map<Path,Path> workflowOutputs) {
// get workflow metadata
final metadata = session.workflowMetadata
this.normalizer = new PathNormalizer(metadata)
Expand Down
13 changes: 12 additions & 1 deletion plugins/nf-prov/src/main/nextflow/prov/LegacyRenderer.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ import nextflow.processor.TaskRun
@CompileStatic
class LegacyRenderer implements Renderer {

private Path path

private boolean overwrite

/**
 * Create a legacy-format renderer from its format options.
 *
 * @param opts  format options; the `file` entry is read as the output path
 *              and the `overwrite` entry as the overwrite flag
 */
LegacyRenderer(Map opts) {
    this.path = opts.get('file') as Path
    this.overwrite = opts.get('overwrite') as Boolean

    // validate the output location up front: this aborts when the file
    // already exists and cannot (or may not) be replaced
    ProvHelper.checkFileOverwrite(this.path, this.overwrite)
}

private static def jsonify(root) {
if ( root instanceof Map )
root.collectEntries( (k, v) -> [k, jsonify(v)] )
Expand Down Expand Up @@ -79,7 +90,7 @@ class LegacyRenderer implements Renderer {
}

@Override
void render(Session session, Set<TaskRun> tasks, Map<Path,Path> outputs, Path path) {
void render(Session session, Set<TaskRun> tasks, Map<Path,Path> outputs) {
// generate task manifest
def tasksMap = tasks.inject([:]) { accum, task ->
accum[task.id] = renderTask(task)
Expand Down
19 changes: 19 additions & 0 deletions plugins/nf-prov/src/main/nextflow/prov/ProvHelper.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package nextflow.prov
import java.nio.file.Path

import groovy.transform.CompileStatic
import nextflow.exception.AbortOperationException
import nextflow.file.FileHelper
import nextflow.processor.TaskRun
import nextflow.script.params.FileOutParam

Expand All @@ -30,6 +32,23 @@ import nextflow.script.params.FileOutParam
@CompileStatic
class ProvHelper {

/**
 * Check whether a provenance file already exists and throw an
 * error if it cannot be overwritten.
 *
 * When the file exists and `overwrite` is true, the existing file is
 * deleted so that it can be written again later.
 *
 * @param path       location of the provenance file to be written
 * @param overwrite  whether an existing file at `path` may be deleted
 * @throws AbortOperationException if `path` is null, if the file exists and
 *         `overwrite` is false, or if the existing entry is a directory or
 *         could not be deleted
 */
static void checkFileOverwrite(Path path, boolean overwrite) {
    // a format configured without a `file` option yields a null path;
    // report that explicitly rather than passing null to the attribute lookup
    if( path == null )
        throw new AbortOperationException("Provenance file path was not specified -- set the `file` option for each provenance format")

    final attrs = FileHelper.readAttributes(path)

    // no attributes: treated as "nothing exists at this path", so there is nothing to overwrite
    if( !attrs )
        return

    if( !overwrite )
        throw new AbortOperationException("Provenance file already exists: ${path.toUriString()}")

    // a directory is never replaced, and a failed delete is reported as an error
    if( attrs.isDirectory() || !path.delete() )
        throw new AbortOperationException("Unable to overwrite existing provenance file: ${path.toUriString()}")
}

/**
* Get the list of output files for a task.
*
Expand Down
51 changes: 17 additions & 34 deletions plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@ import java.nio.file.PathMatcher
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Session
import nextflow.trace.TraceObserver
import nextflow.trace.TraceRecord
import nextflow.file.FileHelper
import nextflow.processor.TaskHandler
import nextflow.processor.TaskRun
import nextflow.exception.AbortOperationException
import nextflow.trace.TraceObserver
import nextflow.trace.TraceRecord

/**
* Plugin observer of workflow events
Expand All @@ -40,58 +38,41 @@ import nextflow.exception.AbortOperationException
@CompileStatic
class ProvObserver implements TraceObserver {

public static final String DEF_FILE_NAME = 'manifest.json'

public static final List<String> VALID_FORMATS = ['bco', 'dag', 'legacy']

private Session session

private Path path

private Renderer renderer

private Boolean overwrite
private List<Renderer> renderers

private List<PathMatcher> matchers

private Set<TaskRun> tasks = []

private Map<Path,Path> workflowOutputs = [:]

ProvObserver(Path path, String format, Boolean overwrite, List patterns) {
this.path = path
this.renderer = createRenderer(format)
this.overwrite = overwrite
this.matchers = patterns.collect { pattern ->
ProvObserver(Map<String,Map> formats, List<String> patterns) {
this.renderers = formats.collect( (name, config) -> createRenderer(name, config) )
this.matchers = patterns.collect( pattern ->
FileSystems.getDefault().getPathMatcher("glob:**/${pattern}")
}
)
}

private Renderer createRenderer(String format) {
if( format == 'bco' )
return new BcoRenderer()
private Renderer createRenderer(String name, Map opts) {
if( name == 'bco' )
return new BcoRenderer(opts)

if( format == 'dag' )
return new DagRenderer()
if( name == 'dag' )
return new DagRenderer(opts)

if( format == 'legacy' )
return new LegacyRenderer()
if( name == 'legacy' )
return new LegacyRenderer(opts)

throw new IllegalArgumentException("Invalid provenance format -- valid formats are ${VALID_FORMATS.join(', ')}")
}

@Override
void onFlowCreate(Session session) {
this.session = session

// check file existance
final attrs = FileHelper.readAttributes(path)
if( attrs ) {
if( overwrite && (attrs.isDirectory() || !path.delete()) )
throw new AbortOperationException("Unable to overwrite existing provenance manifest: ${path.toUriString()}")
else if( !overwrite )
throw new AbortOperationException("Provenance manifest already exists: ${path.toUriString()}")
}
}

@Override
Expand Down Expand Up @@ -126,7 +107,9 @@ class ProvObserver implements TraceObserver {
if( !session.isSuccess() )
return

renderer.render(session, tasks, workflowOutputs, path)
renderers.each( renderer ->
renderer.render(session, tasks, workflowOutputs)
)
}

}
23 changes: 18 additions & 5 deletions plugins/nf-prov/src/main/nextflow/prov/ProvObserverFactory.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package nextflow.prov
import java.nio.file.Path

import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Session
import nextflow.exception.AbortOperationException
import nextflow.trace.TraceObserver
import nextflow.trace.TraceObserverFactory

Expand All @@ -28,6 +30,7 @@ import nextflow.trace.TraceObserverFactory
*
* @author Ben Sherman <[email protected]>
*/
@Slf4j
@CompileStatic
class ProvObserverFactory implements TraceObserverFactory {

Expand All @@ -41,11 +44,21 @@ class ProvObserverFactory implements TraceObserverFactory {
if( !enabled )
return

final file = config.navigate('prov.file', ProvObserver.DEF_FILE_NAME)
final path = (file as Path).complete()
final format = config.navigate('prov.format', 'legacy') as String
final format = config.navigate('prov.format') as String
final file = config.navigate('prov.file', 'manifest.json') as String
final overwrite = config.navigate('prov.overwrite') as Boolean
final patterns = config.navigate('prov.patterns', []) as List
new ProvObserver(path, format, overwrite, patterns)
def formats = [:]
if( format ) {
log.warn "Config options `prov.format`, `prov.file`, and `prov.overwrite` are deprecated -- use `prov.formats` instead"
formats[format] = [file: file, overwrite: overwrite]
}

formats = config.navigate('prov.formats', formats) as Map

if( !formats )
throw new AbortOperationException("Config setting `prov.formats` is required to specify provenance output formats")

final patterns = config.navigate('prov.patterns', []) as List<String>
new ProvObserver(formats, patterns)
}
}
2 changes: 1 addition & 1 deletion plugins/nf-prov/src/main/nextflow/prov/Renderer.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ import nextflow.processor.TaskRun
*/
interface Renderer {

abstract void render(Session session, Set<TaskRun> tasks, Map<Path,Path> outputs, Path path)
abstract void render(Session session, Set<TaskRun> tasks, Map<Path,Path> outputs)

}

0 comments on commit 0cfa1b1

Please sign in to comment.