Skip to content

Commit

Permalink
Merge pull request #461 from nf-core/nextflow-enable-strict
Browse files Browse the repository at this point in the history
Try adding nextflow strict
  • Loading branch information
jfy133 authored Apr 4, 2024
2 parents d697e6a + 2ebce4c commit 593c013
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- [#454](https://github.com/nf-core/taxprofiler/pull/454) Updated to nf-core pipeline template v2.13.1 (added by @LilyAnderssonLee & @sofstam)
- [#461](https://github.com/nf-core/taxprofiler/pull/461) Turned on 'strict' Nextflow evaluation runs (added by @jfy133)
- [#461](https://github.com/nf-core/taxprofiler/pull/461) Optimised database decompression so each compressed input database is untarred once, and shared amongst each run with different parameters (added by @jfy133)
- [#461](https://github.com/nf-core/taxprofiler/pull/461) Added new parameter to optionally save uncompressed databases (added by @jfy133)

### `Fixed`

Expand Down
17 changes: 17 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

// Settings for the UNTAR process, which decompresses input archives.
withName: UNTAR {
// Name the output after the simple name of the input archive.
ext.prefix = { "${archive.simpleName}" }
// Publish the decompressed contents to <outdir>/untar/databases, but only
// when params.save_untarred_databases is set.
publishDir = [
path: { "${params.outdir}/untar/databases" },
mode: params.publish_dir_mode,
enabled: params.save_untarred_databases
]
}

withName: FASTQC {
ext.args = '--quiet'
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
Expand Down Expand Up @@ -512,6 +521,14 @@ process {
]
}

// Settings for KRAKENTOOLS_KREPORT2KRONA: publishing is switched off, so the
// '*.txt' outputs of this process are not copied to the results directory.
withName: KRAKENTOOLS_KREPORT2KRONA {
publishDir = [
enabled: false,
mode: params.publish_dir_mode,
pattern: '*.txt'
]
}

withName: KRONA_CLEANUP {
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}" }
publishDir = [
Expand Down
16 changes: 16 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The directories listed below will be created in the results directory after the

The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:

- [UNTAR](#untar) - Optionally save decompressed input databases
- [FastQC](#fastqc) - Raw read QC
- [falco](#fastqc) - Alternative to FastQC for raw read QC
- [fastp](#fastp) - Adapter trimming for Illumina data
Expand Down Expand Up @@ -40,6 +41,21 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

![](images/taxprofiler_tube.png)

### untar

untar is used in nf-core/taxprofiler to decompress various input files ending in `.tar.gz`. This process is mainly used for decompressing input database archive files.

<details markdown="1">
<summary>Output files</summary>

- `untar/`
  - `databases/`
    - `<database_file_name>`: directory containing contents of the decompressed archive

</details>

This directory will only be present if `--save_untarred_databases` is supplied. The contained directories can be useful for moving the decompressed directories to a central 'cache' location, allowing users to re-use the same databases. This avoids spending unnecessary computational time decompressing the archives on every run.

### FastQC or Falco

<details markdown="1">
Expand Down
8 changes: 6 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
----------------------------------------------------------------------------------------
*/

nextflow.enable.strict = true

// Global default params, used in configs
params {

Expand All @@ -30,6 +32,7 @@ params {
email_on_fail = null
plaintext_email = false
monochrome_logs = false
monochromeLogs = false // required so that nf-validation and nextflow.enable.strict work nicely together
hook_url = null
help = false
version = false
Expand All @@ -51,12 +54,13 @@ params {
// Schema validation default options
validationFailUnrecognisedParams = false
validationLenientMode = false
validationSchemaIgnoreParams = 'genomes,igenomes_base,fasta'
validationSchemaIgnoreParams = 'genomes,igenomes_base,fasta,monochromeLogs'
validationShowHiddenParams = false
validate_params = true

// Databases
databases = null
databases = null
save_untarred_databases = false

// FASTQ preprocessing
skip_preprocessing_qc = false
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
"description": "Path to comma-separated file containing information about databases and profiling parameters for each taxonomic profiler",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/taxprofiler/dev/usage#full-database-sheet).\n\nProfilers will only be executed if a corresponding database is supplied. \n\nWe recommend storing this database sheet somewhere central and accessible to other members of your lab/institution, as this file will likely be regularly reused."
},
"save_untarred_databases": {
"type": "boolean",
"fa_icon": "fas fa-database",
"description": "Specify to save decompressed user-supplied TAR archives of databases",
"help_text": "If input databases are supplied as gzipped TAR archives, in some cases you may want to move and re-use the decompressed databases for future runs. Specifying this parameter will save them to the directory given by `--outdir`, under a subdirectory called `untar/databases`."
},
"outdir": {
"type": "string",
"format": "directory-path",
Expand Down
34 changes: 25 additions & 9 deletions workflows/taxprofiler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -153,21 +153,37 @@ workflow TAXPROFILER {
skip: true
}
// Filter the channel to untar only those databases for tools that are selected to be run by the user.
ch_input_untar = ch_dbs_for_untar.untar
// Also, to ensure each archive is untarred only once, group together all databases that share the same file
ch_inputdb_untar = ch_dbs_for_untar.untar
.filter { db_meta, db_path ->
params[ "run_${db_meta.tool}" ]
}
UNTAR ( ch_input_untar )

ch_final_dbs = ch_dbs_for_untar.skip.mix( UNTAR.out.untar )
ch_final_dbs
.map { db_meta, db -> [ db_meta.db_params ]
def corrected_db_params = db_meta.db_params == null ? '' : db_meta.db_params
db_meta.db_params = corrected_db_params
[ db_meta, db ]
.groupTuple(by: 1)
.map {
meta, dbfile ->
def new_meta = [ 'id': dbfile.baseName ] + [ 'meta': meta ]
[new_meta , dbfile ]
}

// Untar the databases
UNTAR ( ch_inputdb_untar )
ch_versions = ch_versions.mix( UNTAR.out.versions.first() )

// Spread out the untarred and shared databases
ch_outputdb_from_untar = UNTAR.out.untar
.map {
meta, db ->
[meta.meta, db]
}
.transpose(by: 0)

ch_final_dbs = ch_dbs_for_untar.skip
.mix( ch_outputdb_from_untar )
.map { db_meta, db ->
def corrected_db_params = db_meta.db_params ? [ db_params: db_meta.db_params ] : [ db_params: '' ]
[ db_meta + corrected_db_params, db ]
}

/*
MODULE: Run FastQC
*/
Expand Down

0 comments on commit 593c013

Please sign in to comment.