Merge pull request #307 from nf-core/standardisation-database-linking
Fix possible wrong database being assigned in post-taxprofiling steps
jfy133 authored Jul 17, 2023
2 parents 36d3156 + a4ed304 commit 6d81565
Showing 4 changed files with 71 additions and 18 deletions.
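
In short: the post-profiling steps (`CENTRIFUGE_KREPORT`, `KAIJU_KAIJU2TABLE_*`, `MOTUS_MERGE`) previously received their profiles and databases as two independent queue channels, so the pairing relied on emission order, which Nextflow does not guarantee once tasks run in parallel; with more than one database a profile could be handed the wrong database. The fix keys both channels on `db_name`, joins them explicitly with `combine`, and splits the result with `multiMap` so the paired sub-channels emit in lockstep. A minimal sketch of the pattern, with hypothetical channel contents rather than real pipeline code:

```nextflow
// Sketch of the fix pattern (hypothetical values, not pipeline code).
// The two channels arrive in no particular order:
ch_reports = Channel.of(
    [ [db_name: 'db1', tool: 'kaiju'], 'sample_db1.tsv' ],
    [ [db_name: 'db2', tool: 'kaiju'], 'sample_db2.tsv' ]
)
ch_databases = Channel.of(
    [ [db_name: 'db2', tool: 'kaiju'], 'db2/' ],
    [ [db_name: 'db1', tool: 'kaiju'], 'db1/' ]
)

// Key both on db_name, join explicitly, then split with multiMap; the
// resulting sub-channels emit in lockstep, so each report keeps its database.
ch_joined = ch_reports
    .map { meta, report -> [ meta.db_name, meta, report ] }
    .combine( ch_databases.map { meta, db -> [ meta.db_name, meta, db ] }, by: 0 )
    .multiMap { key, meta, report, db_meta, db ->
        report: [ meta, report ]
        db:     db
    }
// ch_joined.report and ch_joined.db can now be passed as paired process inputs.
```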
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#290](https://github.com/nf-core/taxprofiler/pull/290) Minor database input documentation improvements (♥ to @alneberg for reporting, fix by @jfy133)
- [#305](https://github.com/nf-core/taxprofiler/pull/305) Fix docker/podman registry definition for tower compatibility (fix by @adamrtalbot, @jfy133)
- [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133)
+ - [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133)
- [#313](https://github.com/nf-core/taxprofiler/pull/313) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133)

### `Dependencies`
2 changes: 2 additions & 0 deletions conf/modules.config
@@ -676,6 +676,7 @@ process {
}

withName: TAXPASTA_MERGE {
+            tag = { "${meta.tool}|${meta.id}" }
ext.args = {
[
"-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
@@ -693,6 +694,7 @@ }
}

withName: TAXPASTA_STANDARDISE {
+            tag = { "${meta.tool}|${meta.id}" }
ext.args = {
[
"-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
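
The two `tag` additions above are small but useful: `tag` is a per-task label, and because it is given as a closure it is re-evaluated for each task from that task's `meta` map, so concurrent taxpasta jobs over different profilers and databases are distinguishable in the execution log. A minimal sketch of the same idiom, with a hypothetical process name:

```nextflow
// Sketch only (hypothetical process name): a dynamic tag closure labels each
// task as tool|id, e.g. "MY_TOOL (kraken2|db1)" in the run log.
process {
    withName: MY_TOOL {
        tag = { "${meta.tool}|${meta.id}" }
    }
}
```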
64 changes: 48 additions & 16 deletions subworkflows/local/profiling.nf
@@ -75,17 +75,17 @@ workflow PROFILING {
// as we don't run on a per-sample basis due to huge databases
// so all samples are in one run and so sample-specific metadata
// unnecessary. Set as database name to prevent `null` job ID and prefix.
-            def temp_meta = [ id: meta['db_name'] ]
+            def temp_meta = [ id: meta.db_name ]

// Extend database parameters to specify whether to save alignments or not
def new_db_meta = db_meta.clone()
def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
-            new_db_meta['db_params'] = db_meta['db_params'] + sam_format
+            new_db_meta.db_params = db_meta.db_params + sam_format

// Combine reduced sample metadata with updated database parameters metadata,
// make sure id is db_name for publishing purposes.
def new_meta = temp_meta + new_db_meta
-            new_meta['id'] = new_meta['db_name']
+            new_meta.id = new_meta.db_name

[ new_meta, reads, db ]

@@ -165,8 +165,8 @@
def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report
.filter {
meta, report ->
-                if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}."
-                meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE'
+                if ( meta.instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}."
+                meta.tool == 'bracken' && meta.instrument_platform != 'OXFORD_NANOPORE'
}

// If necessary, convert the eight column output to six column output.
@@ -176,12 +176,12 @@

// Extract the database name to combine by.
ch_bracken_databases = databases
-            .filter { meta, db -> meta['tool'] == 'bracken' }
-            .map { meta, db -> [meta['db_name'], meta, db] }
+            .filter { meta, db -> meta.tool == 'bracken' }
+            .map { meta, db -> [meta.db_name, meta, db] }

// Combine back with the reads
ch_input_for_bracken = ch_kraken2_output
-            .map { meta, report -> [meta['db_name'], meta, report] }
+            .map { meta, report -> [meta.db_name, meta, report] }
.combine(ch_bracken_databases, by: 0)
.map {

@@ -190,7 +190,7 @@

// Have to pick second element if using bracken, as first element
// contains kraken parameters
-                if ( db_meta['tool'] == 'bracken' ) {
+                if ( db_meta.tool == 'bracken' ) {

// Only take second element if one exists
def parsed_params = db_meta_new['db_params'].split(";")
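
For context on the `split(";")` above: Bracken databases in the database sheet carry both tools' parameters in a single semicolon-separated `db_params` field (Kraken2's first, Bracken's second), and the code keeps the second element only when one exists. A small sketch with hypothetical values:

```nextflow
// Hypothetical db_params value: "<kraken2 args>;<bracken args>"
def db_params      = '--quick;-r 150'
def parsed_params  = db_params.split(";")
// Keep the Bracken half only if it was actually supplied
def bracken_params = parsed_params.size() == 2 ? parsed_params[1] : ''
assert bracken_params == '-r 150'
```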
@@ -231,9 +231,26 @@
}

CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
-        CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.report, ch_input_for_centrifuge.db)
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )

+        // Ensure the correct database goes with the generated report for KREPORT
+        ch_database_for_centrifugekreport = databases
+            .filter { meta, db -> meta.tool == 'centrifuge' }
+            .map { meta, db -> [meta.db_name, meta, db] }
+
+        ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report
+            .map { meta, profile -> [meta.db_name, meta, profile] }
+            .combine(ch_database_for_centrifugekreport, by: 0)
+            .multiMap {
+                key, meta, profile, db_meta, db ->
+                profile: [meta, profile]
+                db: db
+            }
+
+        // Generate profile
+        CENTRIFUGE_KREPORT (ch_input_for_centrifuge_kreport.profile, ch_input_for_centrifuge_kreport.db)
ch_versions = ch_versions.mix( CENTRIFUGE_KREPORT.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
ch_multiqc_files = ch_multiqc_files.mix( CENTRIFUGE_KREPORT.out.kreport )
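
A note on the operator choice above: `combine( ..., by: 0 )` is a keyed cross-product rather than a one-to-one `join`, which matters because several samples may have been classified against the same database and each of their reports must pick up that database. A small sketch with hypothetical values:

```nextflow
// One database record pairs with every report that shares its key.
Channel.of( ['db1', 'sampleA.report'], ['db1', 'sampleB.report'] )
    .combine( Channel.of( ['db1', 'db1_dir'] ), by: 0 )
    .view()
// [db1, sampleA.report, db1_dir]
// [db1, sampleB.report, db1_dir]
```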

@@ -267,10 +267,25 @@
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )

-        KAIJU_KAIJU2TABLE_SINGLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
+        // Ensure the correct database goes with the generated report for KAIJU2TABLE
+        ch_database_for_kaiju2table = databases
+            .filter { meta, db -> meta.tool == 'kaiju' }
+            .map { meta, db -> [meta.db_name, meta, db] }
+
+        ch_input_for_kaiju2table = KAIJU_KAIJU.out.results
+            .map { meta, profile -> [meta.db_name, meta, profile] }
+            .combine(ch_database_for_kaiju2table, by: 0)
+            .multiMap {
+                key, meta, profile, db_meta, db ->
+                profile: [meta, profile]
+                db: db
+            }
+
+        // Generate profile
+        KAIJU_KAIJU2TABLE_SINGLE ( ch_input_for_kaiju2table.profile, ch_input_for_kaiju2table.db, params.kaiju_taxon_rank)
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_SINGLE.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary )
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary )
}

if ( params.run_diamond ) {
@@ -342,8 +374,8 @@
ch_input_for_ganonclassify = ch_input_for_profiling.ganon
.filter {
meta, reads, meta_db, db ->
-                if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}."
-                meta_db['tool'] == 'ganon' && meta['instrument_platform'] != 'OXFORD_NANOPORE'
+                if ( meta.instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}."
+                meta_db.tool == 'ganon' && meta.instrument_platform != 'OXFORD_NANOPORE'
}
.multiMap {
it ->
@@ -359,7 +391,7 @@
ch_report_for_ganonreport = GANON_CLASSIFY.out.report
.map{
meta, report ->
-                def meta_db = [ meta['db_name'] ]
+                def meta_db = [ meta.db_name ]

[ meta_db, meta, report ]

@@ -368,7 +400,7 @@
ch_database_for_ganonreport = databases
.map{
meta, database ->
-                def meta_db = [ meta['db_name'] ]
+                def meta_db = [ meta.db_name ]

[ meta_db, meta, database ]

22 changes: 20 additions & 2 deletions subworkflows/local/standardisation_profiles.nf
@@ -117,7 +117,16 @@ workflow STANDARDISATION_PROFILES {
[[id:it[0]], it[1]]
}

-        KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
+        ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju
+            .map { meta, profile -> [meta.id, meta, profile] }
+            .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0)
+            .multiMap {
+                key, meta, profile, db_meta, db ->
+                profile: [meta, profile]
+                db: db
+            }
+
+        KAIJU_KAIJU2TABLE_COMBINED ( ch_input_for_kaiju2tablecombine.profile, ch_input_for_kaiju2tablecombine.db, params.kaiju_taxon_rank)
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary )
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions )

@@ -163,7 +172,16 @@
[[id:it[0]], it[1]]
}

-        MOTUS_MERGE ( ch_profiles_for_motus, ch_input_databases.motus.map{it[1]}, motu_version )
+        ch_input_for_motusmerge = ch_profiles_for_motus
+            .map { meta, profile -> [meta.id, meta, profile] }
+            .combine(ch_input_databases.motus.map{meta, db -> [meta.db_name, meta, db]}, by: 0)
+            .multiMap {
+                key, meta, profile, db_meta, db ->
+                profile: [meta, profile]
+                db: db
+            }
+
+        MOTUS_MERGE ( ch_input_for_motusmerge.profile, ch_input_for_motusmerge.db, motu_version )
ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )

// Ganon
