From 6214ebc3fd3e1e0d8d31f864b68457b994f221a0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 4 Jun 2023 19:03:24 +0200 Subject: [PATCH 1/7] Add code for centrifuge - should be tested then applied for all other cases --- subworkflows/local/profiling.nf | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 7f8b9437..e6e4fca1 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -228,7 +228,23 @@ workflow PROFILING { } CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) - CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.report, ch_input_for_centrifuge.db) + + // Ensure the correct database goes with the generated report for KREPORT + ch_database_for_centrifugekreport = databases + .filter { meta, db -> meta['tool'] == 'centrifuge' } + .map { meta, db -> [meta['db_name'], meta, db] } + + ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report + .map { meta, profile -> [meta['db_name'], meta, profile] } + .join(ch_database_for_centrifugekreport) + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + } + + CENTRIFUGE_KREPORT (ch_input_for_centrifuge_kreport.profile, ch_input_for_centrifuge_kreport.db) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) From 18be1e4ba4ec29d935b27f9e4ae229633eae7dd8 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 29 Jun 2023 15:36:45 +0200 Subject: [PATCH 2/7] Add kaiju2table database binding --- subworkflows/local/profiling.nf | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git 
a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e6e4fca1..9444511e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -228,6 +228,8 @@ workflow PROFILING { } CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) // Ensure the correct database goes with the generated report for KREPORT ch_database_for_centrifugekreport = databases @@ -243,10 +245,9 @@ workflow PROFILING { db: db } + // Generate profile CENTRIFUGE_KREPORT (ch_input_for_centrifuge_kreport.profile, ch_input_for_centrifuge_kreport.db) - - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) - ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) + ch_versions = ch_versions.mix( CENTRIFUGE_KREPORT.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) ch_multiqc_files = ch_multiqc_files.mix( CENTRIFUGE_KREPORT.out.kreport ) @@ -284,10 +285,25 @@ workflow PROFILING { ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) - KAIJU_KAIJU2TABLE_SINGLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank) + // Ensure the correct database goes with the generated report for KAIJU2TABLE + ch_database_for_kaiju2table = databases + .filter { meta, db -> meta['tool'] == 'kaiju' } + .map { meta, db -> [meta['db_name'], meta, db] } + + ch_input_for_kaiju2table = KAIJU_KAIJU.out.results + .map { meta, profile -> [meta['db_name'], meta, profile] } + .join(ch_database_for_kaiju2table) + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + 
} + + // Generate profile + KAIJU_KAIJU2TABLE_SINGLE ( ch_input_for_kaiju2table.profile, ch_input_for_kaiju2table.db, params.kaiju_taxon_rank) ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_SINGLE.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary ) - ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary ) + ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary ) } if ( params.run_diamond ) { From 6b1c47daf463f9eb22e4b8373a7d0a4eed1df634 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 Jul 2023 15:31:59 +0200 Subject: [PATCH 3/7] Fix only single databases being used and make taxpasta merge tag more informative --- conf/modules.config | 2 ++ subworkflows/local/profiling.nf | 4 +-- .../local/standardisation_profiles.nf | 28 +++++++++++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0f1b20dd..8ab4bdc3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -638,6 +638,7 @@ process { } withName: TAXPASTA_MERGE { + tag = { "${meta.tool}|${meta.id}" } ext.args = { [ "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", @@ -655,6 +656,7 @@ process { } withName: TAXPASTA_STANDARDISE { + tag = { "${meta.tool}|${meta.id}" } ext.args = { [ "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 9444511e..54c174e2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -238,7 +238,7 @@ workflow PROFILING { ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report .map { meta, profile -> [meta['db_name'], meta, profile] } - .join(ch_database_for_centrifugekreport) + .combine(ch_database_for_centrifugekreport, by: 0) .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] db: db } @@ -292,7 +292,7 @@ 
workflow PROFILING { ch_input_for_kaiju2table = KAIJU_KAIJU.out.results .map { meta, profile -> [meta['db_name'], meta, profile] } - .join(ch_database_for_kaiju2table) + .combine(ch_database_for_kaiju2table, by: 0) .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index b131e028..b17e7cac 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -116,7 +116,19 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank) + ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju + .dump(tag: "B41") + .map { meta, profile -> [meta['id'], meta, profile] } + .dump(tag: "B42") + .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0) + .dump(tag: "AFTER") + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + } + + KAIJU_KAIJU2TABLE_COMBINED ( ch_input_for_kaiju2tablecombine.profile, ch_input_for_kaiju2tablecombine.db, params.kaiju_taxon_rank) ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary ) ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions ) @@ -162,7 +174,19 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - MOTUS_MERGE ( ch_profiles_for_motus, ch_input_databases.motus.map{it[1]}, motu_version ) + ch_input_for_motus = ch_profiles_for_motus + .dump(tag: "B41") + .map { meta, profile -> [meta['id'], meta, profile] } + .dump(tag: "B42") + .combine(ch_input_databases.motus.map{meta, db -> [meta.db_name, meta, db]}, by: 0) + .dump(tag: "AFTER") + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + } + + MOTUS_MERGE ( ch_profiles_for_motus.profile, ch_profiles_for_motus.profile.db, motu_version ) ch_versions = 
ch_versions.mix( MOTUS_MERGE.out.versions ) emit: From 69b2158bd0a4bf58e2d5ba95d99343b96c2c4deb Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 Jul 2023 15:53:06 +0200 Subject: [PATCH 4/7] Remove debugging, should be working now just need to finally test MOTUS_MERGE gets executed --- subworkflows/local/standardisation_profiles.nf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index b17e7cac..35a3fec5 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -174,19 +174,16 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - ch_input_for_motus = ch_profiles_for_motus - .dump(tag: "B41") + ch_input_for_motusmerge = ch_profiles_for_motus .map { meta, profile -> [meta['id'], meta, profile] } - .dump(tag: "B42") .combine(ch_input_databases.motus.map{meta, db -> [meta.db_name, meta, db]}, by: 0) - .dump(tag: "AFTER") .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] db: db } - MOTUS_MERGE ( ch_profiles_for_motus.profile, ch_profiles_for_motus.profile.db, motu_version ) + MOTUS_MERGE ( ch_input_for_motusmerge.profile, ch_input_for_motusmerge.db, motu_version ) ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions ) emit: From e0eecccbee764159e7a2277c4108057fe956d618 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 13 Jul 2023 14:45:53 +0200 Subject: [PATCH 5/7] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3714629f..4fb26461 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#290](https://github.com/nf-core/taxprofiler/pull/290) Minor database input documentation improvements (♥ to @alneberg for reporting, fix by @jfy133) - [#305](https://github.com/nf-core/taxprofiler/pull/305) Fix docker/podman registry definition for tower compatibility (fix by @adamrtalbot, @jfy133) - [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133) +- [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133) ### `Dependencies` From 7cd17a43236512f0559cb64c03637e6e23158734 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 17 Jul 2023 08:34:06 +0200 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Moritz E. 
Beber --- subworkflows/local/standardisation_profiles.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 8da0a724..00bdd67f 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -118,11 +118,8 @@ workflow STANDARDISATION_PROFILES { } ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju - .dump(tag: "B41") .map { meta, profile -> [meta['id'], meta, profile] } - .dump(tag: "B42") .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0) - .dump(tag: "AFTER") .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] From a4ed304634909f997880994f0b4cab2a36deb46b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 17 Jul 2023 08:43:36 +0200 Subject: [PATCH 7/7] Replace instances of map['element'] to shorter map.element for consistency --- subworkflows/local/profiling.nf | 38 +++++++++---------- .../local/standardisation_profiles.nf | 4 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index a01ff0c7..e39ca4f7 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -75,17 +75,17 @@ workflow PROFILING { // as we don't run run on a per-sample basis due to huge datbaases // so all samples are in one run and so sample-specific metadata // unnecessary. Set as database name to prevent `null` job ID and prefix. - def temp_meta = [ id: meta['db_name'] ] + def temp_meta = [ id: meta.db_name ] // Extend database parameters to specify whether to save alignments or not def new_db_meta = db_meta.clone() def sam_format = params.malt_save_reads ? 
' --alignments ./ -za false' : "" - new_db_meta['db_params'] = db_meta['db_params'] + sam_format + new_db_meta.db_params = db_meta.db_params + sam_format // Combine reduced sample metadata with updated database parameters metadata, // make sure id is db_name for publishing purposes. def new_meta = temp_meta + new_db_meta - new_meta['id'] = new_meta['db_name'] + new_meta.id = new_meta.db_name [ new_meta, reads, db ] @@ -165,8 +165,8 @@ workflow PROFILING { def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." - meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + if ( meta.instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." + meta.tool == 'bracken' && meta.instrument_platform != 'OXFORD_NANOPORE' } // If necessary, convert the eight column output to six column output. @@ -176,12 +176,12 @@ workflow PROFILING { // Extract the database name to combine by. 
ch_bracken_databases = databases - .filter { meta, db -> meta['tool'] == 'bracken' } - .map { meta, db -> [meta['db_name'], meta, db] } + .filter { meta, db -> meta.tool == 'bracken' } + .map { meta, db -> [meta.db_name, meta, db] } // Combine back with the reads ch_input_for_bracken = ch_kraken2_output - .map { meta, report -> [meta['db_name'], meta, report] } + .map { meta, report -> [meta.db_name, meta, report] } .combine(ch_bracken_databases, by: 0) .map { @@ -190,7 +190,7 @@ workflow PROFILING { // Have to pick second element if using bracken, as first element // contains kraken parameters - if ( db_meta['tool'] == 'bracken' ) { + if ( db_meta.tool == 'bracken' ) { // Only take second element if one exists def parsed_params = db_meta_new['db_params'].split(";") @@ -236,11 +236,11 @@ workflow PROFILING { // Ensure the correct database goes with the generated report for KREPORT ch_database_for_centrifugekreport = databases - .filter { meta, db -> meta['tool'] == 'centrifuge' } - .map { meta, db -> [meta['db_name'], meta, db] } + .filter { meta, db -> meta.tool == 'centrifuge' } + .map { meta, db -> [meta.db_name, meta, db] } ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report - .map { meta, profile -> [meta['db_name'], meta, profile] } + .map { meta, profile -> [meta.db_name, meta, profile] } .combine(ch_database_for_centrifugekreport, by: 0) .multiMap { key, meta, profile, db_meta, db -> @@ -290,11 +290,11 @@ workflow PROFILING { // Ensure the correct database goes with the generated report for KAIJU2TABLE ch_database_for_kaiju2table = databases - .filter { meta, db -> meta['tool'] == 'kaiju' } - .map { meta, db -> [meta['db_name'], meta, db] } + .filter { meta, db -> meta.tool == 'kaiju' } + .map { meta, db -> [meta.db_name, meta, db] } ch_input_for_kaiju2table = KAIJU_KAIJU.out.results - .map { meta, profile -> [meta['db_name'], meta, profile] } + .map { meta, profile -> [meta.db_name, meta, profile] } .combine(ch_database_for_kaiju2table, by: 
0) .multiMap { key, meta, profile, db_meta, db -> @@ -378,8 +378,8 @@ workflow PROFILING { ch_input_for_ganonclassify = ch_input_for_profiling.ganon .filter { meta, reads, meta_db, db -> - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}." - meta_db['tool'] == 'ganon' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + if ( meta.instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}." + meta_db.tool == 'ganon' && meta.instrument_platform != 'OXFORD_NANOPORE' } .multiMap { it -> @@ -395,7 +395,7 @@ workflow PROFILING { ch_report_for_ganonreport = GANON_CLASSIFY.out.report .map{ meta, report -> - def meta_db = [ meta['db_name'] ] + def meta_db = [ meta.db_name ] [ meta_db, meta, report ] @@ -404,7 +404,7 @@ workflow PROFILING { ch_database_for_ganonreport = databases .map{ meta, database -> - def meta_db = [ meta['db_name'] ] + def meta_db = [ meta.db_name ] [ meta_db, meta, database ] diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 00bdd67f..8ced9a5e 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -118,7 +118,7 @@ workflow STANDARDISATION_PROFILES { } ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju - .map { meta, profile -> [meta['id'], meta, profile] } + .map { meta, profile -> [meta.id, meta, profile] } .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0) .multiMap { key, meta, profile, db_meta, db -> @@ -173,7 +173,7 @@ workflow STANDARDISATION_PROFILES { } ch_input_for_motusmerge = ch_profiles_for_motus - .map { meta, profile -> [meta['id'], meta, profile] } + .map { meta, profile -> [meta.id, meta, profile] } .combine(ch_input_databases.motus.map{meta, db -> 
[meta.db_name, meta, db]}, by: 0) .multiMap { key, meta, profile, db_meta, db ->