From 6214ebc3fd3e1e0d8d31f864b68457b994f221a0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 4 Jun 2023 19:03:24 +0200 Subject: [PATCH 1/7] Add code for centrifuge - should be tested then applied for all other cases --- subworkflows/local/profiling.nf | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 7f8b9437..e6e4fca1 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -228,7 +228,23 @@ workflow PROFILING { } CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) - CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.report, ch_input_for_centrifuge.db) + + // Ensure the correct database goes with the generated report for KREPORT + ch_database_for_centrifugekreport = databases + .filter { meta, db -> meta['tool'] == 'centrifuge' } + .map { meta, db -> [meta['db_name'], meta, db] } + + ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report + .map { meta, profile -> [meta['db_name'], meta, profile] } + .join(ch_database_for_centrifugekreport) + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + } + + CENTRIFUGE_KREPORT (ch_input_for_centrifuge_kreport.profile, ch_input_for_centrifuge_kreport.db) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) From 18be1e4ba4ec29d935b27f9e4ae229633eae7dd8 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 29 Jun 2023 15:36:45 +0200 Subject: [PATCH 2/7] Add kaiju2table database binding --- subworkflows/local/profiling.nf | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git 
a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e6e4fca1..9444511e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -228,6 +228,8 @@ workflow PROFILING { } CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) // Ensure the correct database goes with the generated report for KREPORT ch_database_for_centrifugekreport = databases @@ -243,10 +245,9 @@ workflow PROFILING { db: db } + // Generate profile CENTRIFUGE_KREPORT (ch_input_for_centrifuge_kreport.profile, ch_input_for_centrifuge_kreport.db) - - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) - ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) + ch_versions = ch_versions.mix( CENTRIFUGE_KREPORT.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) ch_multiqc_files = ch_multiqc_files.mix( CENTRIFUGE_KREPORT.out.kreport ) @@ -284,10 +285,25 @@ workflow PROFILING { ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) - KAIJU_KAIJU2TABLE_SINGLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank) + // Ensure the correct database goes with the generated report for KAIJU2TABLE + ch_database_for_kaiju2table = databases + .filter { meta, db -> meta['tool'] == 'kaiju' } + .map { meta, db -> [meta['db_name'], meta, db] } + + ch_input_for_kaiju2table = KAIJU_KAIJU.out.results + .map { meta, profile -> [meta['db_name'], meta, profile] } + .join(ch_database_for_kaiju2table) + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + 
} + + // Generate profile + KAIJU_KAIJU2TABLE_SINGLE ( ch_input_for_kaiju2table.profile, ch_input_for_kaiju2table.db, params.kaiju_taxon_rank) ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_SINGLE.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary ) - ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary ) + ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary ) } if ( params.run_diamond ) { From 6b1c47daf463f9eb22e4b8373a7d0a4eed1df634 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 Jul 2023 15:31:59 +0200 Subject: [PATCH 3/7] Fix only single databases being used and make taxpasta merge tag more informative --- conf/modules.config | 2 ++ subworkflows/local/profiling.nf | 4 +-- .../local/standardisation_profiles.nf | 28 +++++++++++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0f1b20dd..8ab4bdc3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -638,6 +638,7 @@ process { } withName: TAXPASTA_MERGE { + tag = { "${meta.tool}|${meta.id}" } ext.args = { [ "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", @@ -655,6 +656,7 @@ process { } withName: TAXPASTA_STANDARDISE { + tag = { "${meta.tool}|${meta.id}" } ext.args = { [ "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 9444511e..54c174e2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -238,7 +238,7 @@ workflow PROFILING { ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report .map { meta, profile -> [meta['db_name'], meta, profile] } - .join(ch_database_for_centrifugekreport) + .combine(ch_database_for_centrifugekreport, by: 0) .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] db: db } @@ -292,7 +292,7 @@ 
workflow PROFILING { ch_input_for_kaiju2table = KAIJU_KAIJU.out.results .map { meta, profile -> [meta['db_name'], meta, profile] } - .join(ch_database_for_kaiju2table) + .combine(ch_database_for_kaiju2table, by: 0) .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index b131e028..b17e7cac 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -116,7 +116,19 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank) + ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju + .dump(tag: "B41") + .map { meta, profile -> [meta['id'], meta, profile] } + .dump(tag: "B42") + .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0) + .dump(tag: "AFTER") + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + } + + KAIJU_KAIJU2TABLE_COMBINED ( ch_input_for_kaiju2tablecombine.profile, ch_input_for_kaiju2tablecombine.db, params.kaiju_taxon_rank) ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary ) ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions ) @@ -162,7 +174,19 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - MOTUS_MERGE ( ch_profiles_for_motus, ch_input_databases.motus.map{it[1]}, motu_version ) + ch_input_for_motus = ch_profiles_for_motus + .dump(tag: "B41") + .map { meta, profile -> [meta['id'], meta, profile] } + .dump(tag: "B42") + .combine(ch_input_databases.motus.map{meta, db -> [meta.db_name, meta, db]}, by: 0) + .dump(tag: "AFTER") + .multiMap { + key, meta, profile, db_meta, db -> + profile: [meta, profile] + db: db + } + + MOTUS_MERGE ( ch_profiles_for_motus.profile, ch_profiles_for_motus.profile.db, motu_version ) ch_versions = 
ch_versions.mix( MOTUS_MERGE.out.versions ) emit: From 69b2158bd0a4bf58e2d5ba95d99343b96c2c4deb Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 6 Jul 2023 15:53:06 +0200 Subject: [PATCH 4/7] Remove debugging, should be working now just need to finally test MOTUS_MERGE gets executed --- subworkflows/local/standardisation_profiles.nf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index b17e7cac..35a3fec5 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -174,19 +174,16 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } - ch_input_for_motus = ch_profiles_for_motus - .dump(tag: "B41") + ch_input_for_motusmerge = ch_profiles_for_motus .map { meta, profile -> [meta['id'], meta, profile] } - .dump(tag: "B42") .combine(ch_input_databases.motus.map{meta, db -> [meta.db_name, meta, db]}, by: 0) - .dump(tag: "AFTER") .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] db: db } - MOTUS_MERGE ( ch_profiles_for_motus.profile, ch_profiles_for_motus.profile.db, motu_version ) + MOTUS_MERGE ( ch_input_for_motusmerge.profile, ch_input_for_motusmerge.db, motu_version ) ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions ) emit: From e0eecccbee764159e7a2277c4108057fe956d618 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 13 Jul 2023 14:45:53 +0200 Subject: [PATCH 5/7] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3714629f..4fb26461 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#290](https://github.com/nf-core/taxprofiler/pull/290) Minor database input documentation improvements (♥ to @alneberg for reporting, fix by @jfy133) - [#305](https://github.com/nf-core/taxprofiler/pull/305) Fix docker/podman registry definition for tower compatibility (fix by @adamrtalbot, @jfy133) - [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133) +- [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133) ### `Dependencies` From 7cd17a43236512f0559cb64c03637e6e23158734 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 17 Jul 2023 08:34:06 +0200 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Moritz E. 
Beber --- subworkflows/local/standardisation_profiles.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 8da0a724..00bdd67f 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -118,11 +118,8 @@ workflow STANDARDISATION_PROFILES { } ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju - .dump(tag: "B41") .map { meta, profile -> [meta['id'], meta, profile] } - .dump(tag: "B42") .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0) - .dump(tag: "AFTER") .multiMap { key, meta, profile, db_meta, db -> profile: [meta, profile] From a4ed304634909f997880994f0b4cab2a36deb46b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 17 Jul 2023 08:43:36 +0200 Subject: [PATCH 7/7] Replace instances of map['element'] to shorter map.element for consistency --- subworkflows/local/profiling.nf | 38 +++++++++---------- .../local/standardisation_profiles.nf | 4 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index a01ff0c7..e39ca4f7 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -75,17 +75,17 @@ workflow PROFILING { // as we don't run run on a per-sample basis due to huge datbaases // so all samples are in one run and so sample-specific metadata // unnecessary. Set as database name to prevent `null` job ID and prefix. - def temp_meta = [ id: meta['db_name'] ] + def temp_meta = [ id: meta.db_name ] // Extend database parameters to specify whether to save alignments or not def new_db_meta = db_meta.clone() def sam_format = params.malt_save_reads ? 
' --alignments ./ -za false' : "" - new_db_meta['db_params'] = db_meta['db_params'] + sam_format + new_db_meta.db_params = db_meta.db_params + sam_format // Combine reduced sample metadata with updated database parameters metadata, // make sure id is db_name for publishing purposes. def new_meta = temp_meta + new_db_meta - new_meta['id'] = new_meta['db_name'] + new_meta.id = new_meta.db_name [ new_meta, reads, db ] @@ -165,8 +165,8 @@ workflow PROFILING { def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." - meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + if ( meta.instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}." + meta.tool == 'bracken' && meta.instrument_platform != 'OXFORD_NANOPORE' } // If necessary, convert the eight column output to six column output. @@ -176,12 +176,12 @@ workflow PROFILING { // Extract the database name to combine by. 
ch_bracken_databases = databases - .filter { meta, db -> meta['tool'] == 'bracken' } - .map { meta, db -> [meta['db_name'], meta, db] } + .filter { meta, db -> meta.tool == 'bracken' } + .map { meta, db -> [meta.db_name, meta, db] } // Combine back with the reads ch_input_for_bracken = ch_kraken2_output - .map { meta, report -> [meta['db_name'], meta, report] } + .map { meta, report -> [meta.db_name, meta, report] } .combine(ch_bracken_databases, by: 0) .map { @@ -190,7 +190,7 @@ workflow PROFILING { // Have to pick second element if using bracken, as first element // contains kraken parameters - if ( db_meta['tool'] == 'bracken' ) { + if ( db_meta.tool == 'bracken' ) { // Only take second element if one exists def parsed_params = db_meta_new['db_params'].split(";") @@ -236,11 +236,11 @@ workflow PROFILING { // Ensure the correct database goes with the generated report for KREPORT ch_database_for_centrifugekreport = databases - .filter { meta, db -> meta['tool'] == 'centrifuge' } - .map { meta, db -> [meta['db_name'], meta, db] } + .filter { meta, db -> meta.tool == 'centrifuge' } + .map { meta, db -> [meta.db_name, meta, db] } ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report - .map { meta, profile -> [meta['db_name'], meta, profile] } + .map { meta, profile -> [meta.db_name, meta, profile] } .combine(ch_database_for_centrifugekreport, by: 0) .multiMap { key, meta, profile, db_meta, db -> @@ -290,11 +290,11 @@ workflow PROFILING { // Ensure the correct database goes with the generated report for KAIJU2TABLE ch_database_for_kaiju2table = databases - .filter { meta, db -> meta['tool'] == 'kaiju' } - .map { meta, db -> [meta['db_name'], meta, db] } + .filter { meta, db -> meta.tool == 'kaiju' } + .map { meta, db -> [meta.db_name, meta, db] } ch_input_for_kaiju2table = KAIJU_KAIJU.out.results - .map { meta, profile -> [meta['db_name'], meta, profile] } + .map { meta, profile -> [meta.db_name, meta, profile] } .combine(ch_database_for_kaiju2table, by: 
0) .multiMap { key, meta, profile, db_meta, db -> @@ -378,8 +378,8 @@ workflow PROFILING { ch_input_for_ganonclassify = ch_input_for_profiling.ganon .filter { meta, reads, meta_db, db -> - if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}." - meta_db['tool'] == 'ganon' && meta['instrument_platform'] != 'OXFORD_NANOPORE' + if ( meta.instrument_platform == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Ganon has not been evaluated for Nanopore data. Skipping Ganon for sample ${meta.id}." + meta_db.tool == 'ganon' && meta.instrument_platform != 'OXFORD_NANOPORE' } .multiMap { it -> @@ -395,7 +395,7 @@ workflow PROFILING { ch_report_for_ganonreport = GANON_CLASSIFY.out.report .map{ meta, report -> - def meta_db = [ meta['db_name'] ] + def meta_db = [ meta.db_name ] [ meta_db, meta, report ] @@ -404,7 +404,7 @@ workflow PROFILING { ch_database_for_ganonreport = databases .map{ meta, database -> - def meta_db = [ meta['db_name'] ] + def meta_db = [ meta.db_name ] [ meta_db, meta, database ] diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 00bdd67f..8ced9a5e 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -118,7 +118,7 @@ workflow STANDARDISATION_PROFILES { } ch_input_for_kaiju2tablecombine = ch_profiles_for_kaiju - .map { meta, profile -> [meta['id'], meta, profile] } + .map { meta, profile -> [meta.id, meta, profile] } .combine(ch_input_databases.kaiju.map{meta, db -> [meta.db_name, meta, db]}, by: 0) .multiMap { key, meta, profile, db_meta, db -> @@ -173,7 +173,7 @@ workflow STANDARDISATION_PROFILES { } ch_input_for_motusmerge = ch_profiles_for_motus - .map { meta, profile -> [meta['id'], meta, profile] } + .map { meta, profile -> [meta.id, meta, profile] } .combine(ch_input_databases.motus.map{meta, db -> 
[meta.db_name, meta, db]}, by: 0) .multiMap { key, meta, profile, db_meta, db ->