From b0ceef70cb66c707d9cf0ad99c3d59be714106a4 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <sofia.stamouli@scilifelab.se>
Date: Mon, 14 Oct 2024 14:49:38 +0200
Subject: [PATCH] Refactor downstream samplesheet generation into SAMPLESHEET_MAG workflow and channelToSamplesheet function

---
 docs/output.md                                | 22 +++----
 nextflow_schema.json                          | 30 ++++------
 .../generate_downstream_samplesheets/main.nf  | 57 +++++++++++--------
 3 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index fa4c4bac..4403898c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -130,7 +130,7 @@ You can change the default value for low complexity filtering by using the argum
 
 By default nf-core/taxprofiler will only provide the `.settings` file if AdapterRemoval is selected.
 
-You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. If this is selected, you may receive different combinations of `.fastq` files for each sample depending on the input types - e.g. whether you have merged or not, or if you're supplying both single- and paired-end reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. If this is selected, you may receive different combinations of `.fastq` files for each sample depending on the input types - e.g. whether you have merged or not, or if you're supplying both single- and paired-end reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::warning
 The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as complexity filtering, host removal, run merging etc..
@@ -174,7 +174,7 @@ The `.npo` files can be used for re-generating and customising the plots using t
 
 The output logs are saved in the output folder and are part of MultiQC report.You do not normally need to check these manually.
 
-You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::warning
 We do **not** recommend using Porechop if you are already trimming the adapters with ONT's basecaller Guppy.
@@ -195,7 +195,7 @@ We do **not** recommend using Porechop if you are already trimming the adapters
 
 The output logs are saved in the output folder and are part of MultiQC report.You do not normally need to check these manually.
 
-You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 ### BBDuk
 
@@ -212,7 +212,7 @@ It is used in nf-core/taxprofiler for complexity filtering using different algor
 
 </details>
 
-By default nf-core/taxprofiler will only provide the `.log` file if BBDuk is selected as the complexity filtering tool. You will only find the complexity filtered reads in your results directory if you provide ` --save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+By default nf-core/taxprofiler will only provide the `.log` file if BBDuk is selected as the complexity filtering tool. You will only find the complexity filtered reads in your results directory if you provide `--save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::warning
 The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as host removal, run merging etc..
@@ -233,7 +233,7 @@ It is used in nf-core/taxprofiler for complexity filtering using different algor
 
 </details>
 
-By default nf-core/taxprofiler will only provide the `.log` file if PRINSEQ++ is selected as the complexity filtering tool. You will only find the complexity filtered `.fastq` files in your results directory if you supply ` --save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+By default nf-core/taxprofiler will only provide the `.log` file if PRINSEQ++ is selected as the complexity filtering tool. You will only find the complexity filtered `.fastq` files in your results directory if you supply `--save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::warning
 The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as host removal, run merging etc..
@@ -252,7 +252,7 @@ The resulting `.fastq` files may _not_ always be the 'final' reads that go into
 
 </details>
 
-You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::warning
 We do _not_ recommend using Filtlong if you are performing filtering of low quality reads with ONT's basecaller Guppy.
@@ -271,7 +271,7 @@ We do _not_ recommend using Filtlong if you are performing filtering of low qual
 
 </details>
 
-You will only find the `.fastq` files in the results directory if you provide ` --save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 ### Bowtie2
 
@@ -292,7 +292,7 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/
 
 </details>
 
-By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify to save `--save_hostremoval_unmapped` - these contain only unmapped reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify `--save_hostremoval_unmapped` - these contain only unmapped reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::info
 Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/fastq/`](#samtools-fastq).
@@ -345,7 +345,7 @@ Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See
 
 </details>
 
-This directory will be present and contain the unmapped reads from the `.fastq` format from long-read minimap2 host removal, if `--save_hostremoval_unmapped` is supplied. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+This directory will be present and contain the unmapped reads in `.fastq` format from long-read minimap2 host removal, if `--save_hostremoval_unmapped` is supplied. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 :::info
 For short-read unmapped reads, see [bowtie2](#bowtie2).
@@ -354,7 +354,7 @@ For short-read unmapped reads, see [bowtie2](#bowtie2).
 ### Analysis Ready Reads
 
 :::info
-This optional results directory will only be present in the pipeline results when supplying `--save_analysis_ready_reads`.
+This optional results directory will only be present in the pipeline results when supplying `--save_analysis_ready_fastqs`.
 :::
 
 <details markdown="1">
@@ -401,7 +401,7 @@ This is the last possible preprocessing step, so if you have multiple runs or li
 
 Note that you will only find samples that went through the run merging step in this directory. For samples that had a single run or library will not go through this step of the pipeline and thus will not be present in this directory.
 
-This directory and its FASTQ files will only be present if you supply `--save_runmerged_reads`.Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
+This directory and its FASTQ files will only be present if you supply `--save_runmerged_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`.
 
 ### Bracken
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b0c43a22..798e2004 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -712,25 +712,6 @@
             },
             "fa_icon": "fas fa-chart-line"
         },
-        "generate_samplesheet_options": {
-            "title": "Downstream pipeline samplesheet generation options",
-            "type": "object",
-            "fa_icon": "fas fa-align-justify",
-            "description": "Options for generating input samplesheets for complementary downstream pipelines.",
-            "properties": {
-                "generate_downstream_samplesheets": {
-                    "type": "boolean",
-                    "description": "Turn on generation of samplesheets for downstream pipelines.",
-                    "fa_icon": "fas fa-toggle-on"
-                },
-                "generate_pipeline_samplesheets": {
-                    "type": "string",
-                    "default": "taxprofiler",
-                    "description": "Specify which pipeline to generate a samplesheet for.",
-                    "fa_icon": "fas fa-toolbox"
-                }
-            }
-        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -1003,5 +984,14 @@
         {
             "$ref": "#/definitions/reference_genome_options"
         }
-    ]
+    ],
+    "properties": {
+        "generate_downstream_samplesheets": {
+            "type": "boolean"
+        },
+        "generate_pipeline_samplesheets": {
+            "type": "string",
+            "default": "mag"
+        }
+    }
 }
diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf
index b1499bd4..9d1ccdf1 100644
--- a/subworkflows/local/generate_downstream_samplesheets/main.nf
+++ b/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -1,8 +1,8 @@
 //
-// Subworkflow with functionality specific to the nf-core/taxprofiler pipeline
+// Subworkflow that generates an input samplesheet for the nf-core/mag pipeline
 //
 
-workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
+workflow SAMPLESHEET_MAG {
     take:
     ch_processed_reads
 
@@ -10,34 +10,45 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
     format     = 'csv' // most common format in nf-core
     format_sep = ','
 
-    if ( params.generate_pipeline_samplesheets == 'mag' && params.save_analysis_ready_fastqs ) {
-        def fastq_rel_path = '/'
-        format = 'csv'
-        format_sep = ','
-        ch_list_for_samplesheet = ch_processed_reads
-                .filter { meta, sample_id, instrument_platform,fastq_1,fastq_2,fasta -> (fastq_1 && fastq_2) && !fasta }
-                    .map {
-                            meta, sample_id, instrument_platform,fastq_1,fastq_2,fasta ->
-                                def sample        = meta.id
-                                def run           = meta.run_accession  //this should be optional
-                                def group         = ""
-                                def short_reads_1 = file(params.outdir).toString() + '/' + meta.id + '/' + fastq_1.getName()
-                                def short_reads_2 = file(params.outdir).toString() + '/' + meta.id + '/' + fastq_2.getName()
-                                def long_reads    = ""
-                    [sample: sample, run: run, group: group, short_reads_1: short_reads_1, short_reads_2: short_reads_2, long_reads: long_reads]
+
+    ch_list_for_samplesheet = ch_processed_reads
+            .filter { meta, sample_id, instrument_platform,fastq_1,fastq_2,fasta -> (fastq_1 && fastq_2) && !fasta }
+                .map {
+                        meta, sample_id, instrument_platform,fastq_1,fastq_2,fasta ->
+                            def sample        = meta.id
+                            def run           = meta.run_accession  //this should be optional
+                            def group         = ""
+                            def short_reads_1 = file(params.outdir).toString() + '/' + meta.id + '/' + fastq_1.getName()
+                            def short_reads_2 = file(params.outdir).toString() + '/' + meta.id + '/' + fastq_2.getName()
+                            def long_reads    = ""
+                [sample: sample, run: run, group: group, short_reads_1: short_reads_1, short_reads_2: short_reads_2, long_reads: long_reads]
         }
-        .tap{ ch_header }
-    }
+        .tap { ch_colnames }
+
+    channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', format, format_sep)
+}
+
+workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
 
+    take:
+    ch_processed_reads
+
+    main:
+    if ( params.generate_pipeline_samplesheets == 'mag' && params.save_analysis_ready_fastqs ) {
+        SAMPLESHEET_MAG(ch_processed_reads)
+    }
+}
 
+def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, format, format_sep) {
+    // Constructs the header string and then the strings of each row, and
+    // finally concatenates for saving. Originally designed by @mahesh-panchal
     ch_header
         .first()
-        .map{ it.keySet().join(format_sep) }
-        .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) })
+        .map { it.keySet().join(format_sep) }
+        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
         .collectFile(
-            name:"${params.outdir}/downstream_samplesheet/${params.generate_pipeline_samplesheets}.${format}",
+            name: "${params.outdir}/${outdir_subdir}/${params.generate_pipeline_samplesheets}.${format}",
             newLine: true,
             sort: false
         )
-
 }