Merge pull request #1018 from scarlhoff/dsl2_ref_sheet

DSL2: Pull input files from reference sheet columns
nf-core · Oct 27, 2023 · 6cb0c30 · 6cb0c30
2 parents bc9c7b5 + 6e2abeb
commit 6cb0c30
Show file tree

Hide file tree

Showing 15 changed files with 387 additions and 126 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -375,7 +375,7 @@ process {
     // READ MAPPING
     //
     withName: BWA_ALN {
-        tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
+        tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
         ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" }
         ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
         publishDir = [
@@ -384,7 +384,7 @@ process {
     }
 
     withName: 'BWA_SAMSE|BWA_SAMPE' {
-        tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
+        tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
         ext.args = { "-r '@RG\\tID:ILLUMINA-${meta.library_id}\\tSM:${meta.sample_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}'" }
         ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
         publishDir = [
@@ -403,7 +403,7 @@ process {
     }
 
     withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" {
-        tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
+        tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
         ext.args = { params.fasta_largeref ? "-c" : "" }
         ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
         publishDir = [
@@ -828,7 +828,7 @@ process {
         ]
     }
 
-    withName: "QUALIMAP_BAMQC" {
+    withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' {
         tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
         publishDir = [
             path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/}" },

diff --git a/conf/test_humanbam.config b/conf/test_humanbam.config
@@ -31,6 +31,9 @@ params {
     contamination_estimation_angsd_mapq = 0
     contamination_estimation_angsd_minq = 0
 
+    // Qualimap
+    snpcapture_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'
+
     // TODO Reactivate sexDet and genotyping params when those steps get implemented.
     // //Sex Determination
     // sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'

diff --git a/conf/test_multiref.config b/conf/test_multiref.config
@@ -23,7 +23,7 @@ params {
     input = 'https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/samplesheet_multilane_multilib.tsv'
 
     // Genome references
-    fasta = 'https://github.com/jfy133/nf-core-test-datasets/raw/eager/reference/reference_sheet_multiref.csv'
+    fasta = 'https://github.com/nf-core/test-datasets/raw/eager/reference/reference_sheet_multiref.csv'
 
     // BAM filtering
     run_bamfiltering                      = true

diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md
@@ -94,10 +94,11 @@ Tool Specific combinations
     - with stricter threshold
 
   - BAM trimming
+
     - with default parameters
     - different length by udg treatment
 
-- All together
+  - All together
 
 ### Multi-reference tests
 
@@ -145,6 +146,10 @@ nextflow run ../main.nf -profile singularity,test --outdir ./results --input sam
 ## Test: (11) Broken path correctly fails pipeline ✅
 ## Expect: Expect fail
 nextflow run ../main.nf -profile singularity,test --outdir ./results --input samplesheet.tsv --fasta reference_sheet_multiref_test11.csv -ansi-log false -dump-channels --save_reference
+
+## Test: File input via reference sheet
+## Expect: For hs37d5: Qualimap with BED file, mtnucratio and angsd successful; bedtools not run. For Mammoth_MT: Qualimap without BED file, mtnucratio and bedtools successful; angsd not run.
+nextflow run main.nf -profile test_multiref,docker --outdir ./results --run_bedtools_coverage --run_contamination_estimation_angsd --run_mtnucratio
 ```
 
 ### AdapterRemoval

diff --git a/modules.json b/modules.json
@@ -221,7 +221,7 @@
                     },
                     "fastq_align_bwaaln": {
                         "branch": "master",
-                        "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653",
+                        "git_sha": "e2c81fea3daeacfa190f78d2b82f82361b734507",
                         "installed_by": ["subworkflows"]
                     }
                 }

diff --git a/nextflow.config b/nextflow.config
@@ -17,7 +17,6 @@ params {
     fasta_dict                 = null
     fasta_mapperindexdir       = null
     fasta_circular_target      = null
-    fasta_mitochondrion_header = null
     fasta_largeref             = false
 
     // References
@@ -219,6 +218,7 @@ try {
 // Additional configs for subworkflows
 includeConfig 'subworkflows/nf-core/bam_split_by_region/nextflow.config'
 includeConfig 'subworkflows/nf-core/bam_docounts_contamination_angsd/nextflow.config'
+includeConfig 'subworkflows/nf-core/fastq_align_bwaaln/nextflow.config'
 
 profiles {
     debug {

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -111,12 +111,6 @@
                     "description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.",
                     "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n",
                     "fa_icon": "fas fa-bullseye"
-                },
-                "fasta_mitochondrion_header": {
-                    "type": "string",
-                    "fa_icon": "fas fa-tag",
-                    "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome, up to the first space. Only applies when using `--run_mtnucratio`.",
-                    "help_text": "Specify the FASTA entry in the reference file specified as `--fasta` that acts as the mitochondrial 'chromosome' to base a mitochondrial-to-nuclear ratio calculation on. \n\nThe tool only accepts the first section of the header before the first space. For example, mitochondrion chromosome name is `MT` for the hs37d5/GrCH37 human reference genome.\n"
                 }
             }
         },

diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf
@@ -2,13 +2,13 @@
 // Map reads against the reference index(es) with the selected mapping tool
 //
 
-include { FASTQ_ALIGN_BWAALN                                                                                                  } from '../../subworkflows/nf-core/fastq_align_bwaaln/main'
-include { BWA_MEM                                                                                                             } from '../../modules/nf-core/bwa/mem/main'
-include { BOWTIE2_ALIGN                                                                                                       } from '../../modules/nf-core/bowtie2/align/main'
-include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES                                                                              } from '../../modules/nf-core/samtools/merge/main'
-include { SAMTOOLS_SORT  as SAMTOOLS_SORT_MERGED_LANES                                                                        } from '../../modules/nf-core/samtools/sort/main'
+include { FASTQ_ALIGN_BWAALN                                                                                                        } from '../../subworkflows/nf-core/fastq_align_bwaaln/main'
+include { BWA_MEM                                                                                                                   } from '../../modules/nf-core/bwa/mem/main'
+include { BOWTIE2_ALIGN                                                                                                             } from '../../modules/nf-core/bowtie2/align/main'
+include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES                                                                                    } from '../../modules/nf-core/samtools/merge/main'
+include { SAMTOOLS_SORT  as SAMTOOLS_SORT_MERGED_LANES                                                                              } from '../../modules/nf-core/samtools/sort/main'
 include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM; SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2; SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main'
-include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED                                                                       } from '../../modules/nf-core/samtools/flagstat/main'
+include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED                                                                             } from '../../modules/nf-core/samtools/flagstat/main'
 
 workflow MAP {
     take:
@@ -19,24 +19,32 @@ workflow MAP {
     ch_versions       = Channel.empty()
     ch_multiqc_files  = Channel.empty()
 
-    ch_input_for_mapping = reads
-                            .combine(index)
-                            .multiMap {
-                                meta, reads, meta2, index ->
-                                    new_meta = meta.clone()
-                                    new_meta.reference = meta2.id
-                                    reads: [ new_meta, reads ]
-                                    index: [ meta2, index]
-                            }
-
     if ( params.mapping_tool == 'bwaaln' ) {
-        FASTQ_ALIGN_BWAALN ( ch_input_for_mapping.reads, ch_input_for_mapping.index )
+        ch_index_for_mapping = index
+        ch_reads_for_mapping = reads
 
+        FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping )
         ch_versions        = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() )
         ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam
+                                .map{
+                                    // create meta consistent with rest of workflow
+                                    meta, bam ->
+                                    new_meta = meta + [ reference: meta.id_index ]
+                                [ new_meta, bam ]
+                                }
+
         ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai
 
     } else if ( params.mapping_tool == 'bwamem' ) {
+        ch_input_for_mapping = reads
+                            .combine( index )
+                            .multiMap {
+                                meta, reads, meta2, index ->
+                                    new_meta = meta + [ reference: meta2.id ]
+                                    reads: [ new_meta, reads ]
+                                    index: [ meta2, index ]
+                            }
+
         BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true )
         ch_versions        = ch_versions.mix ( BWA_MEM.out.versions.first() )
         ch_mapped_lane_bam = BWA_MEM.out.bam
@@ -46,6 +54,15 @@ workflow MAP {
         ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MEM.out.csi : SAMTOOLS_INDEX_MEM.out.bai
 
     } else if ( params.mapping_tool == 'bowtie2' ) {
+        ch_input_for_mapping = reads
+                            .combine( index )
+                            .multiMap {
+                                meta, reads, meta2, index ->
+                                    new_meta = meta + [ reference: meta2.id ]
+                                    reads: [ new_meta, reads ]
+                                    index: [ meta2, index ]
+                            }
+
         BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true )
         ch_versions        = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() )
         ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam

diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf
@@ -4,6 +4,7 @@
 
 include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf'
 include { REFERENCE_INDEXING_MULTI  } from '../../subworkflows/local/reference_indexing_multi.nf'
+include { GUNZIP as GUNZIP_SNPBED   } from '../../modules/nf-core/gunzip/main.nf'
 
 workflow REFERENCE_INDEXING {
     take:
@@ -17,20 +18,77 @@ workflow REFERENCE_INDEXING {
 
     // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc.
     if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. --fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.")
+    if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.mapstats_bedtools_featurefile )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.")
 
     if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) {
         // If input (multi-)reference sheet supplied
         REFERENCE_INDEXING_MULTI ( fasta )
         ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference
+        ch_mitochondrion_header  = REFERENCE_INDEXING_MULTI.out.mitochondrion_header
+        ch_hapmap                = REFERENCE_INDEXING_MULTI.out.hapmap
+        ch_pmd_mask              = REFERENCE_INDEXING_MULTI.out.pmd_mask
+        ch_snp_capture_bed       = REFERENCE_INDEXING_MULTI.out.snp_capture_bed
+        ch_pileupcaller_snp      = REFERENCE_INDEXING_MULTI.out.pileupcaller_snp
+        ch_sexdeterrmine_bed     = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed
+        ch_bedtools_feature      = REFERENCE_INDEXING_MULTI.out.bedtools_feature
         ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions )
     } else {
         // If input FASTA and/or indices supplied
         REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir )
+        ch_mitochondrion_header  = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header
+        ch_hapmap                = REFERENCE_INDEXING_SINGLE.out.hapmap
+        ch_pmd_mask              = REFERENCE_INDEXING_SINGLE.out.pmd_mask
+        ch_snp_capture_bed       = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed
+        ch_pileupcaller_snp      = REFERENCE_INDEXING_SINGLE.out.pileupcaller_snp
+        ch_sexdeterrmine_bed     = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed
+        ch_bedtools_feature      = REFERENCE_INDEXING_SINGLE.out.bedtools_feature
         ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference
         ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions )
     }
+
+    // Filter out input options that are not provided
+    ch_mitochondrion_header = ch_mitochondrion_header
+                    .filter{ it[1] != "" }
+
+    ch_hapmap = ch_hapmap
+                    .filter{ it[1] != "" }
+
+    ch_pmd_mask = ch_pmd_mask
+                    .filter{ it[1] != "" && it[2] != "" }
+
+    ch_capture_bed = ch_snp_capture_bed //optional
+                    .branch {
+                        meta, capture_bed ->
+                        input: capture_bed != ""
+                        skip: true
+                    }
+    ch_capture_bed_gunzip = ch_capture_bed.input //unzip
+                    .branch {
+                        meta, capture_bed ->
+                        forgunzip: capture_bed.extension == "gz"
+                        skip: true
+                    }
+    GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip )
+    ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip )
+
+    ch_pileupcaller_snp = ch_pileupcaller_snp
+                    .filter{ it[1] != "" && it[2] != "" }
+
+    ch_sexdeterrmine_bed = ch_sexdeterrmine_bed
+                    .filter{ it[1] != "" }
+
+    ch_bedtools_feature = ch_bedtools_feature
+                    .filter{ it[1] != "" }
+
     emit:
-    reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ]
-    versions  = ch_versions
+    reference            = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ]
+    mitochondrion_header = ch_mitochondrion_header  // [ meta, mitochondrion_header ]
+    hapmap               = ch_hapmap                // [ meta, hapmap ]
+    pmd_mask             = ch_pmd_mask              // [ meta, masked_fasta, capture_bed ]
+    snp_capture_bed      = ch_capture_bed           // [ meta, capture_bed ]
+    pileupcaller_snp     = ch_pileupcaller_snp      // [ meta, pileupcaller_bed, pileupcaller_snp ]
+    sexdeterrmine_bed    = ch_sexdeterrmine_bed     // [ meta, sexdet_bed ]
+    bedtools_feature     = ch_bedtools_feature      // [ meta, bedtools_feature ]
+    versions             = ch_versions
 
 }