From 6859ebf73933b458b835f94c63e78809be6b25ba Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Tue, 5 Sep 2023 19:33:41 +0200
Subject: [PATCH 01/14] added subworkflow to align with bwa

---
 modules.json                                  |  26 +++-
 modules/nf-core/bwa/sampe/main.nf             |  42 +++++++
 modules/nf-core/bwa/sampe/meta.yml            |  63 ++++++++++
 modules/nf-core/bwa/samse/main.nf             |  42 +++++++
 modules/nf-core/bwa/samse/meta.yml            |  64 ++++++++++
 modules/nf-core/samtools/index/main.nf        |  48 ++++++++
 modules/nf-core/samtools/index/meta.yml       |  53 +++++++++
 .../nf-core/fastq_align_bwaaln/main.nf        | 112 ++++++++++++++++++
 .../nf-core/fastq_align_bwaaln/meta.yml       |  51 ++++++++
 .../fastq_align_bwaaln/nextflow.config        |  26 ++++
 workflows/sammyseq.nf                         |  10 +-
 11 files changed, 534 insertions(+), 3 deletions(-)
 create mode 100644 modules/nf-core/bwa/sampe/main.nf
 create mode 100644 modules/nf-core/bwa/sampe/meta.yml
 create mode 100644 modules/nf-core/bwa/samse/main.nf
 create mode 100644 modules/nf-core/bwa/samse/meta.yml
 create mode 100644 modules/nf-core/samtools/index/main.nf
 create mode 100644 modules/nf-core/samtools/index/meta.yml
 create mode 100644 subworkflows/nf-core/fastq_align_bwaaln/main.nf
 create mode 100644 subworkflows/nf-core/fastq_align_bwaaln/meta.yml
 create mode 100644 subworkflows/nf-core/fastq_align_bwaaln/nextflow.config

diff --git a/modules.json b/modules.json
index 865e37c..64491b8 100644
--- a/modules.json
+++ b/modules.json
@@ -8,13 +8,23 @@
                     "bwa/aln": {
                         "branch": "master",
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
-                        "installed_by": ["modules"]
+                        "installed_by": ["fastq_align_bwaaln", "modules"]
                     },
                     "bwa/index": {
                         "branch": "master",
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
                         "installed_by": ["modules"]
                     },
+                    "bwa/sampe": {
+                        "branch": "master",
+                        "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
+                        "installed_by": ["fastq_align_bwaaln"]
+                    },
+                    "bwa/samse": {
+                        "branch": "master",
+                        "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
+                        "installed_by": ["fastq_align_bwaaln"]
+                    },
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e",
@@ -40,6 +50,11 @@
                         "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7",
                         "installed_by": ["modules"]
                     },
+                    "samtools/index": {
+                        "branch": "master",
+                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "installed_by": ["fastq_align_bwaaln"]
+                    },
                     "trimmomatic": {
                         "branch": "master",
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
@@ -51,6 +66,15 @@
                         "installed_by": ["modules"]
                     }
                 }
+            },
+            "subworkflows": {
+                "nf-core": {
+                    "fastq_align_bwaaln": {
+                        "branch": "master",
+                        "git_sha": "dedc0e31087f3306101c38835d051bf49789445a",
+                        "installed_by": ["subworkflows"]
+                    }
+                }
             }
         }
     }
diff --git a/modules/nf-core/bwa/sampe/main.nf b/modules/nf-core/bwa/sampe/main.nf
new file mode 100644
index 0000000..45bfffb
--- /dev/null
+++ b/modules/nf-core/bwa/sampe/main.nf
@@ -0,0 +1,42 @@
+process BWA_SAMPE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' :
+        'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }"
+
+    input:
+    tuple val(meta), path(reads), path(sai)
+    tuple val(meta2), path(index)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def read_group = meta.read_group ? "-r ${meta.read_group}" : ""
+
+    """
+    INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
+
+    bwa sampe \\
+        $args \\
+        $read_group \\
+        \$INDEX \\
+        $sai \\
+        $reads | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bwa/sampe/meta.yml b/modules/nf-core/bwa/sampe/meta.yml
new file mode 100644
index 0000000..0cefb96
--- /dev/null
+++ b/modules/nf-core/bwa/sampe/meta.yml
@@ -0,0 +1,63 @@
+name: bwa_sampe
+description: Convert paired-end bwa SA coordinate files to SAM format
+keywords:
+  - bwa
+  - aln
+  - short-read
+  - align
+  - reference
+  - fasta
+  - map
+  - sam
+  - bam
+tools:
+  - bwa:
+      description: |
+        BWA is a software package for mapping DNA sequences against
+        a large reference genome, such as the human genome.
+      homepage: http://bio-bwa.sourceforge.net/
+      documentation: http://bio-bwa.sourceforge.net/
+      doi: "10.1093/bioinformatics/btp324"
+      licence: ["GPL-3.0-or-later"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: FASTQ files specified alongside meta in input channel.
+      pattern: "*.{fastq,fq}.gz"
+  - sai:
+      type: file
+      description: SAI file specified alongside meta and reads in input channel.
+      pattern: "*.sai"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - index:
+      type: directory
+      description: Directory containing BWA index files (amb,ann,bwt,pac,sa) from BWA_INDEX
+      pattern: "bwa/"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.bam"
+
+authors:
+  - "@jfy133"
diff --git a/modules/nf-core/bwa/samse/main.nf b/modules/nf-core/bwa/samse/main.nf
new file mode 100644
index 0000000..38752ed
--- /dev/null
+++ b/modules/nf-core/bwa/samse/main.nf
@@ -0,0 +1,42 @@
+process BWA_SAMSE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' :
+        'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }"
+
+    input:
+    tuple val(meta), path(reads), path(sai)
+    tuple val(meta2), path(index)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def read_group = meta.read_group ? "-r ${meta.read_group}" : ""
+
+    """
+    INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
+
+    bwa samse \\
+        $args \\
+        $read_group \\
+        \$INDEX \\
+        $sai \\
+        $reads | samtools sort -@ ${task.cpus - 1} -O bam - > ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bwa/samse/meta.yml b/modules/nf-core/bwa/samse/meta.yml
new file mode 100644
index 0000000..1deab21
--- /dev/null
+++ b/modules/nf-core/bwa/samse/meta.yml
@@ -0,0 +1,64 @@
+name: bwa_samse
+description: Convert bwa SA coordinate file to SAM format
+keywords:
+  - bwa
+  - aln
+  - short-read
+  - align
+  - reference
+  - fasta
+  - map
+  - sam
+  - bam
+
+tools:
+  - bwa:
+      description: |
+        BWA is a software package for mapping DNA sequences against
+        a large reference genome, such as the human genome.
+      homepage: http://bio-bwa.sourceforge.net/
+      documentation: http://bio-bwa.sourceforge.net/
+      doi: "10.1093/bioinformatics/btp324"
+      licence: ["GPL-3.0-or-later"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: FASTQ files specified alongside meta in input channel.
+      pattern: "*.{fastq,fq}.gz"
+  - sai:
+      type: file
+      description: SAI file specified alongside meta and reads in input channel.
+      pattern: "*.sai"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - index:
+      type: directory
+      description: Directory containing BWA index files (amb,ann,bwt,pac,sa) from BWA_INDEX
+      pattern: "bwa/"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.bam"
+
+authors:
+  - "@jfy133"
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
new file mode 100644
index 0000000..0b20aa4
--- /dev/null
+++ b/modules/nf-core/samtools/index/main.nf
@@ -0,0 +1,48 @@
+process SAMTOOLS_INDEX {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.bai") , optional:true, emit: bai
+    tuple val(meta), path("*.csi") , optional:true, emit: csi
+    tuple val(meta), path("*.crai"), optional:true, emit: crai
+    path  "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        index \\
+        -@ ${task.cpus-1} \\
+        $args \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${input}.bai
+    touch ${input}.crai
+    touch ${input}.csi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml
new file mode 100644
index 0000000..8bd2fa6
--- /dev/null
+++ b/modules/nf-core/samtools/index/meta.yml
@@ -0,0 +1,53 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+  - index
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - crai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - csi:
+      type: file
+      description: CSI index file
+      pattern: "*.{csi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@maxulysse"
diff --git a/subworkflows/nf-core/fastq_align_bwaaln/main.nf b/subworkflows/nf-core/fastq_align_bwaaln/main.nf
new file mode 100644
index 0000000..647e83a
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_bwaaln/main.nf
@@ -0,0 +1,112 @@
+//
+// Alignment with bwa aln and sort
+//
+
+include { BWA_ALN            } from '../../../modules/nf-core/bwa/aln/main'
+include { BWA_SAMSE          } from '../../../modules/nf-core/bwa/samse/main'
+include { BWA_SAMPE          } from '../../../modules/nf-core/bwa/sampe/main'
+include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+
+workflow FASTQ_ALIGN_BWAALN {
+
+    take:
+    ch_reads // channel (mandatory): [ val(meta), path(reads) ]. subworkImportant: meta REQUIRES single_end` entry!
+    ch_index // channel (mandatory): [ val(meta), path(index) ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    // WARNING: You must specify in your prefix `meta.id_index` in your `modules.conf`
+    // to ensure that you do not overwrite multiple BAM files from one sample mapped
+    // against multiple references. This meta map is added by the subworkflow but can be removed
+    // after if necessary.
+
+    // Ensure when multiple references are provide, that reference/read combinations
+    // are correctly associated throughout the subworkflow by copying the sample
+    // specific metadata to the index on each combination
+
+    ch_prepped_input = ch_reads
+                        .combine(ch_index)
+                        .map{
+                            meta, reads, meta_index, index ->
+
+                                // Create a combined id that includes the ids of the reads and the index used.
+                                // Also keep the id of the index with a new name to avoid name collisions.
+                                def key_read_ref = meta.id + "_" + meta_index.id
+                                def id_index = meta_index.id
+
+                            [ meta + [key_read_ref: key_read_ref] + [id_index: id_index], reads, meta_index + [key_read_ref: key_read_ref]  + [id_index: id_index], index  ]
+                        }
+
+    // Drop the index_meta, as the id of the index is now kept within the read meta.
+    ch_preppedinput_for_bwaaln = ch_prepped_input
+                        .multiMap {
+                            meta, reads, meta_index, index ->
+                                reads: [ meta, reads ]
+                                index: [ meta, index ]
+                        }
+
+
+    // Set as independent channel to allow repeated joining but _with_ sample specific metadata
+    // to ensure right reference goes with right sample
+    ch_reads_newid = ch_prepped_input.map{ meta, reads, meta_index, index -> [ meta, reads ] }
+    ch_index_newid = ch_prepped_input.map{ meta, reads, meta_index, index -> [ meta, index ] }
+
+    // Alignment and conversion to bam
+    BWA_ALN ( ch_preppedinput_for_bwaaln.reads, ch_preppedinput_for_bwaaln.index )
+    ch_versions = ch_versions.mix( BWA_ALN.out.versions.first() )
+
+    ch_sai_for_bam = ch_reads_newid
+                        .join ( BWA_ALN.out.sai )
+                        .branch {
+                            meta, reads, sai ->
+                                pe: !meta.single_end
+                                se: meta.single_end
+                        }
+
+    // Split as PE/SE have different SAI -> BAM commands
+    ch_sai_for_bam_pe =  ch_sai_for_bam.pe
+                            .join ( ch_index_newid )
+                            .multiMap {
+                                meta, reads, sai, index ->
+                                    reads: [ meta, reads, sai ]
+                                    index: [ meta, index      ]
+                            }
+
+    ch_sai_for_bam_se =  ch_sai_for_bam.se
+                            .join ( ch_index_newid )
+                            .multiMap {
+                                meta, reads, sai, index ->
+                                    reads: [ meta, reads, sai ]
+                                    index: [ meta, index      ]
+                            }
+
+
+    BWA_SAMPE ( ch_sai_for_bam_pe.reads, ch_sai_for_bam_pe.index )
+    ch_versions = ch_versions.mix( BWA_SAMPE.out.versions.first() )
+
+    BWA_SAMSE ( ch_sai_for_bam_se.reads, ch_sai_for_bam_se.index )
+    ch_versions = ch_versions.mix( BWA_SAMSE.out.versions.first() )
+
+    ch_bam_for_index = BWA_SAMPE.out.bam.mix( BWA_SAMSE.out.bam )
+
+    // Index all
+    SAMTOOLS_INDEX ( ch_bam_for_index )
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+    // Remove superfluous internal maps to minimise clutter as much as possible
+    ch_bam_for_emit = ch_bam_for_index.map{ meta, bam -> [meta - meta.subMap('key_read_ref'), bam] }
+    ch_bai_for_emit = SAMTOOLS_INDEX.out.bai.map{ meta, bai -> [meta - meta.subMap('key_read_ref'), bai] }
+    ch_csi_for_emit = SAMTOOLS_INDEX.out.csi.map{ meta, csi -> [meta - meta.subMap('key_read_ref'), csi] }
+
+    emit:
+    // Note: output channels will contain meta with additional 'id_index' meta
+    // value to allow association of BAM file with the meta.id of input indicies
+    bam      = ch_bam_for_emit     // channel: [ val(meta), path(bam) ]
+    bai      = ch_bai_for_emit     // channel: [ val(meta), path(bai) ]
+    csi      = ch_csi_for_emit     // channel: [ val(meta), path(csi) ]
+
+    versions = ch_versions         // channel: [ path(versions.yml) ]
+}
+
diff --git a/subworkflows/nf-core/fastq_align_bwaaln/meta.yml b/subworkflows/nf-core/fastq_align_bwaaln/meta.yml
new file mode 100644
index 0000000..ad43398
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_bwaaln/meta.yml
@@ -0,0 +1,51 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_align_bwaaln"
+description: Align FASTQ files against reference genome with the bwa aln short-read aligner producing a sorted and indexed BAM files
+keywords:
+  - bwa
+  - aln
+  - fasta
+  - bwa
+  - align
+  - map
+components:
+  - bwa/aln
+  - bwa/sampe
+  - bwa/samse
+  - samtools/index
+input:
+  - ch_reads:
+      description: |
+        Structure: [ val(meta), path(fastq) ]
+        One or two FASTQ files for single or paired end data respectively.
+        Note: the subworkflow adds a new meta ID `meta.id_index` that _must_
+        be used in `prefix` to ensure unique file names. See the associated
+        nextflow.config file.
+  - ch_index:
+      description: |
+        Structure: [ val(meta), path(index) ]
+        A directory containing bwa aln reference indices as from `bwa index`
+        Contains files ending in extensions such as .amb, .ann, .bwt etc.
+
+output:
+  - bam:
+      description: |
+        Structure: [ val(meta), path(bam) ]
+        Sorted BAM/CRAM/SAM file
+        Note: the subworkflow adds a new meta ID `meta.id_index` that _must_
+        be used in `prefix` to ensure unique file names. See the associated
+        nextflow.config file.
+  - bai:
+      description: |
+        Structure: [ val(meta), path(bai) ]
+        BAM/CRAM/SAM samtools index file
+  - csi:
+      description: |
+        Structure: [ val(meta), path(csi) ]
+        BAM/CRAM/SAM samtools index file for larger references
+  - versions:
+      description: |
+        Files containing software versions
+        Structure: [ path(versions.yml) ]
+authors:
+  - "@jfy133"
diff --git a/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config b/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config
new file mode 100644
index 0000000..2ff1655
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config
@@ -0,0 +1,26 @@
+// IMPORTANT: Add this configuration to your modules.config
+// These settings are to ensure you include the reference information within the output file names, to prevent overwritting!
+// `meta.id_index` is created and used within the workflow. You must reuse this meta field if you wish to customise the file naming scheme.
+
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: '.*FASTQ_ALIGN_BWAALN:BWA_ALN' {
+        ext.prefix = { "${meta.id}_${meta.id_index}" }
+    }
+
+    withName: '.*FASTQ_ALIGN_BWAALN:BWA_SAMSE' {
+        ext.prefix = { "${meta.id}_${meta.id_index}" }
+    }
+
+    withName: '.*FASTQ_ALIGN_BWAALN:BWA_SAMPE' {
+        ext.prefix = { "${meta.id}_${meta.id_index}" }
+    }
+
+    withName: '.*FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX' {
+        ext.prefix = { "${meta.id}_${meta.id_index}" }
+    }
+
+
+}
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 6d69919..f9e0fe3 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -37,6 +37,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
 //
 include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { PREPARE_GENOME      } from '../subworkflows/local/prepare_genome'
+include { FASTQ_ALIGN_BWAALN  } from '../subworkflows/nf-core/fastq_align_bwaaln/main.nf'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -53,7 +54,7 @@ include { FASTQC                      } from '../modules/nf-core/fastqc/main'
 include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { TRIMMOMATIC                 } from '../modules/nf-core/trimmomatic'
-include { BWA_ALN                     } from '../modules/nf-core/bwa/aln/'
+
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -114,7 +115,12 @@ workflow SAMMYSEQ {
        merged_reads 
     )
 
-    BWA_ALN (
+    //BWA_ALN (
+    //    TRIMMOMATIC.out.trimmed_reads,
+    //    PREPARE_GENOME.out.bwa_index
+    //)
+
+    FASTQ_ALIGN_BWAALN (
         TRIMMOMATIC.out.trimmed_reads,
         PREPARE_GENOME.out.bwa_index
     )

From 09d0dd7ed6fa1d2f73e9d80f03ef552d0dc8df09 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Thu, 7 Sep 2023 20:22:23 +0200
Subject: [PATCH 02/14] markduplicates and filtering

---
 conf/modules.config                           | 122 ++++++++++++++++++
 modules.json                                  |  42 +++++-
 modules/nf-core/deeptools/bamcoverage/main.nf |  67 ++++++++++
 .../nf-core/deeptools/bamcoverage/meta.yml    |  60 +++++++++
 modules/nf-core/picard/markduplicates/main.nf |  61 +++++++++
 .../nf-core/picard/markduplicates/meta.yml    |  71 ++++++++++
 modules/nf-core/samtools/faidx/main.nf        |  50 +++++++
 modules/nf-core/samtools/faidx/meta.yml       |  57 ++++++++
 modules/nf-core/samtools/flagstat/main.nf     |  46 +++++++
 modules/nf-core/samtools/flagstat/meta.yml    |  49 +++++++
 modules/nf-core/samtools/idxstats/main.nf     |  48 +++++++
 modules/nf-core/samtools/idxstats/meta.yml    |  50 +++++++
 modules/nf-core/samtools/stats/main.nf        |  49 +++++++
 modules/nf-core/samtools/stats/meta.yml       |  59 +++++++++
 .../nf-core/bam_markduplicates_picard/main.nf |  52 ++++++++
 .../bam_markduplicates_picard/meta.yml        |  62 +++++++++
 .../nf-core/bam_stats_samtools/main.nf        |  32 +++++
 .../nf-core/bam_stats_samtools/meta.yml       |  41 ++++++
 workflows/sammyseq.nf                         |  41 ++++++
 19 files changed, 1058 insertions(+), 1 deletion(-)
 create mode 100644 modules/nf-core/deeptools/bamcoverage/main.nf
 create mode 100644 modules/nf-core/deeptools/bamcoverage/meta.yml
 create mode 100644 modules/nf-core/picard/markduplicates/main.nf
 create mode 100644 modules/nf-core/picard/markduplicates/meta.yml
 create mode 100644 modules/nf-core/samtools/faidx/main.nf
 create mode 100644 modules/nf-core/samtools/faidx/meta.yml
 create mode 100644 modules/nf-core/samtools/flagstat/main.nf
 create mode 100644 modules/nf-core/samtools/flagstat/meta.yml
 create mode 100644 modules/nf-core/samtools/idxstats/main.nf
 create mode 100644 modules/nf-core/samtools/idxstats/meta.yml
 create mode 100644 modules/nf-core/samtools/stats/main.nf
 create mode 100644 modules/nf-core/samtools/stats/meta.yml
 create mode 100644 subworkflows/nf-core/bam_markduplicates_picard/main.nf
 create mode 100644 subworkflows/nf-core/bam_markduplicates_picard/meta.yml
 create mode 100644 subworkflows/nf-core/bam_stats_samtools/main.nf
 create mode 100644 subworkflows/nf-core/bam_stats_samtools/meta.yml

diff --git a/conf/modules.config b/conf/modules.config
index 56cec34..0b65a9c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -12,6 +12,8 @@
 
 process {
 
+    
+
     publishDir = [
         path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
         mode: params.publish_dir_mode,
@@ -45,4 +47,124 @@ process {
 	ext.args2 = 'ILLUMINACLIP:TruSeq3-PE.fa:2:30:10:2:True LEADING:3 TRAILING:3 MINLEN:36'
     }
 
+
+    //
+    // Alignment options
+    //
+    withName: 'BWA_MEM' {
+        ext.args = ""
+        publishDir = [
+            path: { "${params.outdir}/bwa" },
+            enabled: false,
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: "*.bam"
+        ]
+    }
+
+    withName: 'SAMTOOLS_INDEX_BAM' {
+        ext.args = ""
+        publishDir = [
+            path: { "${params.outdir}/bwa" },
+            enabled: true,
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: "*.bai"
+        ]
+    }
+
+    withName: 'SAMTOOLS_SORT_BAM' {
+        ext.args = ""
+        ext.prefix = { "${meta.id}.sorted" }
+        publishDir = [
+            path: { "${params.outdir}/bwa" },
+            enabled: true,
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: "*sorted.bam"
+        ]
+    }
+
+    //
+    // Picard MarkDuplicates and Filtering options
+    //
+
+    withName: '.*BAM_MARKDUPLICATES_PICARD:PICARD_MARKDUPLICATES' {
+        ext.args   = '--ASSUME_SORTED true --REMOVE_DUPLICATES true --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'
+        ext.prefix = { "${meta.id}.md" }
+        publishDir = [
+            [
+                path: { "${params.outdir}/reports/markduplicates" },
+                mode: params.publish_dir_mode,
+                pattern: '*metrics.txt',
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ],
+            [
+                path: { "${params.outdir}/markduplicates/bam" },
+                mode: params.publish_dir_mode,
+                pattern: '*.md.{bam,bai}',
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                //enable: params.save_markduplicates_bam
+            ]
+        ]
+    }
+
+    withName: '.*BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' {
+        ext.prefix  = { "${meta.id}.markdup.sorted" }
+        publishDir  = [
+            path: { "${params.outdir}/markduplicates/bam" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: '*.{bai,csi}'
+        ]
+    }
+
+    withName: '.*BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS.*' {
+        ext.args    = ""
+        publishDir  = [
+            path: { "${params.outdir}/reports/samtools_stats/${meta.id}/md/" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            pattern: '*.{stats,flagstat,idxstats}'
+        ]
+    }
+
+    withName: 'SAMTOOLS_VIEW_FILTER' {
+        ext.args = '-h -F 0x0400'
+        publishDir  = [
+            path: { "${params.outdir}/markduplicates/duplicates_removed" },
+            mode: params.publish_dir_mode,
+            pattern: '*filtered.{bai,bam}',
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            enabled: true
+        ]
+        ext.prefix  = { "${meta.id}.md.filtered" }
+    }
+
+    withName: 'SAMTOOLS_SORT_FILTERED' {
+        ext.args    = ""
+        ext.prefix  = { "${meta.id}.md.filtered.sorted" }
+        publishDir  = [
+            path: { "${params.outdir}/markduplicates/duplicates_removed" },
+            mode: params.publish_dir_mode,
+            pattern: '*filtered.sorted.bam',
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            enabled: true
+        ]
+    }
+
+    withName: 'SAMTOOLS_INDEX_FILTERED' {
+        ext.args    = ""
+        ext.prefix  = { "${meta.id}.md.filtered.sorted" }
+        publishDir  = [
+            path: { "${params.outdir}/markduplicates/duplicates_removed" },
+            mode: params.publish_dir_mode,
+            pattern: '*filtered.sorted.bai',
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            enabled: true
+        ]
+    }
+
+
+
 }
diff --git a/modules.json b/modules.json
index 64491b8..ecb2175 100644
--- a/modules.json
+++ b/modules.json
@@ -35,6 +35,11 @@
                         "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543",
                         "installed_by": ["modules"]
                     },
+                    "deeptools/bamcoverage": {
+                        "branch": "master",
+                        "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
+                        "installed_by": ["modules"]
+                    },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
@@ -50,10 +55,35 @@
                         "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7",
                         "installed_by": ["modules"]
                     },
+                    "picard/markduplicates": {
+                        "branch": "master",
+                        "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
+                        "installed_by": ["bam_markduplicates_picard", "modules"]
+                    },
+                    "samtools/faidx": {
+                        "branch": "master",
+                        "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
+                        "installed_by": ["modules"]
+                    },
+                    "samtools/flagstat": {
+                        "branch": "master",
+                        "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c",
+                        "installed_by": ["bam_stats_samtools"]
+                    },
+                    "samtools/idxstats": {
+                        "branch": "master",
+                        "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639",
+                        "installed_by": ["bam_stats_samtools"]
+                    },
                     "samtools/index": {
                         "branch": "master",
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
-                        "installed_by": ["fastq_align_bwaaln"]
+                        "installed_by": ["bam_markduplicates_picard", "fastq_align_bwaaln"]
+                    },
+                    "samtools/stats": {
+                        "branch": "master",
+                        "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1",
+                        "installed_by": ["bam_stats_samtools"]
                     },
                     "trimmomatic": {
                         "branch": "master",
@@ -69,6 +99,16 @@
             },
             "subworkflows": {
                 "nf-core": {
+                    "bam_markduplicates_picard": {
+                        "branch": "master",
+                        "git_sha": "dedc0e31087f3306101c38835d051bf49789445a",
+                        "installed_by": ["subworkflows"]
+                    },
+                    "bam_stats_samtools": {
+                        "branch": "master",
+                        "git_sha": "dedc0e31087f3306101c38835d051bf49789445a",
+                        "installed_by": ["bam_markduplicates_picard"]
+                    },
                     "fastq_align_bwaaln": {
                         "branch": "master",
                         "git_sha": "dedc0e31087f3306101c38835d051bf49789445a",
diff --git a/modules/nf-core/deeptools/bamcoverage/main.nf b/modules/nf-core/deeptools/bamcoverage/main.nf
new file mode 100644
index 0000000..680fc5b
--- /dev/null
+++ b/modules/nf-core/deeptools/bamcoverage/main.nf
@@ -0,0 +1,67 @@
+process DEEPTOOLS_BAMCOVERAGE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::deeptools=3.5.1 bioconda::samtools=1.16.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:62d1ebe2d3a2a9d1a7ad31e0b902983fa7c25fa7-0':
+        'biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:62d1ebe2d3a2a9d1a7ad31e0b902983fa7c25fa7-0' }"
+
+    input:
+    tuple val(meta), path(input), path(input_index)
+    path(fasta)
+    path(fasta_fai)
+
+    output:
+    tuple val(meta), path("*.bigWig")   , emit: bigwig, optional: true
+    tuple val(meta), path("*.bedgraph") , emit: bedgraph, optional: true
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}.bigWig"
+
+    // cram_input is currently not working with deeptools
+    // therefore it's required to convert cram to bam first
+    def is_cram = input.Extension == "cram" ? true : false
+    def input_out = is_cram ? input.BaseName + ".bam" : "${input}"
+    def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : ""
+
+    if (is_cram){
+        """
+        samtools view -T $fasta $input $fai_reference -@ $task.cpus -o $input_out
+        samtools index -b $input_out -@ $task.cpus
+
+        bamCoverage \\
+            --bam $input_out \\
+            $args \\
+            --numberOfProcessors ${task.cpus} \\
+            --outFileName ${prefix}
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g")
+        END_VERSIONS
+        """
+
+    }
+    else {
+        """
+        bamCoverage \\
+            --bam $input_out \\
+            $args \\
+            --numberOfProcessors ${task.cpus} \\
+            --outFileName ${prefix}
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g")
+        END_VERSIONS
+        """
+    }
+
+}
diff --git a/modules/nf-core/deeptools/bamcoverage/meta.yml b/modules/nf-core/deeptools/bamcoverage/meta.yml
new file mode 100644
index 0000000..769592c
--- /dev/null
+++ b/modules/nf-core/deeptools/bamcoverage/meta.yml
@@ -0,0 +1,60 @@
+name: deeptools_bamcoverage
+description: This tool takes an alignment of reads or fragments as input (BAM file) and generates a coverage track (bigWig or bedGraph) as output.
+keywords:
+  - coverage
+  - depth
+  - track
+tools:
+  - deeptools:
+      description: A set of user-friendly tools for normalization and visualzation of deep-sequencing data
+      homepage: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html
+      documentation: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html
+      tool_dev_url: https://github.com/deeptools/deepTools/
+      doi: "10.1093/nar/gkw257"
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM file
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: BAM/CRAM index file
+      pattern: "*.{bai,crai}"
+  - fasta:
+      type: file
+      description: Reference file the CRAM file was created with (required with CRAM input)
+      pattern: "*.{fasta,fa}"
+  - fasta_fai:
+      type: file
+      description: Index of the reference file (optional, but recommended)
+      pattern: "*.{fai}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bigWig:
+      type: file
+      description: BigWig file
+      pattern: "*.bigWig"
+  - bedgraph:
+      type: file
+      description: Bedgraph file
+      pattern: "*.bedgraph"
+
+authors:
+  - "@FriederikeHanssen"
+  - "@SusiJo"
diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf
new file mode 100644
index 0000000..facd7ef
--- /dev/null
+++ b/modules/nf-core/picard/markduplicates/main.nf
@@ -0,0 +1,61 @@
+process PICARD_MARKDUPLICATES {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::picard=3.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' :
+        'biocontainers/picard:3.0.0--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(bam)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+
+    output:
+    tuple val(meta), path("*.bam")        , emit: bam
+    tuple val(meta), path("*.bai")        , optional:true, emit: bai
+    tuple val(meta), path("*.metrics.txt"), emit: metrics
+    path  "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    picard \\
+        -Xmx${avail_mem}M \\
+        MarkDuplicates \\
+        $args \\
+        --INPUT $bam \\
+        --OUTPUT ${prefix}.bam \\
+        --REFERENCE_SEQUENCE $fasta \\
+        --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}.bam.bai
+    touch ${prefix}.MarkDuplicates.metrics.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml
new file mode 100644
index 0000000..f7693d2
--- /dev/null
+++ b/modules/nf-core/picard/markduplicates/meta.yml
@@ -0,0 +1,71 @@
+name: picard_markduplicates
+description: Locate and tag duplicate reads in a BAM file
+keywords:
+  - markduplicates
+  - pcr
+  - duplicates
+  - bam
+  - sam
+  - cram
+tools:
+  - picard:
+      description: |
+        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
+        data and formats such as SAM/BAM/CRAM and VCF.
+      homepage: https://broadinstitute.github.io/picard/
+      documentation: https://broadinstitute.github.io/picard/
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.{bam,cram,sam}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Reference genome fasta file
+      pattern: "*.{fasta,fa}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fai:
+      type: file
+      description: Reference genome fasta index
+      pattern: "*.{fai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file with duplicate reads marked/removed
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag
+      pattern: "*.{bai}"
+  - metrics:
+      type: file
+      description: Duplicate metrics file generated by picard
+      pattern: "*.{metrics.txt}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@projectoriented"
+  - "@ramprasadn"
diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf
new file mode 100644
index 0000000..59ed308
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/main.nf
@@ -0,0 +1,50 @@
+process SAMTOOLS_FAIDX {
+    tag "$fasta"
+    label 'process_single'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    tuple val(meta2), path(fai)
+
+    output:
+    tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true
+    tuple val(meta), path ("*.fai")        , emit: fai, optional: true
+    tuple val(meta), path ("*.gzi")        , emit: gzi, optional: true
+    path "versions.yml"                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        faidx \\
+        $fasta \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll()
+    def fastacmd = match[0] ? "touch ${match[0][1]}" : ''
+    """
+    ${fastacmd}
+    touch ${fasta}.fai
+
+    cat <<-END_VERSIONS > versions.yml
+
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml
new file mode 100644
index 0000000..957b25e
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/meta.yml
@@ -0,0 +1,57 @@
+name: samtools_faidx
+description: Index FASTA file
+keywords:
+  - index
+  - fasta
+  - faidx
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: FASTA file
+      pattern: "*.{fa,fasta}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - fai:
+      type: file
+      description: FASTA index file
+      pattern: "*.{fai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fai:
+      type: file
+      description: FASTA index file
+      pattern: "*.{fai}"
+  - gzi:
+      type: file
+      description: Optional gzip index file for compressed inputs
+      pattern: "*.gzi"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@phue"
diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf
new file mode 100644
index 0000000..b75707e
--- /dev/null
+++ b/modules/nf-core/samtools/flagstat/main.nf
@@ -0,0 +1,46 @@
+process SAMTOOLS_FLAGSTAT {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+
+    output:
+    tuple val(meta), path("*.flagstat"), emit: flagstat
+    path  "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    samtools \\
+        flagstat \\
+        --threads ${task.cpus} \\
+        $bam \\
+        > ${prefix}.flagstat
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.flagstat
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml
new file mode 100644
index 0000000..954225d
--- /dev/null
+++ b/modules/nf-core/samtools/flagstat/meta.yml
@@ -0,0 +1,49 @@
+name: samtools_flagstat
+description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type
+keywords:
+  - stats
+  - mapping
+  - counts
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - bai:
+      type: file
+      description: Index for BAM/CRAM/SAM file
+      pattern: "*.{bai,crai,sai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - flagstat:
+      type: file
+      description: File containing samtools flagstat output
+      pattern: "*.{flagstat}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf
new file mode 100644
index 0000000..83c7c34
--- /dev/null
+++ b/modules/nf-core/samtools/idxstats/main.nf
@@ -0,0 +1,48 @@
+process SAMTOOLS_IDXSTATS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+
+    output:
+    tuple val(meta), path("*.idxstats"), emit: idxstats
+    path  "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    samtools \\
+        idxstats \\
+        --threads ${task.cpus-1} \\
+        $bam \\
+        > ${prefix}.idxstats
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    touch ${prefix}.idxstats
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml
new file mode 100644
index 0000000..dda87e1
--- /dev/null
+++ b/modules/nf-core/samtools/idxstats/meta.yml
@@ -0,0 +1,50 @@
+name: samtools_idxstats
+description: Reports alignment summary statistics for a BAM/CRAM/SAM file
+keywords:
+  - stats
+  - mapping
+  - counts
+  - chromosome
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - bai:
+      type: file
+      description: Index for BAM/CRAM/SAM file
+      pattern: "*.{bai,crai,sai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - idxstats:
+      type: file
+      description: File containing samtools idxstats output
+      pattern: "*.{idxstats}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf
new file mode 100644
index 0000000..4a2607d
--- /dev/null
+++ b/modules/nf-core/samtools/stats/main.nf
@@ -0,0 +1,49 @@
+process SAMTOOLS_STATS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(input), path(input_index)
+    tuple val(meta2), path(fasta)
+
+    output:
+    tuple val(meta), path("*.stats"), emit: stats
+    path  "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def reference = fasta ? "--reference ${fasta}" : ""
+    """
+    samtools \\
+        stats \\
+        --threads ${task.cpus} \\
+        ${reference} \\
+        ${input} \\
+        > ${prefix}.stats
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.stats
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml
new file mode 100644
index 0000000..90e6345
--- /dev/null
+++ b/modules/nf-core/samtools/stats/meta.yml
@@ -0,0 +1,59 @@
+name: samtools_stats
+description: Produces comprehensive statistics from SAM/BAM/CRAM file
+keywords:
+  - statistics
+  - counts
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Reference file the CRAM was created with (optional)
+      pattern: "*.{fasta,fa}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - stats:
+      type: file
+      description: File containing samtools stats output
+      pattern: "*.{stats}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@FriederikeHanssen"
+  - "@ramprasadn"
diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf
new file mode 100644
index 0000000..6e3df33
--- /dev/null
+++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf
@@ -0,0 +1,52 @@
+//
+// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats
+//
+
+include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
+include { SAMTOOLS_INDEX        } from '../../../modules/nf-core/samtools/index/main'
+include { BAM_STATS_SAMTOOLS    } from '../bam_stats_samtools/main'
+
+workflow BAM_MARKDUPLICATES_PICARD {
+
+    take:
+    ch_bam   // channel: [ val(meta), path(bam) ]
+    ch_fasta // channel: [ path(fasta) ]
+    ch_fai   // channel: [ path(fai) ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai )
+    ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first())
+
+    SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam )
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+    ch_bam_bai = PICARD_MARKDUPLICATES.out.bam
+        .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
+        .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true)
+        .map {
+            meta, bam, bai, csi ->
+                if (bai) {
+                    [ meta, bam, bai ]
+                } else {
+                    [ meta, bam, csi ]
+                }
+        }
+
+    BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta )
+    ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+
+    emit:
+    bam      = PICARD_MARKDUPLICATES.out.bam     // channel: [ val(meta), path(bam) ]
+    metrics  = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(bam) ]
+    bai      = SAMTOOLS_INDEX.out.bai            // channel: [ val(meta), path(bai) ]
+    csi      = SAMTOOLS_INDEX.out.csi            // channel: [ val(meta), path(csi) ]
+
+    stats    = BAM_STATS_SAMTOOLS.out.stats      // channel: [ val(meta), path(stats) ]
+    flagstat = BAM_STATS_SAMTOOLS.out.flagstat   // channel: [ val(meta), path(flagstat) ]
+    idxstats = BAM_STATS_SAMTOOLS.out.idxstats   // channel: [ val(meta), path(idxstats) ]
+
+    versions = ch_versions                       // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml
new file mode 100644
index 0000000..b924596
--- /dev/null
+++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml
@@ -0,0 +1,62 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "bam_markduplicates_picard"
+description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats
+keywords:
+  - markduplicates
+  - bam
+  - sam
+  - cram
+
+components:
+  - picard/markduplicates
+  - samtools/index
+  - samtools/stats
+  - samtools/idxstats
+  - samtools/flagstat
+  - bam_stats_samtools
+
+input:
+  - ch_bam:
+      description: |
+        BAM/CRAM/SAM file
+        Structure: [ val(meta), path(bam) ]
+  - ch_fasta:
+      description: |
+        Reference genome fasta file
+        Structure: [ path(fasta) ]
+  - ch_fasta:
+      description: |
+        Index of the reference genome fasta file
+        Structure: [ path(fai) ]
+output:
+  - bam:
+      description: |
+        processed BAM/CRAM/SAM file
+        Structure: [ val(meta), path(bam) ]
+  - bai:
+      description: |
+        BAM/CRAM/SAM samtools index
+        Structure: [ val(meta), path(bai) ]
+  - csi:
+      description: |
+        CSI samtools index
+        Structure: [ val(meta), path(csi) ]
+  - stats:
+      description: |
+        File containing samtools stats output
+        Structure: [ val(meta), path(stats) ]
+  - flagstat:
+      description: |
+        File containing samtools flagstat output
+        Structure: [ val(meta), path(flagstat) ]
+  - idxstats:
+      description: |
+        File containing samtools idxstats output
+        Structure: [ val(meta), path(idxstats) ]
+  - versions:
+      description: |
+        Files containing software versions
+        Structure: [ path(versions.yml) ]
+authors:
+  - "@dmarron"
+  - "@drpatelh"
diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf
new file mode 100644
index 0000000..44d4c01
--- /dev/null
+++ b/subworkflows/nf-core/bam_stats_samtools/main.nf
@@ -0,0 +1,32 @@
+//
+// Run SAMtools stats, flagstat and idxstats
+//
+
+include { SAMTOOLS_STATS    } from '../../../modules/nf-core/samtools/stats/main'
+include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main'
+include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main'
+
+workflow BAM_STATS_SAMTOOLS {
+    take:
+    ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
+    ch_fasta   // channel: [ val(meta), path(fasta) ]
+
+    main:
+    ch_versions = Channel.empty()
+
+    SAMTOOLS_STATS ( ch_bam_bai, ch_fasta )
+    ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions)
+
+    SAMTOOLS_FLAGSTAT ( ch_bam_bai )
+    ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions)
+
+    SAMTOOLS_IDXSTATS ( ch_bam_bai )
+    ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions)
+
+    emit:
+    stats    = SAMTOOLS_STATS.out.stats       // channel: [ val(meta), path(stats) ]
+    flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ]
+    idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ]
+
+    versions = ch_versions                    // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml
new file mode 100644
index 0000000..87863b1
--- /dev/null
+++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml
@@ -0,0 +1,41 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: bam_stats_samtools
+description: Produces comprehensive statistics from SAM/BAM/CRAM file
+keywords:
+  - statistics
+  - counts
+  - bam
+  - sam
+  - cram
+components:
+  - samtools/stats
+  - samtools/idxstats
+  - samtools/flagstat
+input:
+  - ch_bam_bai:
+      description: |
+        The input channel containing the BAM/CRAM and it's index
+        Structure: [ val(meta), path(bam), path(bai) ]
+  - ch_fasta:
+      description: |
+        Reference genome fasta file
+        Structure: [ path(fasta) ]
+output:
+  - stats:
+      description: |
+        File containing samtools stats output
+        Structure: [ val(meta), path(stats) ]
+  - flagstat:
+      description: |
+        File containing samtools flagstat output
+        Structure: [ val(meta), path(flagstat) ]
+  - idxstats:
+      description: |
+        File containing samtools idxstats output
+        Structure: [ val(meta), path(idxstats)]
+  - versions:
+      description: |
+        Files containing software versions
+        Structure: [ path(versions.yml) ]
+authors:
+  - "@drpatelh"
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index f9e0fe3..ba30723 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -1,3 +1,14 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    VALIDATE INPUTS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+if (params.fasta) { ch_fasta =  Channel.fromPath(params.fasta) } else { exit 1, 'Fasta reference genome not specified!' }
+
+// Modify fasta channel to include meta data
+ch_fasta_meta = ch_fasta.map{ it -> [[id:it[0].baseName], it] }.collect()
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     PRINT PARAMS SUMMARY
@@ -38,6 +49,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
 include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { PREPARE_GENOME      } from '../subworkflows/local/prepare_genome'
 include { FASTQ_ALIGN_BWAALN  } from '../subworkflows/nf-core/fastq_align_bwaaln/main.nf'
+include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -54,7 +66,11 @@ include { FASTQC                      } from '../modules/nf-core/fastqc/main'
 include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { TRIMMOMATIC                 } from '../modules/nf-core/trimmomatic'
+include { SAMTOOLS_FAIDX              } from '../modules/nf-core/samtools/faidx'
 
+// include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER     }   from '../modules/nf-core/samtools/view/main'
+// include { SAMTOOLS_SORT as SAMTOOLS_SORT_FILTERED   }   from '../modules/nf-core/samtools/sort/main'
+// include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FILTERED }   from '../modules/nf-core/samtools/index/main'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -125,6 +141,31 @@ workflow SAMMYSEQ {
         PREPARE_GENOME.out.bwa_index
     )
 
+    // PICARD MARK_DUPLICATES
+    // Index Fasta File for Markduplicates
+    SAMTOOLS_FAIDX (
+            ch_fasta_meta,
+             [[], []]
+        )
+
+    // MARK DUPLICATES IN BAM FILE
+    BAM_MARKDUPLICATES_PICARD (
+        FASTQ_ALIGN_BWAALN.out.bam,
+        ch_fasta_meta,
+        SAMTOOLS_FAIDX.out.fai.collect()
+        )
+
+    // SAMTOOLS_VIEW_FILTER (
+    //                 ch_bam_sorted.join(ch_bam_sorted_bai),
+    //                 ch_fasta_meta,
+    //                 []
+    //             )
+    // ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FILTER.out.versions)
+
+    ch_bam_from_markduplicates = BAM_MARKDUPLICATES_PICARD.bam
+
+
+
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )

From 9b0b040e36513eab40d30f84328acc6710fb589b Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Thu, 7 Sep 2023 20:23:18 +0200
Subject: [PATCH 03/14] bugfix include removal

---
 workflows/sammyseq.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index ba30723..85d643d 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -67,6 +67,7 @@ include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { TRIMMOMATIC                 } from '../modules/nf-core/trimmomatic'
 include { SAMTOOLS_FAIDX              } from '../modules/nf-core/samtools/faidx'
+// include { }
 
 // include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER     }   from '../modules/nf-core/samtools/view/main'
 // include { SAMTOOLS_SORT as SAMTOOLS_SORT_FILTERED   }   from '../modules/nf-core/samtools/sort/main'

From 0528894df06014c6471e278fdd313197ae0aa042 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Tue, 12 Sep 2023 09:04:50 +0200
Subject: [PATCH 04/14] add stopAt parameter

---
 nextflow.config       |  2 ++
 nextflow_schema.json  |  5 +++++
 workflows/sammyseq.nf | 51 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 1fbf588..cb04645 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,6 +9,8 @@
 // Global default params, used in configs
 params {
 
+    stopAt                     = ''
+
     // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                      = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b5c8a9d..9352576 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -12,6 +12,11 @@
             "description": "Define where the pipeline should find input data and save output data.",
             "required": ["input", "outdir"],
             "properties": {
+                "stopAt": {
+                    "type": "string",
+                    "description": "Specify after which step the pipeline should stop.",
+                    "fa_icon": "fas fa-stop-circle"
+                },
                 "input": {
                     "type": "string",
                     "format": "file-path",
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 85d643d..1ed3eec 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -67,8 +67,8 @@ include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { TRIMMOMATIC                 } from '../modules/nf-core/trimmomatic'
 include { SAMTOOLS_FAIDX              } from '../modules/nf-core/samtools/faidx'
-// include { }
-
+include { DEEPTOOLS_BAMCOVERAGE       } from '../modules/nf-core/deeptools/bamcoverage'
+ 
 // include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER     }   from '../modules/nf-core/samtools/view/main'
 // include { SAMTOOLS_SORT as SAMTOOLS_SORT_FILTERED   }   from '../modules/nf-core/samtools/sort/main'
 // include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FILTERED }   from '../modules/nf-core/samtools/index/main'
@@ -132,6 +132,11 @@ workflow SAMMYSEQ {
        merged_reads 
     )
 
+    if (params.stopAt == 'TRIMMOMATIC') {
+        return
+    }
+
+
     //BWA_ALN (
     //    TRIMMOMATIC.out.trimmed_reads,
     //    PREPARE_GENOME.out.bwa_index
@@ -142,6 +147,11 @@ workflow SAMMYSEQ {
         PREPARE_GENOME.out.bwa_index
     )
 
+    if (params.stopAt == 'FASTQ_ALIGN_BWAALN') {
+        return
+    }
+
+
     // PICARD MARK_DUPLICATES
     // Index Fasta File for Markduplicates
     SAMTOOLS_FAIDX (
@@ -149,6 +159,8 @@ workflow SAMMYSEQ {
              [[], []]
         )
 
+    
+
     // MARK DUPLICATES IN BAM FILE
     BAM_MARKDUPLICATES_PICARD (
         FASTQ_ALIGN_BWAALN.out.bam,
@@ -156,6 +168,9 @@ workflow SAMMYSEQ {
         SAMTOOLS_FAIDX.out.fai.collect()
         )
 
+    if (params.stopAt == 'BAM_MARKDUPLICATES_PICARD') {
+        return
+    }
     // SAMTOOLS_VIEW_FILTER (
     //                 ch_bam_sorted.join(ch_bam_sorted_bai),
     //                 ch_fasta_meta,
@@ -163,8 +178,38 @@ workflow SAMMYSEQ {
     //             )
     // ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FILTER.out.versions)
 
-    ch_bam_from_markduplicates = BAM_MARKDUPLICATES_PICARD.bam
+    //ch_bam_from_markduplicates = BAM_MARKDUPLICATES_PICARD.bam
 
+    //BAM_MARKDUPLICATES_PICARD.out.bam.view()
+    //BAM_MARKDUPLICATES_PICARD.out.bai.view()
+
+    //ch_bam_bai_combined = BAM_MARKDUPLICATES_PICARD.out.bam.join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0])
+
+   ch_bam_bai_combined = BAM_MARKDUPLICATES_PICARD.out.bam
+        .join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true)
+        .map {
+            meta, bam, bai  ->     
+                    [ meta, bam, bai ]
+             
+        }
+
+    //ch_bam_bai_combined.view()
+
+    //ch_for_bamcoverage = ch_bam_bai_combined.map { meta1, bam, meta2, bai -> 
+    //   tuple(meta1, bam, bai)
+    //}
+
+    //ch_for_bamcoverage.view()
+
+    DEEPTOOLS_BAMCOVERAGE (
+      ch_bam_bai_combined,
+      ch_fasta_meta.map { it[2] }, 
+      SAMTOOLS_FAIDX.out.fai.collect()
+    )
+
+    if (params.stopAt == 'DEEPTOOLS_BAMCOVERAGE') {
+        return
+    }
 
 
     CUSTOM_DUMPSOFTWAREVERSIONS (

From 8ff8d478251a2a69324976505dc259c4179a5839 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Wed, 13 Sep 2023 10:39:53 +0200
Subject: [PATCH 05/14] deeptools coverage bugfix

---
 subworkflows/local/prepare_genome.nf | 15 ++++++++-------
 workflows/sammyseq.nf                | 26 +++++++++++++++-----------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index 678d970..03d6795 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -19,8 +19,9 @@ include {
 include { BWA_INDEX            } from '../../modules/nf-core/bwa/index/main'
 
 workflow PREPARE_GENOME {
-    //take:
-
+    take:
+    input_fasta
+    input_bwaindex
 
     main:
 
@@ -30,11 +31,11 @@ workflow PREPARE_GENOME {
     // Uncompress genome fasta file if required
     //
     ch_fasta = Channel.empty()
-    if (params.fasta.endsWith('.gz')) {
+    if (input_fasta.endsWith('.gz')) {
         ch_fasta    = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map{ it[1] }
         ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
     } else {
-        ch_fasta =  [ [:], file(params.fasta) ] 
+        ch_fasta =  [ [:], file(input_fasta) ] 
     }
 
     //println(ch_fasta)
@@ -56,11 +57,11 @@ workflow PREPARE_GENOME {
     ch_bwa_index = Channel.empty()
     
         if (params.bwa_index) {
-            if (params.bwa_index.endsWith('.tar.gz')) {
-                ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], params.bwa_index ] ).untar.map{ it[1] }
+            if (input_bwaindex.endsWith('.tar.gz')) {
+                ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], input_bwaindex ] ).untar.map{ it[1] }
                 ch_versions  = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
             } else {
-                ch_bwa_index = file(params.bwa_index)
+                ch_bwa_index = file(input_bwaindex)
             }
         } else {
             ch_bwa_index = BWA_INDEX ( ch_fasta ).index
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 1ed3eec..1052ecc 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -89,7 +89,9 @@ workflow SAMMYSEQ {
     //
     // SUBWORKFLOW: Read in samplesheet, validate and stage input files
     //
-    PREPARE_GENOME ()
+    PREPARE_GENOME (params.fasta,
+                    params.bwa_index)
+
     ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
 
     INPUT_CHECK (
@@ -141,7 +143,7 @@ workflow SAMMYSEQ {
     //    TRIMMOMATIC.out.trimmed_reads,
     //    PREPARE_GENOME.out.bwa_index
     //)
-
+    
     FASTQ_ALIGN_BWAALN (
         TRIMMOMATIC.out.trimmed_reads,
         PREPARE_GENOME.out.bwa_index
@@ -194,17 +196,19 @@ workflow SAMMYSEQ {
         }
 
     //ch_bam_bai_combined.view()
-
-    //ch_for_bamcoverage = ch_bam_bai_combined.map { meta1, bam, meta2, bai -> 
-    //   tuple(meta1, bam, bai)
-    //}
-
-    //ch_for_bamcoverage.view()
+    //ch_fasta_meta.map { it[2] }.view
+    //ch_fasta_meta.collect().view()
+    //SAMTOOLS_FAIDX.out.fai.collect().view()
+    //SAMTOOLS_FAIDX.out.fai.map { it['path'] }.collect().view()
+    ch_fai_path = SAMTOOLS_FAIDX.out.fai.map { it[1] }
+    //ch_fai_path.view()
+    ch_fasta_path = ch_fasta_meta.map { it[1] }
+    //ch_fasta_path.view()
 
     DEEPTOOLS_BAMCOVERAGE (
-      ch_bam_bai_combined,
-      ch_fasta_meta.map { it[2] }, 
-      SAMTOOLS_FAIDX.out.fai.collect()
+        ch_bam_bai_combined,
+        ch_fasta_path,
+        ch_fai_path
     )
 
     if (params.stopAt == 'DEEPTOOLS_BAMCOVERAGE') {

From 3bcb7221e5a9154edbe61341d9cafe4ec6dc780b Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Thu, 21 Sep 2023 16:42:56 +0200
Subject: [PATCH 06/14] docker loading

---
 modules/local/rtwosamplesmle/main.nf          | 86 +++++++++++++++++++
 .../templates/two_samples_mle.R               | 45 ++++++++++
 workflows/sammyseq.nf                         | 13 +++
 3 files changed, 144 insertions(+)
 create mode 100644 modules/local/rtwosamplesmle/main.nf
 create mode 100644 modules/local/rtwosamplesmle/templates/two_samples_mle.R

diff --git a/modules/local/rtwosamplesmle/main.nf b/modules/local/rtwosamplesmle/main.nf
new file mode 100644
index 0000000..495df66
--- /dev/null
+++ b/modules/local/rtwosamplesmle/main.nf
@@ -0,0 +1,86 @@
+// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
+//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
+//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
+//               https://nf-co.re/join
+// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
+//               All other parameters MUST be provided using the "task.ext" directive, see here:
+//               https://www.nextflow.io/docs/latest/process.html#ext
+//               where "task.ext" is a string.
+//               Any parameters that need to be evaluated in the context of a particular sample
+//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
+// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
+//               unless there is a run-time, storage advantage in implementing in this way
+//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
+//                 bwa mem | samtools view -B -T ref.fasta
+// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
+//               list (`[]`) instead of a file can be used to work around this issue.
+
+process RTWOSAMPLESMLE {
+    //tag "$meta.id"
+    label 'process_medium'
+
+    // TODO nf-core: List required Conda package(s).
+    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
+    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
+    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
+    
+    //conda "YOUR-TOOL-HERE"
+    
+    //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    //    'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
+    //    'biocontainers/YOUR-TOOL-HERE' }"
+    container 'lucidif/r_two_samples_mle:latest'
+
+
+    //input:
+    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
+    //               MUST be provided as an input via a Groovy Map called "meta".
+    //               This information may not be required in some instances e.g. indexing reference genome files:
+    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
+    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
+    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
+    //tuple val(meta), path(bam)
+
+    //output:
+    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
+    //tuple val(meta), path("*.bam"), emit: bam
+    // TODO nf-core: List additional required output channels/values here
+    //path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    
+    //def args = task.ext.args ?: ''
+    //def prefix = task.ext.prefix ?: "${meta.id}"
+    
+    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
+    //               If the software is unable to output a version number on the command-line then it can be manually specified
+    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
+    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
+    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
+    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
+    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
+    // TODO nf-core: Please replace the example samtools command below with your module's command
+    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
+    template 'two_samples_mle.R'
+
+    //stub:
+    
+    //def args = task.ext.args ?: ''
+    //def prefix = task.ext.prefix ?: "${meta.id}"
+    
+    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
+    //               Have a look at the following examples:
+    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
+    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
+    // """
+    // touch ${prefix}.bam
+
+    // cat <<-END_VERSIONS > versions.yml
+    // "${task.process}":
+    //     : \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ))
+    // END_VERSIONS
+    // """
+}
diff --git a/modules/local/rtwosamplesmle/templates/two_samples_mle.R b/modules/local/rtwosamplesmle/templates/two_samples_mle.R
new file mode 100644
index 0000000..e226cc0
--- /dev/null
+++ b/modules/local/rtwosamplesmle/templates/two_samples_mle.R
@@ -0,0 +1,45 @@
+#!/usr/bin/env Rscript
+
+# usage:
+# Rscript /path/to/make_twosample_mle_spp.R sample_bam control_bam chromsizes_file mle_output_file"
+
+## LIBRARIES
+
+require(Rcpp)
+require(data.table)
+require(spp)
+require(rtracklayer)
+
+print("Hello World")
+
+################################################
+################################################
+## R SESSION INFO                             ##
+################################################
+################################################
+
+##sink(paste(output_prefix, "R_sessionInfo.log", sep = '.'))
+##print(sessionInfo())
+##sink()
+
+################################################
+################################################
+## VERSIONS FILE                              ##
+################################################
+################################################
+
+# r.version <- strsplit(version[['version.string']], ' ')[[1]][3]
+# deseq2.version <- as.character(packageVersion('DESeq2'))
+
+# writeLines(
+#     c(
+#         '"${task.process}":',
+#         paste('    r-base:', r.version),
+#         paste('    bioconductor-deseq2:', deseq2.version)
+#     ),
+# 'versions.yml')
+
+################################################
+################################################
+################################################
+################################################
\ No newline at end of file
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 1052ecc..ff59892 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -50,6 +50,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { PREPARE_GENOME      } from '../subworkflows/local/prepare_genome'
 include { FASTQ_ALIGN_BWAALN  } from '../subworkflows/nf-core/fastq_align_bwaaln/main.nf'
 include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard'
+include { RTWOSAMPLESMLE } from '../modules/local/rtwosamplesmle'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -89,11 +90,23 @@ workflow SAMMYSEQ {
     //
     // SUBWORKFLOW: Read in samplesheet, validate and stage input files
     //
+
+    RTWOSAMPLESMLE()
+
+    if (params.stopAt == 'BEGIN') {
+        return
+    }
+
+
     PREPARE_GENOME (params.fasta,
                     params.bwa_index)
 
     ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
 
+    if (params.stopAt == 'PREPARE_GENOME') {
+        return
+    }
+
     INPUT_CHECK (
         file(params.input)
     )

From ff1bd86ac96c4615fa5a08302d15c5b1825d70e4 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Mon, 25 Sep 2023 17:35:05 +0200
Subject: [PATCH 07/14] subworkflow faidx (incomplete)

---
 modules/local/chromosomes_size.nf        | 16 ++++++++++++
 subworkflows/local/faidx_cut_chr_size.nf | 32 ++++++++++++++++++++++++
 subworkflows/local/prepare_genome.nf     |  5 ++++
 workflows/sammyseq.nf                    |  7 +++---
 4 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 modules/local/chromosomes_size.nf
 create mode 100644 subworkflows/local/faidx_cut_chr_size.nf

diff --git a/modules/local/chromosomes_size.nf b/modules/local/chromosomes_size.nf
new file mode 100644
index 0000000..6c4151e
--- /dev/null
+++ b/modules/local/chromosomes_size.nf
@@ -0,0 +1,16 @@
+process CUT_SIZES_GENOME {
+    tag "${sample_id}"
+    publishDir "${params.outdir}/genome", mode: 'copy'
+
+    input:
+    tuple val(meta), path(fai)
+
+    output:
+    path("sizes.genome") , emit: ch_sizes_genome
+
+    script:
+    """
+    cut -f1,2 ${fai} > sizes.genome
+    """
+}
+
diff --git a/subworkflows/local/faidx_cut_chr_size.nf b/subworkflows/local/faidx_cut_chr_size.nf
new file mode 100644
index 0000000..22b1038
--- /dev/null
+++ b/subworkflows/local/faidx_cut_chr_size.nf
@@ -0,0 +1,32 @@
+// Import SAMTOOLS_FAIDX module
+include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx'
+
+// Definition of the CUT_SIZES_GENOME process
+process CUT_SIZES_GENOME {
+    tag "${sample_id}"
+    publishDir "${params.outdir}/genome", mode: 'copy'
+
+    input:
+    tuple val(meta), path(fai)
+
+    output:
+    path("sizes.genome") , emit: ch_sizes_genome
+
+    script:
+    """
+    cut -f1,2 ${fai} > sizes.genome
+    """
+}
+
+// Definition of the subworkflow
+workflow FAIDX_SUBWORKFLOW {
+    take:
+    path fasta_file
+
+    main:
+    SAMTOOLS_FAIDX(fasta_file)
+    CUT_SIZES_GENOME(SAMTOOLS_FAIDX.out.fai)
+
+    emit:
+    path("sizes.genome"), from: CUT_SIZES_GENOME.out.ch_sizes_genome
+}
\ No newline at end of file
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index 03d6795..bc58e65 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -17,6 +17,7 @@ include {
 
 //include { GFFREAD              } from '../../modules/nf-core/gffread/main'
 include { BWA_INDEX            } from '../../modules/nf-core/bwa/index/main'
+//include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx'
 
 workflow PREPARE_GENOME {
     take:
@@ -69,6 +70,10 @@ workflow PREPARE_GENOME {
         }
     
 
+    //make chromosome size index
+
+
+
     //
     // Uncompress Bowtie2 index or generate from scratch if required
     //
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index ff59892..f6a38d8 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -50,6 +50,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { PREPARE_GENOME      } from '../subworkflows/local/prepare_genome'
 include { FASTQ_ALIGN_BWAALN  } from '../subworkflows/nf-core/fastq_align_bwaaln/main.nf'
 include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard'
+include { CUT_SIZES_GENOME } from "../modules/local/chromosomes_size"
 include { RTWOSAMPLESMLE } from '../modules/local/rtwosamplesmle'
 
 /*
@@ -91,8 +92,6 @@ workflow SAMMYSEQ {
     // SUBWORKFLOW: Read in samplesheet, validate and stage input files
     //
 
-    RTWOSAMPLESMLE()
-
     if (params.stopAt == 'BEGIN') {
         return
     }
@@ -174,7 +173,9 @@ workflow SAMMYSEQ {
              [[], []]
         )
 
-    
+    ch_fai_for_cut = SAMTOOLS_FAIDX.out.fai.collect()
+
+    CUT_SIZES_GENOME(ch_fai_for_cut)
 
     // MARK DUPLICATES IN BAM FILE
     BAM_MARKDUPLICATES_PICARD (

From 117c237efcd94181d4b965037b2d281380dc5e14 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Wed, 27 Sep 2023 19:37:02 +0200
Subject: [PATCH 08/14] comparison test (not work with multiple comparison with
 same sample)

---
 modules/local/rtwosamplesmle/main.nf          | 12 ++-
 .../templates/two_samples_mle.R               | 32 +++++++-
 nextflow.config                               |  2 +
 nextflow_schema.json                          |  8 ++
 workflows/sammyseq.nf                         | 75 ++++++++++++++++++-
 5 files changed, 120 insertions(+), 9 deletions(-)

diff --git a/modules/local/rtwosamplesmle/main.nf b/modules/local/rtwosamplesmle/main.nf
index 495df66..996e72a 100644
--- a/modules/local/rtwosamplesmle/main.nf
+++ b/modules/local/rtwosamplesmle/main.nf
@@ -29,18 +29,22 @@ process RTWOSAMPLESMLE {
     //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
     //    'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
     //    'biocontainers/YOUR-TOOL-HERE' }"
-    container 'lucidif/r_two_samples_mle:latest'
 
+    //docker pull lucidif/r_two_samples_mle:0.0.1
+    //docker image tag 96ceba68eb6d quay.io/lucidif/r_two_samples_mle:0.0.1
+    container 'lucidif/r_two_samples_mle:0.0.1'
 
-    //input:
+
+    input:
     // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
     //               MUST be provided as an input via a Groovy Map called "meta".
     //               This information may not be required in some instances e.g. indexing reference genome files:
     //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
     // TODO nf-core: Where applicable please provide/convert compressed files as input/output
     //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    //tuple val(meta), path(bam)
-
+    tuple val(meta1), val(meta2), path(bam1), path(bam2)
+    path chromsizes_file
+    
     //output:
     // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
     //tuple val(meta), path("*.bam"), emit: bam
diff --git a/modules/local/rtwosamplesmle/templates/two_samples_mle.R b/modules/local/rtwosamplesmle/templates/two_samples_mle.R
index e226cc0..c9dc805 100644
--- a/modules/local/rtwosamplesmle/templates/two_samples_mle.R
+++ b/modules/local/rtwosamplesmle/templates/two_samples_mle.R
@@ -10,7 +10,37 @@ require(data.table)
 require(spp)
 require(rtracklayer)
 
-print("Hello World")
+################################################
+## PARAMS
+################################################
+
+# ip_file <- args[1]
+# input_file <- args[2]
+# chromsizes_file <- args[3]
+# mle_output_file <- args[4]
+
+ip_file <- "${bam1}"
+input_file <- "${bam2}"
+chromsizes_file <- "${chromsizes_file}"
+mle_output_file <- "${bam1.baseName}VS${bam2.baseName}_mle.txt"
+
+################################################
+# DEFAULT PARAMETERS
+################################################
+
+remove_anomalies <- FALSE
+debug_mode <- TRUE
+
+################################################
+# CORE PROCESSES
+################################################
+
+if (debug_mode){
+    print(c("ip_file=",ip_file))
+    print(c("input_file=",input_file))
+    print(c("chromsizes_file=",chromsizes_file))
+    print(c("mle_output_file",mle_output_file))
+}
 
 ################################################
 ################################################
diff --git a/nextflow.config b/nextflow.config
index cb04645..06ae7c3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -14,6 +14,8 @@ params {
     // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                      = null
+    comparisonFile             = null
+
     // References
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 9352576..e833cf5 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -44,6 +44,14 @@
                     "type": "string",
                     "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
                     "fa_icon": "fas fa-file-signature"
+                },
+                "comparisonFile": {
+                    "type" : "string",
+                    "format": "file-path",
+                    "description": "Path to comma-separated file containing information about samples comparisons",
+                    "mimetype": "text/csv",
+                    "help_text": "You will need to create a comparisons file with information about the comparisons in your experiment before running the pipeline. Use this parameter to specify the necessary comparisons. It has to be a comma-separated file with 2 columns, one for the first sample to compair and the second one to sample against which to do it."
+
                 }
             }
         },
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index f6a38d8..5dec07d 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -109,6 +109,9 @@ workflow SAMMYSEQ {
     INPUT_CHECK (
         file(params.input)
     )
+
+    //INPUT_CHECK.out.reads.view()
+
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
     // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
     // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
@@ -137,6 +140,9 @@ workflow SAMMYSEQ {
     //
     // MODULE: Run FastQC
     //
+    //merged_reads.view()
+
+
     FASTQC (
         merged_reads
     )
@@ -151,10 +157,7 @@ workflow SAMMYSEQ {
     }
 
 
-    //BWA_ALN (
-    //    TRIMMOMATIC.out.trimmed_reads,
-    //    PREPARE_GENOME.out.bwa_index
-    //)
+    //TRIMMOMATIC.out.trimmed_reads.view()
     
     FASTQ_ALIGN_BWAALN (
         TRIMMOMATIC.out.trimmed_reads,
@@ -177,6 +180,8 @@ workflow SAMMYSEQ {
 
     CUT_SIZES_GENOME(ch_fai_for_cut)
 
+    //FASTQ_ALIGN_BWAALN.out.bam.view()
+
     // MARK DUPLICATES IN BAM FILE
     BAM_MARKDUPLICATES_PICARD (
         FASTQ_ALIGN_BWAALN.out.bam,
@@ -229,6 +234,68 @@ workflow SAMMYSEQ {
         return
     }
 
+    if (params.comparisonFile) {
+
+        //BAM_MARKDUPLICATES_PICARD.out.bam.view()
+
+        // Declare the dictionary that will store the comparisons
+        def comparisons = [:]
+
+        // Read the CSV file
+        csvFile = file(params.comparisonFile)
+        csvFile.eachLine { line ->
+        // Skip the first line (header)
+            if (line.startsWith("sample1")) {
+                return
+            }
+    
+        // Extract the samples and add the "_T1" suffix
+                def (sample1, sample2) = line.split(',')
+                comparisons[sample1.trim() + "_T1"] = sample2.trim() + "_T1"
+                println("Comparisons Map: $comparisons")
+        }
+
+        //println("Comparisons Map: $comparisons")  // Print the comparison map
+
+        // Filter BAMs using the updated comparisons map
+        
+
+        ch_filtered_bams = BAM_MARKDUPLICATES_PICARD.out.bam
+           .filter { meta, bam -> 
+                boolean isPresent = comparisons.keySet().contains(meta.id) || comparisons.values().contains(meta.id)
+                if (!isPresent) {
+                    //println("BAM not in comparisons list: $bam")  // Print BAMs not in the comparison list
+                }
+                return isPresent
+            }
+
+        // Print filtered BAMs
+        //ch_filtered_bams.view()
+
+        // Create BAM pairs for comparison
+        
+        ch_filtered_bams
+            .groupTuple(by: [0]) // Group by the metadata
+            .map { meta, bams ->
+                def comparisonValue = comparisons[meta.id]
+                println("Comparison Value for ${meta.id}: $comparisonValue")
+            }
+            .filter { it != null } // Remove any null values from the channel
+            .set { ch_bam_pairs_for_comparison }
+        println("Before view")
+
+        ch_bam_pairs_for_comparison.count().view()
+
+        ch_bam_pairs_for_comparison.view()
+
+        println("after view")
+
+        if (params.stopAt == 'RTWOSAMPLESMLE') {
+        return
+        }
+    }
+
+
 
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')

From 03d69fb3066ea7c988db64be5315f84382ecce48 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Fri, 29 Sep 2023 11:13:02 +0200
Subject: [PATCH 09/14] second test for comparison

---
 workflows/sammyseq.nf | 172 ++++++++++++++++++++++++++++++++----------
 1 file changed, 134 insertions(+), 38 deletions(-)

diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 5dec07d..4bb536e 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -189,6 +189,12 @@ workflow SAMMYSEQ {
         SAMTOOLS_FAIDX.out.fai.collect()
         )
 
+
+    //BAM_MARKDUPLICATES_PICARD.out.bam.view()
+       
+    ch_mle_in = BAM_MARKDUPLICATES_PICARD.out.bam
+    //ch_mle_in.view()
+
     if (params.stopAt == 'BAM_MARKDUPLICATES_PICARD') {
         return
     }
@@ -206,7 +212,7 @@ workflow SAMMYSEQ {
 
     //ch_bam_bai_combined = BAM_MARKDUPLICATES_PICARD.out.bam.join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0])
 
-   ch_bam_bai_combined = BAM_MARKDUPLICATES_PICARD.out.bam
+   ch_bam_bai_combined =  BAM_MARKDUPLICATES_PICARD.out.bam
         .join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true)
         .map {
             meta, bam, bai  ->     
@@ -219,6 +225,12 @@ workflow SAMMYSEQ {
     //ch_fasta_meta.collect().view()
     //SAMTOOLS_FAIDX.out.fai.collect().view()
     //SAMTOOLS_FAIDX.out.fai.map { it['path'] }.collect().view()
+    // println("first BAM_MARKDUPLICATES_PICARD.out.bam.view()") 
+    //     BAM_MARKDUPLICATES_PICARD.out.bam.view()
+    //     ch_bam_bai_combined.view()
+    // println("first END BAM_MARKDUPLICATES_PICARD.out.bam.view()")
+
+
     ch_fai_path = SAMTOOLS_FAIDX.out.fai.map { it[1] }
     //ch_fai_path.view()
     ch_fasta_path = ch_fasta_meta.map { it[1] }
@@ -235,60 +247,144 @@ workflow SAMMYSEQ {
     }
 
     if (params.comparisonFile) {
-
-        //BAM_MARKDUPLICATES_PICARD.out.bam.view()
-
-        // Declare the dictionary that will store the comparisons
+        // Add the suffix "_T1" to each sample ID in the comparison file
         def comparisons = [:]
-
-        // Read the CSV file
+        def isFirstLine = true
         csvFile = file(params.comparisonFile)
         csvFile.eachLine { line ->
-        // Skip the first line (header)
-            if (line.startsWith("sample1")) {
+            if (isFirstLine) {
+                isFirstLine = false
                 return
             }
-    
-        // Extract the samples and add the "_T1" suffix
-                def (sample1, sample2) = line.split(',')
-                comparisons[sample1.trim() + "_T1"] = sample2.trim() + "_T1"
-                println("Comparisons Map: $comparisons")
+            def (sample1, sample2) = line.split(',')
+            comparisons[sample1.trim() + "_T1"] = sample2.trim() + "_T1"
         }
 
-        //println("Comparisons Map: $comparisons")  // Print the comparison map
 
-        // Filter BAMs using the updated comparisons map
+        // ch_filtered_bams = ch_mle_in.filter { meta, bam ->
+        //     boolean isInKeys = comparisons.keySet().contains(meta.id)
+        //     boolean isInValues = comparisons.values().contains(meta.id)
+
+        //     boolean isPresent = isInKeys || isInValues
+
+        //     //println("Is ${meta.id} present? $isPresent")
+
+        //     return isPresent
+        // }
+
         
+def comparison_channels = [:]
+
+
+        // comparisons.each { sample1, sample2 ->
+        //     def ch_name = "${sample1}_VS_${sample2}"
+        //     comparison_channels[ch_name] = ch_mle_in.filter { meta, bam ->
+        //         println("meta.id = $meta.id")
+        //         println("sample1 = $sample1")
+        //         println("sample2 = $sample2")
+        //         meta.id == sample1 || meta.id == sample2
+        //     }
+        // }
+
+        // comparison_channels.each { comparison, channel ->
+        //     println("Checking channel for: $comparison")
+        //     channel.view()
+        // }
+
+        // comparison_channels.each { comparison, channel ->
+        //     channel
+        //     .map { ... }  // Qualsiasi operazione desideri eseguire
+        //     .set { ... }  // Definisci la variabile di output per ulteriori fasi
+        // }
+
+
+        //ch_mle_in.map{meta, bam -> return tuple(meta.id, bam)},set{test_ch}
+
+        //ch_filtered_bams.flatten().view()
+
+        //ch_filtered_bams.toList().transpose().set { all_filtered_bams }
+        
+        //all_filtered_bams.view()
+
+        // 1. Create a Comparisons Channel
+        Channel
+            .fromPath(params.comparisonFile)
+            .splitCsv(header: true, sep: ',')
+            .map { row -> [row.sample1 + "_T1", row.sample2 + "_T1"] }
+            .set { comparisons_ch }
+
+// comparisons_ch.map { sample1, sample2 ->
+//     def filtered_channel = ch_mle_in.filter { meta, bam ->
+//         meta.id == sample1 || meta.id == sample2
+//     }.collect()
+//     return [ "${sample1}_VS_${sample2}", filtered_channel ]
+// }.set { mapped_comparison_channels }
+
+comparisons_ch.map { sample1, sample2 ->
+    def ch_name = "${sample1}_VS_${sample2}"
+    def filtered_channel = ch_mle_in.filter { meta, bam ->
+        meta.id == sample1 || meta.id == sample2
+    }.toList()
+    [ ch_name, filtered_channel ]
+}.set { mapped_comparison_channels }
+
+mapped_comparison_channels.view()
 
-        ch_filtered_bams = BAM_MARKDUPLICATES_PICARD.out.bam
-           .filter { meta, bam -> 
-                boolean isPresent = comparisons.keySet().contains(meta.id) || comparisons.values().contains(meta.id)
-                if (!isPresent) {
-                    //println("BAM not in comparisons list: $bam")  // Print BAMs not in the comparison list
-                }
-                return isPresent
-            }
 
-        // Print filtered BAMs
+        
+        //BAM_MARKDUPLICATES_PICARD.out.bam.view()
         //ch_filtered_bams.view()
+        //all_filtered_bams.view()
 
-        // Create BAM pairs for comparison
+        //test_ch=BAM_MARKDUPLICATES_PICARD.out.bam
         
-        ch_filtered_bams
-            .groupTuple(by: [0]) // Group by the metadata
-            .map { meta, bams ->
-                def comparisonValue = comparisons[meta.id]
-                println("Comparison Value for ${meta.id}: $comparisonValue")
-            }
-            .filter { it != null } // Remove any null values from the channel
-            .set { ch_bam_pairs_for_comparison }
-        println("Before view")
 
-        ch_bam_pairs_for_comparison.count().view()
+        //simplified_ch = channel.fromList ( all_filtered_bams )
+        // simplified_ch = all_filtered_bams.map { it ->
+
+        //         meta=it
+        //         path=it
+        //         println("meta: $meta")
+        //         println("path: $path")
+        //         //println("bamfile: $bamfile")
+        //         return tuple(meta, path)
+        //     }
+        // simplified_ch.view()
+
+        //comparisons_ch.view()
+
+        //all_filtered_bams.join(comparisons_ch).view()
+
+
+        // Creiamo una mappa da 'id' a 'path'
+// def filterBamsByComparison(metaList, pathList, comparison) {
+//     def ids = comparison.collect()
+//     def selectedMetas = metaList.findAll { it['id'] in ids }
+//     def selectedPaths = pathList.findAll { path -> 
+//         def matchingMeta = metaList.find { it['id'] in ids }
+//         path.contains(matchingMeta['id'])
+//     }
+//     return tuple(selectedMetas, selectedPaths)
+// }
+
+// Channel
+//     .combine(all_filtered_bams, comparisons_ch)
+//     .map { allMeta, allPath, comparison ->
+//         return filterBamsByComparison(allMeta, allPath, comparison)
+//     } set {iltered_bams_ch}
+
+// filtered_bams_ch.view()  // Solo per debug
+
+
+
+
+        //simplified_ch.view()
+
+
+
 
-        ch_bam_pairs_for_comparison.view()
 
-        println("after view")
+        //all_filtered_bams.view()
 
         if (params.stopAt == 'RTWOSAMPLESMLE') {
         return

From f01de83e2d53d53f737fdbd66c1334644a133116 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Tue, 3 Oct 2023 09:40:34 +0200
Subject: [PATCH 10/14] make comparison channel

---
 modules/local/rtwosamplesmle/main.nf |  14 +-
 workflows/sammyseq.nf                | 203 +++++++++++++++------------
 2 files changed, 129 insertions(+), 88 deletions(-)

diff --git a/modules/local/rtwosamplesmle/main.nf b/modules/local/rtwosamplesmle/main.nf
index 996e72a..fe691e0 100644
--- a/modules/local/rtwosamplesmle/main.nf
+++ b/modules/local/rtwosamplesmle/main.nf
@@ -31,7 +31,7 @@ process RTWOSAMPLESMLE {
     //    'biocontainers/YOUR-TOOL-HERE' }"
 
     //docker pull lucidif/r_two_samples_mle:0.0.1
-    //docker image tag 96ceba68eb6d quay.io/lucidif/r_two_samples_mle:0.0.1
+    //docker image tag ae028b17f7d3 quay.io/lucidif/r_two_samples_mle:0.0.1
     container 'lucidif/r_two_samples_mle:0.0.1'
 
 
@@ -42,9 +42,11 @@ process RTWOSAMPLESMLE {
     //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
     // TODO nf-core: Where applicable please provide/convert compressed files as input/output
     //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    tuple val(meta1), val(meta2), path(bam1), path(bam2)
+    //tuple val(meta1), val(meta2), path(bam1), path(bam2)
+    tuple val(meta), path(bam1), path(bam2)
     path chromsizes_file
     
+    
     //output:
     // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
     //tuple val(meta), path("*.bam"), emit: bam
@@ -68,6 +70,14 @@ process RTWOSAMPLESMLE {
     //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
     // TODO nf-core: Please replace the example samtools command below with your module's command
     // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
+    
+
+
+    //    bam_list_ch
+    //         .map { it[0].collate(2) } // Raggruppa ogni due elementi
+    //         .set { paired_bam_list_ch }        
+
+    
     template 'two_samples_mle.R'
 
     //stub:
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 4bb536e..5aca99c 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -179,6 +179,7 @@ workflow SAMMYSEQ {
     ch_fai_for_cut = SAMTOOLS_FAIDX.out.fai.collect()
 
     CUT_SIZES_GENOME(ch_fai_for_cut)
+    //CUT_SIZES_GENOME.out.ch_sizes_genome.view()
 
     //FASTQ_ALIGN_BWAALN.out.bam.view()
 
@@ -261,130 +262,160 @@ workflow SAMMYSEQ {
         }
 
 
-        // ch_filtered_bams = ch_mle_in.filter { meta, bam ->
-        //     boolean isInKeys = comparisons.keySet().contains(meta.id)
-        //     boolean isInValues = comparisons.values().contains(meta.id)
-
-        //     boolean isPresent = isInKeys || isInValues
-
-        //     //println("Is ${meta.id} present? $isPresent")
+        // 1. Create a Comparisons Channel
 
-        //     return isPresent
-        // }
+        Channel
+            .fromPath(params.comparisonFile)
+            .splitCsv(header: true, sep: ',')
+            .map { row -> [row.sample1 + "_T1", row.sample2 + "_T1"] }
+            .set { comparisons_ch }
 
         
-def comparison_channels = [:]
 
 
-        // comparisons.each { sample1, sample2 ->
-        //     def ch_name = "${sample1}_VS_${sample2}"
-        //     comparison_channels[ch_name] = ch_mle_in.filter { meta, bam ->
-        //         println("meta.id = $meta.id")
-        //         println("sample1 = $sample1")
-        //         println("sample2 = $sample2")
-        //         meta.id == sample1 || meta.id == sample2
-        //     }
-        // }
+        // ch_mle_in.collect { meta, bam -> [meta.id, bam] }
+        //          .toList()
+        //          .set { bam_list_ch }
 
-        // comparison_channels.each { comparison, channel ->
-        //     println("Checking channel for: $comparison")
-        //     channel.view()
-        // }
+        //bam_list_ch.view()
 
-        // comparison_channels.each { comparison, channel ->
-        //     channel
-        //     .map { ... }  // Qualsiasi operazione desideri eseguire
-        //     .set { ... }  // Definisci la variabile di output per ulteriori fasi
-        // }
+        // bam_list_ch
+        //     .map { it[0].collate(2) } // Raggruppa ogni due elementi
+        //     .set { paired_bam_list_ch }
 
 
-        //ch_mle_in.map{meta, bam -> return tuple(meta.id, bam)},set{test_ch}
+        //paired_bam_list_ch.view()
 
-        //ch_filtered_bams.flatten().view()
+        // paired_bam_list_ch
+        //         .transpose()
+        //         .set {  bam_pairs_ch }
 
-        //ch_filtered_bams.toList().transpose().set { all_filtered_bams }
+        // bam_pairs_ch.merge().view()
         
-        //all_filtered_bams.view()
+ch_bam_input=BAM_MARKDUPLICATES_PICARD.out.bam//.view{"get class : ${it.getClass()}"}
+//comparisons_ch.view()
+//CUT_SIZES_GENOME.out.ch_sizes_genome.view{"get class : ${it.getClass()}"}
 
-        // 1. Create a Comparisons Channel
-        Channel
-            .fromPath(params.comparisonFile)
-            .splitCsv(header: true, sep: ',')
-            .map { row -> [row.sample1 + "_T1", row.sample2 + "_T1"] }
-            .set { comparisons_ch }
 
-// comparisons_ch.map { sample1, sample2 ->
-//     def filtered_channel = ch_mle_in.filter { meta, bam ->
-//         meta.id == sample1 || meta.id == sample2
-//     }.collect()
-//     return [ "${sample1}_VS_${sample2}", filtered_channel ]
-// }.set { mapped_comparison_channels }
+Channel.fromPath(params.comparisonFile)
+        .splitCsv(header : true)
+        .map{ row -> 
+            //sample1 = [sample1:row.sample1 + "_T1"]
+            //sample2 = [sample2:row.sample2 + "_T1"]
+            [ row.sample1 + "_T1", row.sample2 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+        }
+        .set { comparisons_ch_s1 } 
+        //.view() 
+
+Channel.fromPath(params.comparisonFile)
+        .splitCsv(header : true)
+        .map{ row -> 
+            //sample1 = [sample1:row.sample1 + "_T1"]
+            //sample2 = [sample2:row.sample2 + "_T1"]
+            [ row.sample2 + "_T1", row.sample1 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+        }
+        .set { comparisons_ch_s2 } 
 
-comparisons_ch.map { sample1, sample2 ->
-    def ch_name = "${sample1}_VS_${sample2}"
-    def filtered_channel = ch_mle_in.filter { meta, bam ->
-        meta.id == sample1 || meta.id == sample2
-    }.toList()
-    [ ch_name, filtered_channel ]
-}.set { mapped_comparison_channels }
 
-mapped_comparison_channels.view()
+//Channel.fromList(ch_bam_input).view()
 
+        // ch_bam_input.collect { meta, bam -> [meta.id, bam] }
+        //          .toList()
+        //          .set { ch_bam_fix }
 
         
-        //BAM_MARKDUPLICATES_PICARD.out.bam.view()
-        //ch_filtered_bams.view()
-        //all_filtered_bams.view()
 
-        //test_ch=BAM_MARKDUPLICATES_PICARD.out.bam
+
+ch_bam_input
+        .map {meta, bam ->
+                        id=meta.subMap('id')
+                        [id.id, bam] 
+                        
+                        }
+        //.view{"ch_bam+convert: ${it}"}
+        .set { ch_bam_reformat }
+
+
+//ch_bam_test
+//ch_bam_reformat.combine(comparisons_ch_s1, by:0).view{ "combine: ${it}" }
+
+
+// ch_bam_reformat
+//         .toList()
+//         .flatten()
+//         .set { fix_ch_bam }
+
         
 
-        //simplified_ch = channel.fromList ( all_filtered_bams )
-        // simplified_ch = all_filtered_bams.map { it ->
+//comparisons_ch.view{"comparisons_ch= ${it}"}
+//ch_bam_reformat.view{"ch_bam_reformat= ${it}"}
+//ch_bam_input.view{"ch_bam_input= ${it}"}
 
-        //         meta=it
-        //         path=it
-        //         println("meta: $meta")
-        //         println("path: $path")
-        //         //println("bamfile: $bamfile")
-        //         return tuple(meta, path)
-        //     }
-        // simplified_ch.view()
 
-        //comparisons_ch.view()
+comparisons_ch_s1.combine(ch_bam_reformat , by:0)
+                .map { sample1, sample2, comparison, bam ->
+                    //[ comparison:comparison, sample1:sample1, sample2:sample2, bam1:bam1]
+                    [ comparison, bam]
+                }.set { bam1_comparison }
 
-        //all_filtered_bams.join(comparisons_ch).view()
+                //.view{"join= ${it}"}
+comparisons_ch_s2.combine(ch_bam_reformat, by:0)
+                .map{ sample2, sample1, comparison, bam ->
+                    //[ comparison:comparison , sample1:sample1, sample2:sample2 ,bam2:bam2 ]
+                    [ comparison, bam]
+                }
+                .set{ bam2_comparison }
 
+bam1_comparison.join(bam2_comparison, remainder: false, by: 0 ).set{comparisons_merge_ch}
 
-        // Creiamo una mappa da 'id' a 'path'
-// def filterBamsByComparison(metaList, pathList, comparison) {
-//     def ids = comparison.collect()
-//     def selectedMetas = metaList.findAll { it['id'] in ids }
-//     def selectedPaths = pathList.findAll { path -> 
-//         def matchingMeta = metaList.find { it['id'] in ids }
-//         path.contains(matchingMeta['id'])
-//     }
-//     return tuple(selectedMetas, selectedPaths)
-// }
+//comparisons_ch_s1.view{ "comparisons_ch_s1: ${it}" }
+//comparisons_ch_s2.view{ "comparisons_ch_s2: ${it}" }
 
-// Channel
-//     .combine(all_filtered_bams, comparisons_ch)
-//     .map { allMeta, allPath, comparison ->
-//         return filterBamsByComparison(allMeta, allPath, comparison)
-//     } set {iltered_bams_ch}
+//bam1_comparison.view{ "bam1_comparison: ${it}" }
+//bam2_comparison.view{ "bam2_comparison: ${it}" }
 
-// filtered_bams_ch.view()  // Solo per debug
 
+comparisons_merge_ch.view{ "comparisons_merge_ch: ${it}" }
 
+//ch_bam_reformat.view{ "ch_bam_reformat: ${it}" }
 
+//ch_bam_reformat.join(comparisons_ch, remainder:true ,by:[1]).view{"join= ${it}"}
 
-        //simplified_ch.view()
+// canale1.flatten()
+//     .combine(canale2, by: 0)
+//     .groupTuple(by: [0,1])
+//     .map { it.flatten() }
+//     .view()
+
+
+//ch_bam_reformat.view()
+//comparisons_ch.view()
+
+//comparisons_ch.cross( ch_bam_reformat ).view()
+
+// first_join = comparisons_ch
+//     .join(ch_bam_reformat, by: { it[0] }).view()
+//     //.map { it -> [it[0], it[2], it[3]] }.view()
+
+
+// Channel
+//     .of( 'alpha,beta,gamma\n10,20,30\n70,80,90' )
+//     .splitCsv(header: true)
+//     .view { row -> "${row.alpha} - ${row.beta} - ${row.gamma}" }
 
 
+// comparisons_ch
+//     .flatMap { sample1, sample2 -> [ [sample1, sample2] ] } // perché hai bisogno di entrambe le combinazioni?
+//     //.join(ch_bam_input, by: { it[0] })
+//     //.join(ch_bam_input, by: { it[1] })
+//     .set { ch_final_input }
 
+//     ch_final_input.view()
 
 
-        //all_filtered_bams.view()
+        RTWOSAMPLESMLE (comparisons_merge_ch,
+                        CUT_SIZES_GENOME.out.ch_sizes_genome
+                        )
 
         if (params.stopAt == 'RTWOSAMPLESMLE') {
         return

From 99f4ad0405e8d30b74a12fbfd4dffc87af892aa0 Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Tue, 3 Oct 2023 09:56:50 +0200
Subject: [PATCH 11/14] cleaning after comparison channel creation

---
 workflows/sammyseq.nf | 183 +++++++++---------------------------------
 1 file changed, 40 insertions(+), 143 deletions(-)

diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 5aca99c..006a4ca 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -221,16 +221,6 @@ workflow SAMMYSEQ {
              
         }
 
-    //ch_bam_bai_combined.view()
-    //ch_fasta_meta.map { it[2] }.view
-    //ch_fasta_meta.collect().view()
-    //SAMTOOLS_FAIDX.out.fai.collect().view()
-    //SAMTOOLS_FAIDX.out.fai.map { it['path'] }.collect().view()
-    // println("first BAM_MARKDUPLICATES_PICARD.out.bam.view()") 
-    //     BAM_MARKDUPLICATES_PICARD.out.bam.view()
-    //     ch_bam_bai_combined.view()
-    // println("first END BAM_MARKDUPLICATES_PICARD.out.bam.view()")
-
 
     ch_fai_path = SAMTOOLS_FAIDX.out.fai.map { it[1] }
     //ch_fai_path.view()
@@ -249,6 +239,9 @@ workflow SAMMYSEQ {
 
     if (params.comparisonFile) {
         // Add the suffix "_T1" to each sample ID in the comparison file
+
+        ch_bam_input=BAM_MARKDUPLICATES_PICARD.out.bam
+
         def comparisons = [:]
         def isFirstLine = true
         csvFile = file(params.comparisonFile)
@@ -262,156 +255,60 @@ workflow SAMMYSEQ {
         }
 
 
-        // 1. Create a Comparisons Channel
+        // 1. Create a Comparisons Channels (one for sample 1 in comparison and another for sample 2 in comparison)
 
         Channel
             .fromPath(params.comparisonFile)
-            .splitCsv(header: true, sep: ',')
-            .map { row -> [row.sample1 + "_T1", row.sample2 + "_T1"] }
-            .set { comparisons_ch }
-
-        
-
-
-        // ch_mle_in.collect { meta, bam -> [meta.id, bam] }
-        //          .toList()
-        //          .set { bam_list_ch }
-
-        //bam_list_ch.view()
-
-        // bam_list_ch
-        //     .map { it[0].collate(2) } // Raggruppa ogni due elementi
-        //     .set { paired_bam_list_ch }
-
-
-        //paired_bam_list_ch.view()
-
-        // paired_bam_list_ch
-        //         .transpose()
-        //         .set {  bam_pairs_ch }
-
-        // bam_pairs_ch.merge().view()
-        
-ch_bam_input=BAM_MARKDUPLICATES_PICARD.out.bam//.view{"get class : ${it.getClass()}"}
-//comparisons_ch.view()
-//CUT_SIZES_GENOME.out.ch_sizes_genome.view{"get class : ${it.getClass()}"}
-
-
-Channel.fromPath(params.comparisonFile)
-        .splitCsv(header : true)
-        .map{ row -> 
-            //sample1 = [sample1:row.sample1 + "_T1"]
-            //sample2 = [sample2:row.sample2 + "_T1"]
-            [ row.sample1 + "_T1", row.sample2 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
-        }
-        .set { comparisons_ch_s1 } 
-        //.view() 
-
-Channel.fromPath(params.comparisonFile)
-        .splitCsv(header : true)
-        .map{ row -> 
-            //sample1 = [sample1:row.sample1 + "_T1"]
-            //sample2 = [sample2:row.sample2 + "_T1"]
-            [ row.sample2 + "_T1", row.sample1 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
-        }
-        .set { comparisons_ch_s2 } 
-
-
-//Channel.fromList(ch_bam_input).view()
-
-        // ch_bam_input.collect { meta, bam -> [meta.id, bam] }
-        //          .toList()
-        //          .set { ch_bam_fix }
-
-        
-
-
-ch_bam_input
-        .map {meta, bam ->
-                        id=meta.subMap('id')
-                        [id.id, bam] 
-                        
-                        }
-        //.view{"ch_bam+convert: ${it}"}
-        .set { ch_bam_reformat }
-
-
-//ch_bam_test
-//ch_bam_reformat.combine(comparisons_ch_s1, by:0).view{ "combine: ${it}" }
-
-
-// ch_bam_reformat
-//         .toList()
-//         .flatten()
-//         .set { fix_ch_bam }
+            .splitCsv(header : true)
+            .map{ row -> 
+                [ row.sample1 + "_T1", row.sample2 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+                }
+                .set { comparisons_ch_s1 } 
+                //.view() 
 
-        
+        Channel.fromPath(params.comparisonFile)
+                .splitCsv(header : true)
+                .map{ row -> 
+                    [ row.sample2 + "_T1", row.sample1 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+                }
+                .set { comparisons_ch_s2 } 
 
-//comparisons_ch.view{"comparisons_ch= ${it}"}
-//ch_bam_reformat.view{"ch_bam_reformat= ${it}"}
-//ch_bam_input.view{"ch_bam_input= ${it}"}
+        //2. convert bam file to input        
 
+        ch_bam_input
+                .map {meta, bam ->
+                    id=meta.subMap('id')
+                    [id.id, bam] 
+                                
+                    }
+                .set { ch_bam_reformat }
 
-comparisons_ch_s1.combine(ch_bam_reformat , by:0)
+        //3. combine comparison channel with bam list channel
+        comparisons_ch_s1
+                .combine(ch_bam_reformat , by:0)
                 .map { sample1, sample2, comparison, bam ->
                     //[ comparison:comparison, sample1:sample1, sample2:sample2, bam1:bam1]
-                    [ comparison, bam]
-                }.set { bam1_comparison }
+                [ comparison, bam]
+                }
+                .set { bam1_comparison }
 
-                //.view{"join= ${it}"}
-comparisons_ch_s2.combine(ch_bam_reformat, by:0)
+                        //.view{"join= ${it}"}
+        comparisons_ch_s2
+                .combine(ch_bam_reformat, by:0)
                 .map{ sample2, sample1, comparison, bam ->
                     //[ comparison:comparison , sample1:sample1, sample2:sample2 ,bam2:bam2 ]
                     [ comparison, bam]
-                }
+                    }
                 .set{ bam2_comparison }
 
-bam1_comparison.join(bam2_comparison, remainder: false, by: 0 ).set{comparisons_merge_ch}
-
-//comparisons_ch_s1.view{ "comparisons_ch_s1: ${it}" }
-//comparisons_ch_s2.view{ "comparisons_ch_s2: ${it}" }
-
-//bam1_comparison.view{ "bam1_comparison: ${it}" }
-//bam2_comparison.view{ "bam2_comparison: ${it}" }
-
-
-comparisons_merge_ch.view{ "comparisons_merge_ch: ${it}" }
-
-//ch_bam_reformat.view{ "ch_bam_reformat: ${it}" }
-
-//ch_bam_reformat.join(comparisons_ch, remainder:true ,by:[1]).view{"join= ${it}"}
-
-// canale1.flatten()
-//     .combine(canale2, by: 0)
-//     .groupTuple(by: [0,1])
-//     .map { it.flatten() }
-//     .view()
-
-
-//ch_bam_reformat.view()
-//comparisons_ch.view()
-
-//comparisons_ch.cross( ch_bam_reformat ).view()
-
-// first_join = comparisons_ch
-//     .join(ch_bam_reformat, by: { it[0] }).view()
-//     //.map { it -> [it[0], it[2], it[3]] }.view()
-
-
-// Channel
-//     .of( 'alpha,beta,gamma\n10,20,30\n70,80,90' )
-//     .splitCsv(header: true)
-//     .view { row -> "${row.alpha} - ${row.beta} - ${row.gamma}" }
-
-
-// comparisons_ch
-//     .flatMap { sample1, sample2 -> [ [sample1, sample2] ] } // perché hai bisogno di entrambe le combinazioni?
-//     //.join(ch_bam_input, by: { it[0] })
-//     //.join(ch_bam_input, by: { it[1] })
-//     .set { ch_final_input }
+        bam1_comparison
+                .join(bam2_comparison, remainder: false, by: 0 )
+                .set{comparisons_merge_ch}
 
-//     ch_final_input.view()
+        //comparisons_merge_ch
+        //        .view{ "comparisons_merge_ch: ${it}" }
 
+        //4.run rscript
 
         RTWOSAMPLESMLE (comparisons_merge_ch,
                         CUT_SIZES_GENOME.out.ch_sizes_genome

From 0d28684aaade476a783d1c221fac63325ad90a6f Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Wed, 4 Oct 2023 19:16:10 +0200
Subject: [PATCH 12/14] R mle comparison integration

---
 modules/local/rtwosamplesmle/main.nf          |  6 +-
 .../templates/two_samples_mle.R               | 76 ++++++++++++++++++-
 workflows/sammyseq.nf                         | 38 +++++-----
 3 files changed, 96 insertions(+), 24 deletions(-)

diff --git a/modules/local/rtwosamplesmle/main.nf b/modules/local/rtwosamplesmle/main.nf
index fe691e0..4d981bd 100644
--- a/modules/local/rtwosamplesmle/main.nf
+++ b/modules/local/rtwosamplesmle/main.nf
@@ -26,9 +26,9 @@ process RTWOSAMPLESMLE {
     
     //conda "YOUR-TOOL-HERE"
     
-    //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    //    'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-    //    'biocontainers/YOUR-TOOL-HERE' }"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+       'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
+       'biocontainers/YOUR-TOOL-HERE' }"
 
     //docker pull lucidif/r_two_samples_mle:0.0.1
     //docker image tag ae028b17f7d3 quay.io/lucidif/r_two_samples_mle:0.0.1
diff --git a/modules/local/rtwosamplesmle/templates/two_samples_mle.R b/modules/local/rtwosamplesmle/templates/two_samples_mle.R
index c9dc805..4d6eda4 100644
--- a/modules/local/rtwosamplesmle/templates/two_samples_mle.R
+++ b/modules/local/rtwosamplesmle/templates/two_samples_mle.R
@@ -10,6 +10,20 @@ require(data.table)
 require(spp)
 require(rtracklayer)
 
+################################################
+# FUNCTIONS
+################################################
+
+sortbychr <- function(x, chrcol="chr", stcol="start", endcol=NULL, chrorder=paste("chr", c(seq(22), "X", "Y"), sep="")) {
+        if (!(is.null(endcol))) {
+                x <- x[order(x[,endcol]),]
+        }       
+        x <- x[order(x[,stcol]),]
+        chrs <- ordered(x[,chrcol], levels=chrorder)
+        # chrs <- ordered(tmp, levels=paste("chr", c(seq(22), "X", "Y"), sep=""))
+        x <- x[order(chrs),]
+}
+
 ################################################
 ## PARAMS
 ################################################
@@ -23,6 +37,7 @@ ip_file <- "${bam1}"
 input_file <- "${bam2}"
 chromsizes_file <- "${chromsizes_file}"
 mle_output_file <- "${bam1.baseName}VS${bam2.baseName}_mle.txt"
+remove_anomalies <- FALSE
 
 ################################################
 # DEFAULT PARAMETERS
@@ -42,15 +57,70 @@ if (debug_mode){
     print(c("mle_output_file",mle_output_file))
 }
 
+## PROCESSING
+
+### Make the list of chromosomes
+
+print("first part")
+chromsizes <- fread(chromsizes_file, data.table = FALSE)
+#chromsizes <- sortbychr(chromsizes, chrcol="V1", stcol="V2", chrorder=paste("chr", c(seq(19), "X", "Y"), sep=""))
+#chrs <- as.character(sub('chr', '', chromsizes[, 1]))
+chrs <- chromsizes[, 1]
+
+### Import the data
+
+ip    <- read.bam.tags(ip_file)
+input <- read.bam.tags(input_file)
+
+### Select the informative tags
+ip_tags <- ip[['tags']]
+input_tags <- input[['tags']]
+print("second part")
+### Remove the tags with anomalies
+if(remove_anomalies==TRUE){
+  ip_rm <- remove.local.tag.anomalies(ip_tags[chrs])
+  input_rm <- remove.local.tag.anomalies(input_tags[chrs])
+}else{
+  ip_rm <- ip_tags
+  input_rm <- input_tags
+}
+
+print("third part")
+### Make the smoothing with the loglikelihood function
+print("lane 90")
+#print(paste0("ip_rm=",head(ip_rm)))
+#print(paste0("input_rm=",head(ip_rm)))
+chip_smoothed_mle <- get.smoothed.enrichment.mle(ip_rm, input_rm, tag.shift = 0, background.density.scaling = FALSE)
+print("lane 91")
+#chip_smoothed_mle <- get.smoothed.enrichment.mle(ip_tags, input_tags, tag.shift = 0, background.density.scaling = FALSE)
+
+ccl <- sapply(chip_smoothed_mle, nrow)
+cpos <- unlist(sapply(chip_smoothed_mle, function(csm) csm\$x))
+ccov <- unlist(sapply(chip_smoothed_mle, function(csm) csm\$y))
+print("line 97")
+gr.mle <- GRanges(seqnames = rep(names(chip_smoothed_mle), ccl), ranges = IRanges(start = cpos, end = cpos), strand = Rle('*', sum(ccl)), score = ccov)
+seqlengths(gr.mle) <- chromsizes[, 2]
+
+#rename score col
+col_names <- names(mcols(gr.mle))
+#col_names <- sub("\\..*\$", "", col_names)
+col_names <- "score"
+names(mcols(gr.mle)) <- col_names
+
+print("fouth part")
+#export out file
+export.bw(gr.mle, mle_output_file)
+
+
 ################################################
 ################################################
 ## R SESSION INFO                             ##
 ################################################
 ################################################
 
-##sink(paste(output_prefix, "R_sessionInfo.log", sep = '.'))
-##print(sessionInfo())
-##sink()
+# sink(paste(output_prefix, "R_sessionInfo.log", sep = '.'))
+# print(sessionInfo())
+# sink()
 
 ################################################
 ################################################
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index 006a4ca..ef0e3ce 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -242,17 +242,17 @@ workflow SAMMYSEQ {
 
         ch_bam_input=BAM_MARKDUPLICATES_PICARD.out.bam
 
-        def comparisons = [:]
-        def isFirstLine = true
-        csvFile = file(params.comparisonFile)
-        csvFile.eachLine { line ->
-            if (isFirstLine) {
-                isFirstLine = false
-                return
-            }
-            def (sample1, sample2) = line.split(',')
-            comparisons[sample1.trim() + "_T1"] = sample2.trim() + "_T1"
-        }
+        // def comparisons = [:]
+        // def isFirstLine = true
+        // csvFile = file(params.comparisonFile)
+        // csvFile.eachLine { line ->
+        //     if (isFirstLine) {
+        //         isFirstLine = false
+        //         return
+        //     }
+        //     def (sample1, sample2) = line.split(',')
+        //     comparisons[sample1.trim() + "_T1"] = sample2.trim() + "_T1"
+        // }
 
 
         // 1. Create a Comparisons Channels (one for sample 1 in comparison and another for sample 2 in comparison)
@@ -261,7 +261,8 @@ workflow SAMMYSEQ {
             .fromPath(params.comparisonFile)
             .splitCsv(header : true)
             .map{ row -> 
-                [ row.sample1 + "_T1", row.sample2 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+                //[ row.sample1 + "_T1", row.sample2 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+                [ row.sample1 + "_T1", row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
                 }
                 .set { comparisons_ch_s1 } 
                 //.view() 
@@ -269,12 +270,13 @@ workflow SAMMYSEQ {
         Channel.fromPath(params.comparisonFile)
                 .splitCsv(header : true)
                 .map{ row -> 
-                    [ row.sample2 + "_T1", row.sample1 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+                   // [ row.sample2 + "_T1", row.sample1 + "_T1",row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
+                   [ row.sample2 + "_T1", row.sample1 + "_T1_VS_" + row.sample2 + "_T1"]
                 }
                 .set { comparisons_ch_s2 } 
 
         //2. convert bam file to input        
-
+        // [[id:ggg, paired:true],path.bam]
         ch_bam_input
                 .map {meta, bam ->
                     id=meta.subMap('id')
@@ -286,7 +288,7 @@ workflow SAMMYSEQ {
         //3. combine comparison channel with bam list channel
         comparisons_ch_s1
                 .combine(ch_bam_reformat , by:0)
-                .map { sample1, sample2, comparison, bam ->
+                .map { sample1, comparison, bam ->
                     //[ comparison:comparison, sample1:sample1, sample2:sample2, bam1:bam1]
                 [ comparison, bam]
                 }
@@ -295,7 +297,7 @@ workflow SAMMYSEQ {
                         //.view{"join= ${it}"}
         comparisons_ch_s2
                 .combine(ch_bam_reformat, by:0)
-                .map{ sample2, sample1, comparison, bam ->
+                .map{ sample2, comparison, bam ->
                     //[ comparison:comparison , sample1:sample1, sample2:sample2 ,bam2:bam2 ]
                     [ comparison, bam]
                     }
@@ -311,8 +313,8 @@ workflow SAMMYSEQ {
         //4.run rscript
 
         RTWOSAMPLESMLE (comparisons_merge_ch,
-                        CUT_SIZES_GENOME.out.ch_sizes_genome
-                        )
+                       CUT_SIZES_GENOME.out.ch_sizes_genome
+                       )
 
         if (params.stopAt == 'RTWOSAMPLESMLE') {
         return

From 99ccc8f8767f5470a5e691544b60c3ba463b3eca Mon Sep 17 00:00:00 2001
From: Lucio Di Filippo <difilippolucio@gmail.com>
Date: Thu, 5 Oct 2023 18:00:38 +0200
Subject: [PATCH 13/14] clear code and add samtools to multiqc

---
 conf/modules.config                  | 70 +++-------------------------
 modules/local/rtwosamplesmle/main.nf | 10 ++--
 workflows/sammyseq.nf                | 15 ++----
 3 files changed, 15 insertions(+), 80 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 0b65a9c..d76dcc9 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -51,39 +51,11 @@ process {
     //
     // Alignment options
     //
-    withName: 'BWA_MEM' {
-        ext.args = ""
-        publishDir = [
-            path: { "${params.outdir}/bwa" },
-            enabled: false,
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            pattern: "*.bam"
-        ]
-    }
 
-    withName: 'SAMTOOLS_INDEX_BAM' {
-        ext.args = ""
-        publishDir = [
-            path: { "${params.outdir}/bwa" },
-            enabled: true,
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            pattern: "*.bai"
-        ]
-    }
 
-    withName: 'SAMTOOLS_SORT_BAM' {
-        ext.args = ""
-        ext.prefix = { "${meta.id}.sorted" }
-        publishDir = [
-            path: { "${params.outdir}/bwa" },
-            enabled: true,
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            pattern: "*sorted.bam"
-        ]
-    }
+
+
+
 
     //
     // Picard MarkDuplicates and Filtering options
@@ -129,41 +101,11 @@ process {
         ]
     }
 
-    withName: 'SAMTOOLS_VIEW_FILTER' {
-        ext.args = '-h -F 0x0400'
-        publishDir  = [
-            path: { "${params.outdir}/markduplicates/duplicates_removed" },
-            mode: params.publish_dir_mode,
-            pattern: '*filtered.{bai,bam}',
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            enabled: true
-        ]
-        ext.prefix  = { "${meta.id}.md.filtered" }
-    }
 
-    withName: 'SAMTOOLS_SORT_FILTERED' {
-        ext.args    = ""
-        ext.prefix  = { "${meta.id}.md.filtered.sorted" }
-        publishDir  = [
-            path: { "${params.outdir}/markduplicates/duplicates_removed" },
-            mode: params.publish_dir_mode,
-            pattern: '*filtered.sorted.bam',
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            enabled: true
-        ]
-    }
 
-    withName: 'SAMTOOLS_INDEX_FILTERED' {
-        ext.args    = ""
-        ext.prefix  = { "${meta.id}.md.filtered.sorted" }
-        publishDir  = [
-            path: { "${params.outdir}/markduplicates/duplicates_removed" },
-            mode: params.publish_dir_mode,
-            pattern: '*filtered.sorted.bai',
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            enabled: true
-        ]
-    }
+
+
+
 
 
 
diff --git a/modules/local/rtwosamplesmle/main.nf b/modules/local/rtwosamplesmle/main.nf
index 4d981bd..f0c05a3 100644
--- a/modules/local/rtwosamplesmle/main.nf
+++ b/modules/local/rtwosamplesmle/main.nf
@@ -26,14 +26,14 @@ process RTWOSAMPLESMLE {
     
     //conda "YOUR-TOOL-HERE"
     
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-       'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-       'biocontainers/YOUR-TOOL-HERE' }"
+    //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    //   'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
+    //   'biocontainers/YOUR-TOOL-HERE' }"
 
     //docker pull lucidif/r_two_samples_mle:0.0.1
     //docker image tag ae028b17f7d3 quay.io/lucidif/r_two_samples_mle:0.0.1
-    container 'lucidif/r_two_samples_mle:0.0.1'
-
+    //container 'lucidif/r_two_samples_mle:0.0.1'
+    //container 'https://registry-1.docker.io/v2/lucidif/r_two_samples_mle/blobs/sha256:038e9a5189bd7f946bc85db99ea5c43cce8b643c4e3723d52a2a763b8cf21304'
 
     input:
     // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
diff --git a/workflows/sammyseq.nf b/workflows/sammyseq.nf
index ef0e3ce..1b7d95e 100644
--- a/workflows/sammyseq.nf
+++ b/workflows/sammyseq.nf
@@ -242,17 +242,6 @@ workflow SAMMYSEQ {
 
         ch_bam_input=BAM_MARKDUPLICATES_PICARD.out.bam
 
-        // def comparisons = [:]
-        // def isFirstLine = true
-        // csvFile = file(params.comparisonFile)
-        // csvFile.eachLine { line ->
-        //     if (isFirstLine) {
-        //         isFirstLine = false
-        //         return
-        //     }
-        //     def (sample1, sample2) = line.split(',')
-        //     comparisons[sample1.trim() + "_T1"] = sample2.trim() + "_T1"
-        // }
 
 
         // 1. Create a Comparisons Channels (one for sample 1 in comparison and another for sample 2 in comparison)
@@ -342,6 +331,10 @@ workflow SAMMYSEQ {
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
 
+    ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.flagstat.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([]))
+
     MULTIQC (
         ch_multiqc_files.collect(),
         ch_multiqc_config.toList(),

From 8ec7d758333a89b292532fdb60cc3d3937dcf433 Mon Sep 17 00:00:00 2001
From: daisymut <margherita.mutarelli@cnr.it>
Date: Thu, 5 Oct 2023 18:53:28 +0200
Subject: [PATCH 14/14] fixed prettier

---
 nextflow_schema.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index e833cf5..1ff736c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -46,12 +46,11 @@
                     "fa_icon": "fas fa-file-signature"
                 },
                 "comparisonFile": {
-                    "type" : "string",
+                    "type": "string",
                     "format": "file-path",
                     "description": "Path to comma-separated file containing information about samples comparisons",
                     "mimetype": "text/csv",
                     "help_text": "You will need to create a comparisons file with information about the comparisons in your experiment before running the pipeline. Use this parameter to specify the necessary comparisons. It has to be a comma-separated file with 2 columns, one for the first sample to compair and the second one to sample against which to do it."
-
                 }
             }
         },