From d7399ff70f7df7de11932c2b7dd0a589b85d775a Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha
Date: Tue, 9 Jan 2024 14:06:05 -0500
Subject: [PATCH 1/5] add custom/splitfastqbylane module

---
 .../custom/splitfastqbylane/environment.yml   |  9 ++
 modules/msk/custom/splitfastqbylane/main.nf   | 45 ++++++++++
 modules/msk/custom/splitfastqbylane/meta.yml  | 57 +++++++++++++
 .../resources/usr/bin/split_lanes_awk.sh      | 38 +++++++++
 .../splitfastqbylane/tests/main.nf.test       | 64 +++++++++++++++
 .../splitfastqbylane/tests/main.nf.test.snap  | 82 +++++++++++++++++++
 .../custom/splitfastqbylane/tests/tags.yml    |  2 +
 tests/config/nf-test.config                   |  4 +-
 8 files changed, 300 insertions(+), 1 deletion(-)
 create mode 100644 modules/msk/custom/splitfastqbylane/environment.yml
 create mode 100644 modules/msk/custom/splitfastqbylane/main.nf
 create mode 100644 modules/msk/custom/splitfastqbylane/meta.yml
 create mode 100755 modules/msk/custom/splitfastqbylane/resources/usr/bin/split_lanes_awk.sh
 create mode 100644 modules/msk/custom/splitfastqbylane/tests/main.nf.test
 create mode 100644 modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap
 create mode 100644 modules/msk/custom/splitfastqbylane/tests/tags.yml

diff --git a/modules/msk/custom/splitfastqbylane/environment.yml b/modules/msk/custom/splitfastqbylane/environment.yml
new file mode 100644
index 00000000..d25a6e6a
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "custom_splitfastqbylane"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::gawk=4.1.3"
diff --git a/modules/msk/custom/splitfastqbylane/main.nf b/modules/msk/custom/splitfastqbylane/main.nf
new file mode 100644
index 00000000..62163856
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/main.nf
@@ -0,0 +1,45 @@
+process CUSTOM_SPLITFASTQBYLANE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:4.1.3--1':
+        'biocontainers/gawk:4.1.3--1' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.split.fastq.gz"), emit: reads
+    path "versions.yml"                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def read1 = [reads].flatten()[0]
+    def read2 = [reads].flatten().size() > 1 ? reads[1] : null
+    """
+    split_lanes_awk.sh ${prefix} ${read1} ${read2 ?: ''}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch out.split.fastq
+    gzip out.split.fastq
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/msk/custom/splitfastqbylane/meta.yml b/modules/msk/custom/splitfastqbylane/meta.yml
new file mode 100644
index 00000000..70e3d85d
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/meta.yml
@@ -0,0 +1,57 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_splitfastqbylane"
+## TODO nf-core: Add a description of the module and list keywords
+description: write your description here
+keywords:
+  - sort
+  - example
+  - genomics
+tools:
+  - "custom":
+      ## TODO nf-core: Add a description and other details for the software below
+      description: "GNU awk"
+      homepage: "None"
+      documentation: "None"
+      tool_dev_url: "None"
+      doi: ""
+      licence: ['GPL v3']
+
+## TODO nf-core: Add a description of all of the variables used as input
+input:
+  # Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+    
+  ## TODO nf-core: Delete / customise this example input
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+## TODO nf-core: Add a description of all of the variables used as output
+output:
+  #Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+    
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  ## TODO nf-core: Delete / customise this example output
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+authors:
+  - "@anoronh4"
+maintainers:
+  - "@anoronh4"
diff --git a/modules/msk/custom/splitfastqbylane/resources/usr/bin/split_lanes_awk.sh b/modules/msk/custom/splitfastqbylane/resources/usr/bin/split_lanes_awk.sh
new file mode 100755
index 00000000..c8fcdcac
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/resources/usr/bin/split_lanes_awk.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Usage: split_lanes_awk.sh <prefix> <read1.fastq[.gz]> [read2.fastq[.gz]]
+prefix=$1
+read1=$2
+read2=$3
+# Abort on the first failing command so a broken split fails the calling task.
+set -e
+if [[ "${read1}" == *gz ]] ; then
+    cat_="zcat"
+else
+    cat_="cat"
+fi
+# a <readnumber> <fastq> <prefix>: write one gzipped FASTQ per flowcell/lane found in the read headers.
+function a() {
+    awk \
+        -v prefix="$3" \
+        -v readnumber="$1" \
+        '
+        BEGIN {FS = ":"}
+        {
+            lane=$(NF-3)
+            flowcell=$(NF-4)
+            outfastq=prefix"@"flowcell"_L00"lane"_R"readnumber".split.fastq.gz"
+            print | "gzip > "outfastq
+            for (i = 1; i <= 3; i++) {
+                if ((getline) <= 0) { print "truncated FASTQ record" > "/dev/stderr"; exit 1 }
+                print | "gzip > "outfastq
+            }
+        }
+        ' <( "${cat_}" "$2" )
+}
+
+echo "processing read1"
+a 1 "${read1}" "${prefix}"
+if [ -n "${read2}" ] ; then
+    echo "processing read2"
+    a 2 "${read2}" "${prefix}"
+fi
\ No newline at end of file
diff --git a/modules/msk/custom/splitfastqbylane/tests/main.nf.test b/modules/msk/custom/splitfastqbylane/tests/main.nf.test
new file mode 100644
index 00000000..1c1e14c7
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/tests/main.nf.test
@@ -0,0 +1,64 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_SPLITFASTQBYLANE"
+    script "../main.nf"
+    process "CUSTOM_SPLITFASTQBYLANE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/splitfastqbylane"
+
+    test("homo_sapiens - fastq") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fastq - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap b/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap
new file mode 100644
index 00000000..a86c449d
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap
@@ -0,0 +1,82 @@
+{
+    "sarscov2 - fastq - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "out.split.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "out.split.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                ]
+            }
+        ],
+        "timestamp": "2024-01-09T14:04:29.402045"
+    },
+    "homo_sapiens - fastq": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test@HK3MMAFX2_L001_R1.split.fastq.gz:md5,6fdae9563e6f3428ea953435cb71f762",
+                            "test@HK3MMAFX2_L001_R2.split.fastq.gz:md5,a2315ba81e2993823e38af0b83b9e7bd",
+                            "test@HK3MMAFX2_L002_R1.split.fastq.gz:md5,3c5e1d1bf430c964fa2ac1b7faf8c313",
+                            "test@HK3MMAFX2_L002_R2.split.fastq.gz:md5,f3aee4f14f59bd2e26753ec0dda1d5dc",
+                            "test@HK3MMAFX2_L003_R1.split.fastq.gz:md5,c98453d33b9bb4ff224ae5571489913b",
+                            "test@HK3MMAFX2_L003_R2.split.fastq.gz:md5,898a6d549be78b5214c3d5647e7c3c49",
+                            "test@HK3MMAFX2_L004_R1.split.fastq.gz:md5,11b776b50ea9bcc6670aaca952f6a939",
+                            "test@HK3MMAFX2_L004_R2.split.fastq.gz:md5,a50125ecb428a7f534a944c5a22c8109"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test@HK3MMAFX2_L001_R1.split.fastq.gz:md5,6fdae9563e6f3428ea953435cb71f762",
+                            "test@HK3MMAFX2_L001_R2.split.fastq.gz:md5,a2315ba81e2993823e38af0b83b9e7bd",
+                            "test@HK3MMAFX2_L002_R1.split.fastq.gz:md5,3c5e1d1bf430c964fa2ac1b7faf8c313",
+                            "test@HK3MMAFX2_L002_R2.split.fastq.gz:md5,f3aee4f14f59bd2e26753ec0dda1d5dc",
+                            "test@HK3MMAFX2_L003_R1.split.fastq.gz:md5,c98453d33b9bb4ff224ae5571489913b",
+                            "test@HK3MMAFX2_L003_R2.split.fastq.gz:md5,898a6d549be78b5214c3d5647e7c3c49",
+                            "test@HK3MMAFX2_L004_R1.split.fastq.gz:md5,11b776b50ea9bcc6670aaca952f6a939",
+                            "test@HK3MMAFX2_L004_R2.split.fastq.gz:md5,a50125ecb428a7f534a944c5a22c8109"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                ]
+            }
+        ],
+        "timestamp": "2024-01-09T14:04:21.710291"
+    }
+}
\ No newline at end of file
diff --git a/modules/msk/custom/splitfastqbylane/tests/tags.yml b/modules/msk/custom/splitfastqbylane/tests/tags.yml
new file mode 100644
index 00000000..a60df419
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/splitfastqbylane:
+  - "modules/msk/custom/splitfastqbylane/**"
diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config
index c9a38549..2112df8a 100644
--- a/tests/config/nf-test.config
+++ b/tests/config/nf-test.config
@@ -45,4 +45,6 @@ includeConfig 'test_data.config'
 
 manifest {
     nextflowVersion = '!>=23.04.0'
-}
\ No newline at end of file
+}
+
+nextflow.enable.moduleBinaries = true
\ No newline at end of file

From 68155db4423882f168c0f950cbc65b31c209a45d Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha
Date: Tue, 9 Jan 2024 14:12:01 -0500
Subject: [PATCH 2/5] update meta.yml

---
 modules/msk/custom/splitfastqbylane/meta.yml | 39 +++++++-------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/modules/msk/custom/splitfastqbylane/meta.yml b/modules/msk/custom/splitfastqbylane/meta.yml
index 70e3d85d..c6a3c83d 100644
--- a/modules/msk/custom/splitfastqbylane/meta.yml
+++ b/modules/msk/custom/splitfastqbylane/meta.yml
@@ -1,55 +1,44 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "custom_splitfastqbylane"
-## TODO nf-core: Add a description of the module and list keywords
-description: write your description here
+description: Split fastq into multiple fastqs by lane
 keywords:
-  - sort
-  - example
-  - genomics
+  - awk
+  - fastq
+  - split
 tools:
   - "custom":
-      ## TODO nf-core: Add a description and other details for the software below
       description: "GNU awk"
-      homepage: "None"
-      documentation: "None"
-      tool_dev_url: "None"
-      doi: ""
-      licence: ['GPL v3']
+      homepage: "https://www.gnu.org/software/gawk/manual/gawk.html"
+      licence: ["GPL v3"]
 
-## TODO nf-core: Add a description of all of the variables used as input
 input:
-  # Only when we have meta
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. `[ id:'sample1', single_end:false ]`
-    
-  ## TODO nf-core: Delete / customise this example input
-  - bam:
+
+  - reads:
       type: file
-      description: Sorted BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
+      description: Paired end or single end FASTQ file(s)
+      pattern: "*.{fastq,fastq.gz}"
 
-## TODO nf-core: Add a description of all of the variables used as output
 output:
-  #Only when we have meta
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. `[ id:'sample1', single_end:false ]`
-    
+
   - versions:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-  ## TODO nf-core: Delete / customise this example output
-  - bam:
+  - reads:
       type: file
-      description: Sorted BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
+      description: Output fastq files containing only one read lane per file.
+      pattern: "*.{split.fastq.gz}"
 
 authors:
   - "@anoronh4"

From bcc682c58b724fa760e3d3bf359ef70ae09dbe8a Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha
Date: Tue, 16 Jan 2024 11:34:28 -0500
Subject: [PATCH 3/5] update gawk version

---
 modules/msk/custom/splitfastqbylane/environment.yml |  2 +-
 modules/msk/custom/splitfastqbylane/main.nf         |  4 ++--
 .../custom/splitfastqbylane/tests/main.nf.test.snap | 12 ++++++------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/modules/msk/custom/splitfastqbylane/environment.yml b/modules/msk/custom/splitfastqbylane/environment.yml
index d25a6e6a..86c5c292 100644
--- a/modules/msk/custom/splitfastqbylane/environment.yml
+++ b/modules/msk/custom/splitfastqbylane/environment.yml
@@ -6,4 +6,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - "bioconda::gawk=4.1.3"
+  - "anaconda::gawk=5.1.0"
diff --git a/modules/msk/custom/splitfastqbylane/main.nf b/modules/msk/custom/splitfastqbylane/main.nf
index 62163856..3db4fa90 100644
--- a/modules/msk/custom/splitfastqbylane/main.nf
+++ b/modules/msk/custom/splitfastqbylane/main.nf
@@ -4,8 +4,8 @@ process CUSTOM_SPLITFASTQBYLANE {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gawk:4.1.3--1':
-        'biocontainers/gawk:4.1.3--1' }"
+        'https://depot.galaxyproject.org/singularity/gawk:5.1.0':
+        'biocontainers/gawk:5.1.0' }"
 
     input:
     tuple val(meta), path(reads)
diff --git a/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap b/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap
index a86c449d..69fa8fbd 100644
--- a/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap
+++ b/modules/msk/custom/splitfastqbylane/tests/main.nf.test.snap
@@ -12,7 +12,7 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                    "versions.yml:md5,68b779fcdaf62c97ffcd67d49ab59b0e"
                 ],
                 "reads": [
                     [
@@ -24,11 +24,11 @@
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                    "versions.yml:md5,68b779fcdaf62c97ffcd67d49ab59b0e"
                 ]
             }
         ],
-        "timestamp": "2024-01-09T14:04:29.402045"
+        "timestamp": "2024-01-16T11:28:05.672874"
     },
     "homo_sapiens - fastq": {
         "content": [
@@ -52,7 +52,7 @@
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                    "versions.yml:md5,68b779fcdaf62c97ffcd67d49ab59b0e"
                 ],
                 "reads": [
                     [
@@ -73,10 +73,10 @@
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,562408a0639390b9c86c9f5e60b85691"
+                    "versions.yml:md5,68b779fcdaf62c97ffcd67d49ab59b0e"
                 ]
             }
         ],
-        "timestamp": "2024-01-09T14:04:21.710291"
+        "timestamp": "2024-01-16T11:27:57.305806"
     }
 }
\ No newline at end of file

From 8c9b59ea8b18a95f9fb0fcb97b5df6ca22a969a3 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha
Date: Tue, 23 Jan 2024 15:10:27 -0500
Subject: [PATCH 4/5] polish syntax of input parameters

---
 modules/msk/custom/splitfastqbylane/main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/msk/custom/splitfastqbylane/main.nf b/modules/msk/custom/splitfastqbylane/main.nf
index 3db4fa90..5bf09388 100644
--- a/modules/msk/custom/splitfastqbylane/main.nf
+++ b/modules/msk/custom/splitfastqbylane/main.nf
@@ -21,9 +21,9 @@ process CUSTOM_SPLITFASTQBYLANE {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def read1 = [reads].flatten()[0]
-    def read2 = [reads].flatten().size() > 1 ? reads[1] : null
+    def read2 = [reads].flatten().size() > 1 ? reads[1] : ''
     """
-    split_lanes_awk.sh ${prefix} ${read1} ${read2 ?: ''}
+    split_lanes_awk.sh ${prefix} ${read1} ${read2}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From d7135ee82cd0aa13f3ee6d2976ffd9c1e8f02e59 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha
Date: Thu, 25 Jan 2024 14:44:56 -0500
Subject: [PATCH 5/5] add config file to tests as a template for users

---
 modules/msk/custom/splitfastqbylane/tests/main.nf.test    | 2 ++
 modules/msk/custom/splitfastqbylane/tests/nextflow.config | 5 +++++
 2 files changed, 7 insertions(+)
 create mode 100644 modules/msk/custom/splitfastqbylane/tests/nextflow.config

diff --git a/modules/msk/custom/splitfastqbylane/tests/main.nf.test b/modules/msk/custom/splitfastqbylane/tests/main.nf.test
index 1c1e14c7..e4e7c3e4 100644
--- a/modules/msk/custom/splitfastqbylane/tests/main.nf.test
+++ b/modules/msk/custom/splitfastqbylane/tests/main.nf.test
@@ -10,6 +10,7 @@ nextflow_process {
     tag "custom/splitfastqbylane"
 
     test("homo_sapiens - fastq") {
+        config './nextflow.config'
 
         when {
             process {
@@ -35,6 +36,7 @@ nextflow_process {
     }
 
     test("sarscov2 - fastq - stub") {
+        config './nextflow.config'
 
         options "-stub"
 
diff --git a/modules/msk/custom/splitfastqbylane/tests/nextflow.config b/modules/msk/custom/splitfastqbylane/tests/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/modules/msk/custom/splitfastqbylane/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}