Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New module: custom/splitfastqbylane #2837

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
35 changes: 35 additions & 0 deletions modules/nf-core/custom/splitfastqbylane/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Runs the 'split_lanes_awk.sh' template on one or two FASTQ files and emits
// every '*.split.fastq.gz' file it produces, plus a versions.yml.
//
// Inputs : meta  - sample map; meta.id is used for the task tag and the default prefix
//          reads - a single FASTQ path or a list of paths (first = read 1, second = read 2)
// Outputs: reads    - tuple of meta and all '*.split.fastq.gz' files
//          versions - versions.yml
process CUSTOM_SPLITFASTQBYLANE {
    tag "$meta.id"
    label 'process_single'

    conda "anaconda::gawk=5.1.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
        'biocontainers/gawk:5.1.0' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.split.fastq.gz"), emit: reads
    path "versions.yml"                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    shell:
    // Variables are assigned without 'def' so that they are visible to the template.
    args   = task.ext.args ?: ''
    prefix = task.ext.prefix ?: "${meta.id}"
    // 'reads' is a bare path for single-end input and a list for paired-end input;
    // normalise once to a flat list.
    fastqs = [reads].flatten()
    read1  = fastqs[0]
    // Use an empty string (not null) when there is no second read: a null value would be
    // rendered as the text "null" in the template and defeat its emptiness check.
    read2  = fastqs.size() > 1 ? fastqs[1] : ''
    template 'split_lanes_awk.sh'

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.split.fastq.gz
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
    END_VERSIONS
    """
}
46 changes: 46 additions & 0 deletions modules/nf-core/custom/splitfastqbylane/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Module metadata for custom/splitfastqbylane: describes the tool used and the
# input/output entries of the module.
name: "custom_splitfastqbylane"
description: |
  Splits fastq files with multiple or unknown number of merged flowcell+lane sources into separate fastq files, each with single flowcell+lane sources.
keywords:
  - splitlanesbyfastq
  - awk
  - custom
  - fastq
  - split
  - lanes
tools:
  - "gawk":
      description: "The awk utility interprets a special-purpose programming language that makes it easy to handle simple data-reformatting jobs."
      homepage: "https://www.gnu.org/software/gawk/manual/gawk.html"
      documentation: "https://www.gnu.org/software/gawk/manual/gawk.html"
      tool_dev_url: "http://savannah.gnu.org/projects/gawk/"
      licence: "['GPL v3']"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # One file for single-end data, two files for paired-end data.
  - reads:
      type: file
      description: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
      pattern: "*.{fastq,fastq.gz}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The number of output files is a multiple of the number of input files.
  - reads:
      type: file
      description: List of output FastQ files of size of multiples of 1 and 2, for single-end and paired-end data, respectively.
      pattern: "*.split.fastq.gz"

authors:
  - "@anoronh4"
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

# Nextflow shell template: splits each input FASTQ into one gzipped FASTQ per
# flowcell/lane combination found in the read header lines.
# Placeholders written as an exclamation mark followed by braces are filled in
# by Nextflow before bash runs; dollar-prefixed names are ordinary bash/awk variables.

# split_by_lane READNUMBER FASTQ
#   READNUMBER  value placed after "_R" in the output file names
#   FASTQ       input file; decompressed with zcat when its name ends in "gz"
function split_by_lane() {
    local readnumber="$1"
    local fastq="$2"

    # Choose the reader per file, so a gzipped and a plain file can be mixed.
    local cat_="cat"
    if [[ "$fastq" == *gz ]] ; then
        cat_="zcat"
    fi

    awk \
        -v prefix="!{prefix}" \
        -v readnumber="$readnumber" \
        '
        BEGIN {FS = ":"}
        {
            # The rule fires on the first line of each 4-line record. Lane and
            # flowcell are taken by position counted from the end of that line,
            # split on ":".
            lane=$(NF-3)
            flowcell=$(NF-4)
            outfastq=prefix"@"flowcell"_L00"lane"_R"readnumber".split.fastq.gz"
            print | "gzip > "outfastq
            # Copy the remaining three lines of the record to the same output.
            for (i = 1; i <= 3; i++) {
                # Stop on end of input or read error, so that a truncated record
                # does not re-emit the previous line.
                if ((getline) <= 0) break
                print | "gzip > "outfastq
            }
        }
        ' <( "$cat_" "$fastq" )
}

split_by_lane 1 "!{read1}"
# The second read is optional. An unset value may be rendered either as an empty
# string or as the literal text "null", so both are treated as absent.
if [ -n "!{read2}" ] && [ "!{read2}" != "null" ] ; then
    split_by_lane 2 "!{read2}"
fi

cat <<-END_VERSIONS > versions.yml
"!{task.process}":
    gawk: $(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
END_VERSIONS
3 changes: 3 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,9 @@ csvtk/split:
custom/matrixfilter:
- modules/nf-core/custom/matrixfilter/**
- tests/modules/nf-core/custom/matrixfilter/**
# Entry for the custom/splitfastqbylane module: lists its module directory and
# its test directory, following the same pattern as the neighbouring entries.
custom/splitfastqbylane:
- modules/nf-core/custom/splitfastqbylane/**
- tests/modules/nf-core/custom/splitfastqbylane/**
custom/tabulartogseacls:
- modules/nf-core/custom/tabulartogseacls/**
- tests/modules/nf-core/custom/tabulartogseacls/**
Expand Down
27 changes: 27 additions & 0 deletions tests/modules/nf-core/custom/splitfastqbylane/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CUSTOM_SPLITFASTQBYLANE } from '../../../../../modules/nf-core/custom/splitfastqbylane/main.nf'

// Paired-end case: passes a meta map and a two-element list of sarscov2
// Illumina test FASTQ files to the module.
workflow test_custom_splitfastqbylane {

    illumina    = params.test_data['sarscov2']['illumina']
    sample_meta = [ id:'test', single_end:false ]
    fastq_pair  = [
        file(illumina['test_1_fastq_gz'], checkIfExists: true),
        file(illumina['test_2_fastq_gz'], checkIfExists: true)
    ]

    CUSTOM_SPLITFASTQBYLANE ( [ sample_meta, fastq_pair ] )
}

// Single-end case: passes a meta map and one sarscov2 Illumina test FASTQ file
// (a bare file, not wrapped in a list) to the module.
workflow test_custom_splitfastqbylane_single_end {

    sample_meta = [ id:'test', single_end:true ]
    fastq_file  = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    CUSTOM_SPLITFASTQBYLANE ( [ sample_meta, fastq_file ] )
}
5 changes: 5 additions & 0 deletions tests/modules/nf-core/custom/splitfastqbylane/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test configuration applied to every process in this test run.
process {

// Publish under <params.outdir>/<name>, where <name> is derived from the process name:
// take the last ':'-separated component, then its first '_'-separated token, lower-cased
// (e.g. a process named CUSTOM_SPLITFASTQBYLANE publishes to "<outdir>/custom").
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

}
31 changes: 31 additions & 0 deletions tests/modules/nf-core/custom/splitfastqbylane/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Paired-end run: expects R1 and R2 outputs for lanes L001-L004 plus a
# versions.yml that mentions gawk.
- name: custom splitfastqbylane test_custom_splitfastqbylane
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/nf-core/custom/splitfastqbylane
    -entry test_custom_splitfastqbylane
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/custom/splitfastqbylane/nextflow.config
  tags:
    - custom
    - custom/splitfastqbylane
  files:
    - path: output/custom/test@HK3MMAFX2_L001_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L001_R2.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L002_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L002_R2.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L003_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L003_R2.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L004_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L004_R2.split.fastq.gz
    - path: output/custom/versions.yml
      contains:
        - "gawk"

# Single-end run: expects R1 outputs for lanes L001-L004 plus a versions.yml
# that mentions gawk.
- name: custom splitfastqbylane test_custom_splitfastqbylane_single_end
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/nf-core/custom/splitfastqbylane
    -entry test_custom_splitfastqbylane_single_end
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/custom/splitfastqbylane/nextflow.config
  tags:
    - custom
    - custom/splitfastqbylane
  files:
    - path: output/custom/test@HK3MMAFX2_L001_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L002_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L003_R1.split.fastq.gz
    - path: output/custom/test@HK3MMAFX2_L004_R1.split.fastq.gz
    - path: output/custom/versions.yml
      contains:
        - "gawk"
Loading