Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arriba download update #6745

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions modules/nf-core/arriba/arriba/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the arriba/arriba module.
# conda-forge is listed before bioconda.
channels:
- conda-forge
- bioconda

# Single dependency: arriba pinned to version 2.4.0 from the bioconda channel.
# NOTE(review): the module's container tag is expected to carry the same
# version — confirm against main.nf when bumping this pin.
dependencies:
- bioconda::arriba=2.4.0
17 changes: 7 additions & 10 deletions modules/nf-core/arriba/arriba/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@ process ARRIBA_ARRIBA {
'biocontainers/arriba:2.4.0--h0033a41_2' }"

input:
tuple val(meta), path(bam)
tuple val(meta), path(bam)
tuple val(meta2), path(fasta)
tuple val(meta3), path(gtf)
tuple val(meta4), path(blacklist)
tuple val(meta5), path(known_fusions)
tuple val(meta6), path(structural_variants)
tuple val(meta7), path(tags)
tuple val(meta8), path(protein_domains)
path(blacklist)
path(known_fusions)
path(cytobands)
path(protein_domains)

output:
tuple val(meta), path("*.fusions.tsv") , emit: fusions
Expand All @@ -30,8 +29,7 @@ process ARRIBA_ARRIBA {
def prefix = task.ext.prefix ?: "${meta.id}"
def blacklist = blacklist ? "-b $blacklist" : "-f blacklist"
def known_fusions = known_fusions ? "-k $known_fusions" : ""
def structural_variants = structural_variants ? "-d $structual_variants" : ""
def tags = tags ? "-t $tags" : ""
def cytobands = cytobands ? "-d $cytobands" : ""
def protein_domains = protein_domains ? "-p $protein_domains" : ""

"""
Expand All @@ -43,8 +41,7 @@ process ARRIBA_ARRIBA {
-O ${prefix}.fusions.discarded.tsv \\
$blacklist \\
$known_fusions \\
$structural_variants \\
$tags \\
$cytobands \\
$protein_domains \\
$args

Expand Down
39 changes: 5 additions & 34 deletions modules/nf-core/arriba/arriba/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,48 +43,19 @@ input:
type: file
description: Annotation GTF file
pattern: "*.{gtf}"
- - meta4:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- blacklist:
- - blacklist:
type: file
description: Blacklist file
pattern: "*.{tsv}"
- - meta5:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- known_fusions:
- - known_fusions:
type: file
description: Known fusions file
pattern: "*.{tsv}"
- - meta6:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- structural_variants:
type: file
description: Structural variants file
pattern: "*.{tsv}"
- - meta7:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- tags:
- - cytobands:
type: file
description: Tags file
description: Cytobands file
pattern: "*.{tsv}"
- - meta8:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- protein_domains:
- - protein_domains:
type: file
description: Protein domains file
pattern: "*.{gff3}"
Expand Down
138 changes: 138 additions & 0 deletions modules/nf-core/arriba/arriba/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@

// nf-test suite for the ARRIBA_ARRIBA process defined in ../main.nf.
// A shared setup stage runs three upstream processes, followed by two test
// cases: a real run fed by STAR_ALIGN output and a "-stub" run fed by a
// static BAM. Both cases assert success and compare process.out to a snapshot.
nextflow_process {

name "Test Process ARRIBA_ARRIBA"
script "../main.nf"
process "ARRIBA_ARRIBA"

// Tags for this module plus the modules that are run in setup.
tag "modules"
tag "modules_nfcore"
tag "arriba"
tag "arriba/arriba"
tag "arriba/download"
tag "star/genomegenerate"
tag "star/align"

// Upstream processes executed before the test cases.
setup {
config "./nextflow.config"
// Runs ARRIBA_DOWNLOAD with the genome name 'GRCh38' as its only input.
// NOTE(review): none of the test inputs in this file reference
// ARRIBA_DOWNLOAD.out — confirm whether this run is still needed.
run("ARRIBA_DOWNLOAD") {
script "../../../arriba/download/main.nf"
process {
"""
input[0] = 'GRCh38'
"""
}
}
// Runs STAR_GENOMEGENERATE on the homo_sapiens test genome FASTA and GTF;
// its `index` output is consumed by STAR_ALIGN below.
run("STAR_GENOMEGENERATE") {
script "../../../star/genomegenerate/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
"""
}
}
// Runs STAR_ALIGN on the paired-end RNA-seq test FASTQs using the index
// from STAR_GENOMEGENERATE and the same GTF; its `bam` output is the
// input of the non-stub test case.
run("STAR_ALIGN") {
script "../../../star/align/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
input[5] = false
"""
}
}
}

// Real run: BAM from STAR_ALIGN plus the test genome FASTA and GTF.
// Inputs 3-6 are passed as empty lists (presumably the optional blacklist,
// known_fusions, cytobands and protein_domains files, in that order —
// confirm against the input declaration in ../main.nf).
test("homo_sapiens - paired_end") {

when {
process {
"""
input[0] = STAR_ALIGN.out.bam
input[1] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = []

input[4] = []

input[5] = []

input[6] = []
"""
}
}

// The process must succeed and its full output must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Stub run ("-stub" option): uses a static paired-end sorted BAM from the
// test data instead of STAR_ALIGN output; the remaining inputs mirror the
// real-run case above.
test("homo_sapiens - paired_end - stub") {

options "-stub"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.bam', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = []

input[4] = []

input[5] = []

input[6] = []
"""
}
}

// The stub run must succeed and its output must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
108 changes: 108 additions & 0 deletions modules/nf-core/arriba/arriba/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{
"homo_sapiens - paired_end": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,7c3383f7eb6d79b84b0bd30a7ef02d70"
]
],
"1": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,445cb87a27f063e751f93498cf8d10b5"
]
],
"2": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
],
"fusions": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,7c3383f7eb6d79b84b0bd30a7ef02d70"
]
],
"fusions_fail": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,445cb87a27f063e751f93498cf8d10b5"
]
],
"versions": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-08T15:14:51.73747"
},
"homo_sapiens - paired_end - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"1": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"2": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
],
"fusions": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"fusions_fail": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"versions": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-08T15:41:23.945072"
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
// Process-scope configuration: a publishDir rule plus per-process ext.args
// for the STAR_GENOMEGENERATE and STAR_ALIGN processes.
process {

// Publish directory under params.outdir, named from the process name:
// the last ':'-separated component, then the part before the first '_',
// lower-cased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

// Extra command-line arguments supplied to STAR_GENOMEGENERATE via ext.args.
withName: STAR_GENOMEGENERATE {
ext.args = '--genomeSAindexNbases 11'
}

// Extra command-line arguments supplied to STAR_ALIGN via ext.args: gzipped
// read input (zcat), unsorted BAM output, and a set of --chim* options with
// chimeric output written within the BAM (presumably the alignment settings
// arriba expects — confirm against the arriba documentation).
withName: STAR_ALIGN {
ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'
}

}
9 changes: 6 additions & 3 deletions modules/nf-core/arriba/download/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ process ARRIBA_DOWNLOAD {
'biocontainers/arriba:2.4.0--h0033a41_2' }"

input:
val(genome)

output:
path "*" , emit: reference
path "versions.yml" , emit: versions
path "blacklist*${genome}*.tsv.gz" , emit: blacklist
path "cytobands*${genome}*.tsv" , emit: cytobands
path "protein_domains*${genome}*.gff3" , emit: protein_domains
path "known_fusions*${genome}*.tsv.gz" , emit: known_fusions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -36,7 +40,6 @@ process ARRIBA_DOWNLOAD {
touch protein_domains_hg38_GRCh38_v2.4.0.gff3
touch cytobands_hg38_GRCh38_v2.4.0.tsv
touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz
touch protein_domains_hg38_GRCh38_v2.4.0.gff3

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
Loading
Loading