Skip to content

Commit

Permalink
Add mitohifi/mitohifi module (#3573)
Browse files Browse the repository at this point in the history
* Add mitohifi module

* Lint and add a note in pytest_modules

* Fix prettier checks

* Fix spelling

* Fix data references

* Make a note not to use conda

* Change md5 values

* Remove extra file

* Fix code according to code review

* ECLint check fix

* Extend description of the output

* Fix md5 hash for output

* Fix prettier

* Fix conda checks

* Undo changes before merge

* Skip conda test on github

* Update md5sums

---------

Co-authored-by: Ksenia Krasheninnikova <[email protected]>
Co-authored-by: Priyanka Surana <[email protected]>
  • Loading branch information
3 people authored Sep 5, 2023
1 parent 780e332 commit 02c3c39
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ jobs:
tags: merquryfk/merquryfk
- profile: "conda"
tags: merquryfk/ploidyplot
- profile: "conda"
tags: mitohifi/mitohifi
- profile: "conda"
tags: mitohifi/findmitoreference
- profile: "conda"
Expand Down
66 changes: 66 additions & 0 deletions modules/nf-core/mitohifi/mitohifi/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Runs mitohifi.py on either HiFi reads (-r) or assembled contigs (-c) together
// with a reference FASTA / GenBank pair, and collects the result files it writes.
process MITOHIFI_MITOHIFI {
    tag "$meta.id"
    label 'process_high'

    // Docker image available at the project github repository
    container 'ghcr.io/marcelauliano/mitohifi:master'

    input:
    // `reads` takes precedence over `contigs`; pass [] for the one that is not used.
    tuple val(meta), path(reads), path(contigs)
    path ref_fa
    path ref_gb
    val mito_code

    output:
    // Only the final FASTA, the contig statistics and versions.yml are mandatory;
    // every other pattern is optional and is emitted only if a matching path exists.
    tuple val(meta), path("*fasta")                          , emit: fasta
    tuple val(meta), path("*contigs_stats.tsv")              , emit: stats
    tuple val(meta), path("*gb")                             , emit: gb, optional: true
    tuple val(meta), path("*gff")                            , emit: gff, optional: true
    tuple val(meta), path("*all_potential_contigs.fa")       , emit: all_potential_contigs, optional: true
    tuple val(meta), path("*contigs_annotations.png")        , emit: contigs_annotations, optional: true
    tuple val(meta), path("*contigs_circularization")        , emit: contigs_circularization, optional: true
    tuple val(meta), path("*contigs_filtering")              , emit: contigs_filtering, optional: true
    tuple val(meta), path("*coverage_mapping")               , emit: coverage_mapping, optional: true
    tuple val(meta), path("*coverage_plot.png")              , emit: coverage_plot, optional: true
    tuple val(meta), path("*final_mitogenome.annotation.png"), emit: final_mitogenome_annotation, optional: true
    tuple val(meta), path("*final_mitogenome_choice")        , emit: final_mitogenome_choice, optional: true
    tuple val(meta), path("*final_mitogenome.coverage.png")  , emit: final_mitogenome_coverage, optional: true
    tuple val(meta), path("*potential_contigs")              , emit: potential_contigs, optional: true
    tuple val(meta), path("*reads_mapping_and_assembly")     , emit: reads_mapping_and_assembly, optional: true
    tuple val(meta), path("*shared_genes.tsv")               , emit: shared_genes, optional: true
    path "versions.yml"                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        exit 1, "MitoHiFi module does not support Conda. Please use Docker / Singularity instead."
    }

    def args = task.ext.args ?: ''

    // Choose the input mode: reads if provided, otherwise contigs.
    // An explicit non-zero exit code is passed so that a missing input aborts
    // the run as a failure, in the same way as the Conda guard above.
    def run_type = ''
    if (reads) {
        run_type = "-r ${reads}"
    } else if (contigs) {
        run_type = "-c ${contigs}"
    } else {
        exit 1, "Reads or contigs must be specified"
    }
    """
    mitohifi.py ${run_type} -f ${ref_fa} -g ${ref_gb} -o ${mito_code} -t $task.cpus ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mitohifi: \$( mitohifi.py --version 2>&1 | head -n1 | sed 's/^.*MitoHiFi //; s/ .*\$//' )
    END_VERSIONS
    """

    stub:
    // Creates empty placeholders for the two mandatory outputs only.
    """
    touch final_mitogenome.fasta
    touch contigs_stats.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mitohifi: \$( mitohifi.py --version 2>&1 | head -n1 | sed 's/^.*MitoHiFi //; s/ .*\$//')
    END_VERSIONS
    """
}
109 changes: 109 additions & 0 deletions modules/nf-core/mitohifi/mitohifi/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Module metadata for MITOHIFI_MITOHIFI.
# Input and output names below mirror the names declared in the module's main.nf.
name: "MITOHIFI_MITOHIFI"
description: A python workflow that assembles mitogenomes from PacBio HiFi reads
keywords:
  - mitochondrion
  - chloroplast
  - PacBio
tools:
  - "mitohifi.py":
      description: A python workflow that assembles mitogenomes from PacBio HiFi reads
      homepage: https://github.com/marcelauliano/MitoHiFi
      documentation: https://github.com/marcelauliano/MitoHiFi
      tool_dev_url: https://github.com/marcelauliano/MitoHiFi
      doi: "10.1101/2022.12.23.521667"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - reads:
      type: file
      description: Path to PacBio HiFi reads
      pattern: "*.{fa,fa.gz,fasta,fasta.gz}"
  - contigs:
      type: file
      description: Path to genome assembly
      pattern: "*.{fa,fasta}"
  - ref_fa:
      type: file
      description: Reference sequence
      pattern: "*.{fa,fasta}"
  - ref_gb:
      type: file
      description: Reference annotation
      pattern: "*.{gb}"
  # Named `mito_code` to match the process input; passed to mitohifi.py as `-o`.
  - mito_code:
      type: integer
      description: Mitochondrial code for annotation
      pattern: "[0-9]*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fasta:
      type: file
      description: Mitochondrial sequence
      pattern: "*.{fasta,fa}"
  - stats:
      type: file
      description: Tab-separated contigs statistics
      pattern: "*contigs_stats.tsv"
  - gb:
      type: file
      description: Genome annotation in case mitofinder was used
      pattern: "*.gb"
  - gff:
      type: file
      description: Genome annotation in case mitos was used
      pattern: "*.gff"
  - all_potential_contigs:
      type: file
      description: Contains sequences of all potential contigs
      pattern: "*all_potential_contigs.fa"
  - contigs_annotations:
      type: file
      description: Graphical representation of annotated genes and tRNAs
      pattern: "*contigs_annotations.png"
  - contigs_circularization:
      type: directory
      description: Contains circularization reports
      pattern: "*contigs_circularization"
  - contigs_filtering:
      type: directory
      description: Contains files with initial blast matches
      pattern: "*contigs_filtering"
  - coverage_mapping:
      type: directory
      description: Contains statistics on coverage mapping
      pattern: "*coverage_mapping"
  - coverage_plot:
      type: file
      description: Read coverage plot for mitochondrial contigs
      pattern: "*coverage_plot.png"
  - final_mitogenome_annotation:
      type: file
      description: Graphical representation of annotated genes for the final mito contig
      pattern: "*final_mitogenome.annotation.png"
  - final_mitogenome_choice:
      type: directory
      description: Files with potential contigs clusterings and alignments
      pattern: "*final_mitogenome_choice"
  - final_mitogenome_coverage:
      type: file
      description: Graphical representation of reads coverage plot for the final mito contig
      pattern: "*final_mitogenome.coverage.png"
  - potential_contigs:
      type: directory
      description: Files with sequences and annotations of the potential contigs
      pattern: "*potential_contigs"
  - reads_mapping_and_assembly:
      type: directory
      description: Read mapping files for run from the raw reads
      pattern: "*reads_mapping_and_assembly"
  - shared_genes:
      type: file
      description: Report on genes shared with the reference genome
      pattern: "*shared_genes.tsv"

authors:
  - "@ksenia-krasheninnikova"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2450,6 +2450,10 @@ mitohifi/findmitoreference:
- modules/nf-core/mitohifi/findmitoreference/**
- tests/modules/nf-core/mitohifi/findmitoreference/**

mitohifi/mitohifi:
- modules/nf-core/mitohifi/mitohifi/**
- tests/modules/nf-core/mitohifi/mitohifi/**

mlst:
- modules/nf-core/mlst/**
- tests/modules/nf-core/mlst/**
Expand Down
8 changes: 8 additions & 0 deletions tests/config/test_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,14 @@ params {
pretext = "${params.test_data_base}/data/genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext"
}
}
// Test data entries under the eukaryotes/deilephila_porcellus/mito directory of the test-data base.
'deilephila_porcellus' {
'mito' {
// Reference pair: FASTA sequence and GenBank file sharing the MW539688.1 base name.
ref_fa = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.fasta"
ref_gb = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.gb"
// Sample inputs with the ilDeiPorc1 prefix: HiFi reads and contigs, both FASTA.
hifi_reads = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa"
contigs = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.contigs.fa"
}
}
'imaging' {
'h5' {
plant_wga = "${params.test_data_base}/data/imaging/h5/plant_wga.h5"
Expand Down
16 changes: 16 additions & 0 deletions tests/modules/nf-core/mitohifi/mitohifi/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { MITOHIFI_MITOHIFI as MITOHIFI_MITOHIFI } from '../../../../../modules/nf-core/mitohifi/mitohifi/main.nf'

// Runs MITOHIFI_MITOHIFI in contigs mode on the deilephila_porcellus mito test data.
workflow test_mitohifi_mitohifi {

    // [ meta, reads, contigs ]: reads is left empty ([]) so the module uses the contigs.
    data_contigs = Channel.of([[id:"ilDeiPorc1"],[],file(params.test_data['deilephila_porcellus']['mito']['contigs'], checkIfExists: true)])
    ref_gb = file(params.test_data['deilephila_porcellus']['mito']['ref_gb'], checkIfExists: true)
    ref_fa = file(params.test_data['deilephila_porcellus']['mito']['ref_fa'], checkIfExists: true)
    // Value handed to the module's `mito_code` input.
    code = 5
    MITOHIFI_MITOHIFI ( data_contigs, ref_fa, ref_gb, code )
}
5 changes: 5 additions & 0 deletions tests/modules/nf-core/mitohifi/mitohifi/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Publish every process's outputs under <params.outdir>/<tool>, where <tool> is the
// lower-cased first '_'-separated token of the last ':'-separated part of the process name.
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
12 changes: 12 additions & 0 deletions tests/modules/nf-core/mitohifi/mitohifi/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Test case: runs the test_mitohifi_mitohifi entry workflow and checks the listed
# output files against their expected md5 checksums.
- name: mitohifi mitohifi test_mitohifi_mitohifi
  command: nextflow run ./tests/modules/nf-core/mitohifi/mitohifi -entry test_mitohifi_mitohifi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/mitohifi/mitohifi/nextflow.config
  tags:
    - mitohifi
    - mitohifi/mitohifi
  files:
    # Final mitogenome sequence
    - path: output/mitohifi/final_mitogenome.fasta
      md5sum: 5654c418bbf991483d9e618dd849af03
    # Final mitogenome GenBank file
    - path: output/mitohifi/final_mitogenome.gb
      md5sum: 3b4659a0d7f27fd89510a25c0588909d
    # Software versions
    - path: output/mitohifi/versions.yml
      md5sum: 2a4db1ea8ac4b9b11ea47e1c3963f591

0 comments on commit 02c3c39

Please sign in to comment.