Skip to content

Commit

Permalink
Add openmsthirdparty/cometadapter (#6627)
Browse files Browse the repository at this point in the history
* Setup cometadapter module

* cometadapter draft

* finalize tests

* remove defaults

* enclosing input as channel

* replace collect with map

* add channel of again

* fix input channels by joining them

* update correct snapshot

* fix lint

* fix snapshots, comet writes timestamps in output file

* prettier

* Update environment.yml

* strip out suffix version tag, which differs between container and conda

* move to version content check instead of hash

* align conda version and container version tag
  • Loading branch information
jonasscheid authored Sep 13, 2024
1 parent b6b54f3 commit af91cff
Show file tree
Hide file tree
Showing 8 changed files with 283 additions and 0 deletions.
5 changes: 5 additions & 0 deletions modules/nf-core/openmsthirdparty/cometadapter/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the openmsthirdparty/cometadapter module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Pinned to the same 3.1.0 release as the module's container tag.
  - "bioconda::openms-thirdparty=3.1.0"
55 changes: 55 additions & 0 deletions modules/nf-core/openmsthirdparty/cometadapter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Runs the OpenMS CometAdapter on one mzML file against one FASTA database and
// writes the identifications to `<prefix>.idXML`.
//   - Additional CometAdapter flags are taken verbatim from `task.ext.args`.
//   - `<prefix>` is `task.ext.prefix` when set, otherwise `meta.id`.
process OPENMSTHIRDPARTY_COMETADAPTER {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.1.0--h9ee0642_4' :
        'biocontainers/openms-thirdparty:3.1.0--h9ee0642_4' }"

    input:
    tuple val(meta), path(mzml), path(fasta)

    output:
    tuple val(meta), path("*.idXML"), emit: idxml
    // Optional: the command below does not request a TSV on its own, so a
    // matching file only exists if one is asked for through `task.ext.args`.
    tuple val(meta), path("*.tsv")  , emit: pin, optional: true
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The version lines keep only the first whitespace-separated field of the
    // CometAdapter version and cut everything after the first '-', so the
    // reported value carries no suffix tag.
    """
    CometAdapter \\
        -in $mzml \\
        -database $fasta \\
        -out ${prefix}.idXML \\
        -threads $task.cpus \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CometAdapter: \$(CometAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1 | cut -d '-' -f 1)
        Comet: \$(comet 2>&1 | grep -E "Comet version.*" | sed 's/Comet version //g' | sed 's/"//g')
    END_VERSIONS
    """

    stub:
    // Only the prefix is needed here: the stub creates empty placeholder
    // outputs and never invokes CometAdapter with user arguments.
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    touch ${prefix}.idXML
    touch ${prefix}_pin.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CometAdapter: \$(CometAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1 | cut -d '-' -f 1)
        Comet: \$(comet 2>&1 | grep -E "Comet version.*" | sed 's/Comet version //g' | sed 's/"//g')
    END_VERSIONS
    """
}
55 changes: 55 additions & 0 deletions modules/nf-core/openmsthirdparty/cometadapter/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Module metadata for openmsthirdparty/cometadapter: describes the wrapped
# tool and the channels declared in main.nf.
name: "openmsthirdparty_cometadapter"
description: Annotates MS/MS spectra using Comet.
keywords:
  - search engine
  - fasta
  - mzml
  - openms
  - proteomics
tools:
  - openms:
      description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses"
      homepage: "https://openms.de"
      documentation: "https://openms.readthedocs.io/en/latest/index.html"
      tool_dev_url: "https://github.com/OpenMS/OpenMS"
      doi: "10.1038/nmeth.3959"
      licence: ["BSD"]

# Inputs arrive as a single tuple: meta, mzml, fasta.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - mzml:
      type: file
      description: File containing mass spectra in mzML format
      pattern: "*.{mzML}"
  - fasta:
      type: file
      description: Protein sequence database containing targets and decoys
      pattern: "*.{fasta}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - idxml:
      type: file
      description: File containing target and decoy hits in idXML format
      pattern: "*.{idXML}"
  # Declared with `optional: true` in main.nf, so the description says so.
  - pin:
      type: file
      description: Optional TSV file tailored as Percolator input (pin) file
      pattern: "*.{tsv}"

authors:
  - "@jonasscheid"
maintainers:
  - "@jonasscheid"
86 changes: 86 additions & 0 deletions modules/nf-core/openmsthirdparty/cometadapter/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// nf-test suite for OPENMSTHIRDPARTY_COMETADAPTER: one real run and one stub run,
// both fed by the two upstream processes declared in `setup`.
nextflow_process {

    name "Test Process OPENMSTHIRDPARTY_COMETADAPTER"
    script "../main.nf"
    process "OPENMSTHIRDPARTY_COMETADAPTER"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "openms"
    tag "openmsthirdparty"
    tag "openmsthirdparty/cometadapter"
    tag "thermorawfileparser"
    tag "openms/decoydatabase"

    // Dependencies executed before each test; their outputs are joined into
    // the single input tuple of the process under test.
    setup {
        run("THERMORAWFILEPARSER") {
            script "../../../thermorawfileparser/main.nf"
            process {
                """
                input[0] = Channel.of([
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'proteomics/msspectra/PXD012083_e005640_II.raw', checkIfExists: true)
                ])
                """
            }
        }

        run("OPENMS_DECOYDATABASE") {
            script "../../../openms/decoydatabase/main.nf"
            process {
                """
                input[0] = Channel.of([
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'proteomics/database/UP000005640_9606.fasta', checkIfExists: true)
                ])
                """
            }
        }
    }

    test("proteomics - comet") {

        when {
            process {
                """
                input[0] =
                    THERMORAWFILEPARSER.out.spectra.join(
                        OPENMS_DECOYDATABASE.out.decoy_fasta
                    )
                """
            }
        }

        // Comet stores timestamp in output file, so we cannot compare checksums
        then {
            assert process.success

            // Read the idXML once (only after success is confirmed) and reuse the lines
            def idxmlLines = file(process.out.idxml[0][1]).readLines()

            // Assert the search metadata
            assert snapshot(idxmlLines[0..30]).match()
            // Make sure the file is not empty
            assert idxmlLines.any { it.contains('ProteinHit') }
            assert idxmlLines.any { it.contains('PeptideHit') }
            assert snapshot(path(process.out.versions.get(0)).yaml).match("versions")
        }
    }

    test("proteomics - comet - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] =
                    THERMORAWFILEPARSER.out.spectra.join(
                        OPENMS_DECOYDATABASE.out.decoy_fasta
                    )
                """
            }
        }

        // The stub run is only checked for successful completion
        then {
            assert process.success
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"proteomics - comet": {
"content": [
[
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
"<?xml-stylesheet type=\"text/xsl\" href=\"https://www.openms.de/xml-stylesheet/IdXML.xsl\" ?>",
"<IdXML version=\"1.5\" xsi:noNamespaceSchemaLocation=\"https://www.openms.de/xml-schema/IdXML_1_5.xsd\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">",
"\t<SearchParameters id=\"SP_0\" db=\"UP000005640_9606_decoy.fasta\" db_version=\"\" taxonomy=\"\" mass_type=\"monoisotopic\" charges=\"2:5\" enzyme=\"unspecific cleavage\" missed_cleavages=\"1\" precursor_peak_tolerance=\"5\" precursor_peak_tolerance_ppm=\"true\" peak_mass_tolerance=\"0.50025\" peak_mass_tolerance_ppm=\"false\" >",
"\t\t<FixedModification name=\"Carbamidomethyl (C)\" />",
"\t\t<VariableModification name=\"Oxidation (M)\" />",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:in\" value=\"test.mzML\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:out\" value=\"test.idXML\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:database\" value=\"UP000005640_9606_decoy.fasta\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:comet_executable\" value=\"comet.exe\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:pin_out\" value=\"\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:default_params_file\" value=\"\"/>",
"\t\t\t\t<UserParam type=\"float\" name=\"CometAdapter:1:precursor_mass_tolerance\" value=\"5.0\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:precursor_error_units\" value=\"ppm\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:isotope_error\" value=\"off\"/>",
"\t\t\t\t<UserParam type=\"float\" name=\"CometAdapter:1:fragment_mass_tolerance\" value=\"0.50025\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:fragment_error_units\" value=\"Da\"/>",
"\t\t\t\t<UserParam type=\"float\" name=\"CometAdapter:1:fragment_bin_offset\" value=\"0.4\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:instrument\" value=\"low_res\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_A_ions\" value=\"false\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_B_ions\" value=\"true\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_C_ions\" value=\"false\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_X_ions\" value=\"false\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_Y_ions\" value=\"true\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_Z_ions\" value=\"false\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:use_NL_ions\" value=\"false\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:enzyme\" value=\"unspecific cleavage\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:second_enzyme\" value=\"\"/>",
"\t\t\t\t<UserParam type=\"string\" name=\"CometAdapter:1:num_enzyme_termini\" value=\"fully\"/>",
"\t\t\t\t<UserParam type=\"int\" name=\"CometAdapter:1:missed_cleavages\" value=\"1\"/>",
"\t\t\t\t<UserParam type=\"int\" name=\"CometAdapter:1:min_peptide_length\" value=\"5\"/>"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-13T13:57:34.932657501"
},
"versions": {
"content": [
{
"OPENMSTHIRDPARTY_COMETADAPTER": {
"CometAdapter": "3.1.0",
"Comet": "2023.01 rev. 2"
}
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-13T13:57:35.577894316"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Test configuration: CometAdapter flags passed to the module through ext.args.
process {

    withName: 'OPENMSTHIRDPARTY_COMETADAPTER' {
        // One flag per entry; the entries are joined with single spaces.
        ext.args = [
            "-instrument low_res",
            "-fragment_bin_offset 0.4",
            "-precursor_mass_tolerance 5",
            "-precursor_error_units 'ppm'",
            "-fragment_mass_tolerance 0.50025",
            "-digest_mass_range '800:5000'",
            "-max_variable_mods_in_peptide 1",
            "-precursor_charge '2:5'",
            "-activation_method 'CID'",
            "-variable_modifications 'Oxidation (M)'",
            "-enzyme 'unspecific cleavage'",
            "-spectrum_batch_size 0"
        ].join(' ').trim()
    }

}
2 changes: 2 additions & 0 deletions modules/nf-core/openmsthirdparty/cometadapter/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the module's test tag to the paths listed for it.
openmsthirdparty/cometadapter:
  - "modules/nf-core/openmsthirdparty/cometadapter/**"
1 change: 1 addition & 0 deletions tests/config/test_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,7 @@ params {
}
'database' {
yeast_ups = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta"
swissprot = "${params.test_data_base}/data/proteomics/database/UP000005640_9606.fasta"
}
'maxquant' {
mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv"
Expand Down

0 comments on commit af91cff

Please sign in to comment.