Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional global FDR functionality #339

Merged
merged 8 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v2.6.1dev - [date]
## v2.7.0dev - [date]

### `Added`

- Added `PYOPENMS_CHROMATOGRAMEXTRACTOR` extracting MS1 Chromatograms and visualize them in multiQC report [#329](https://github.com/nf-core/mhcquant/pull/329)
- Added `OPENMS_IDMASSACCURACY` and `DATAMASH_HISTOGRAM` to compute fragment mass errors and visualizte them in multiQC report [#332](https://github.com/nf-core/mhcquant/pull/332)
- Added global fdr evaluation in new local subworkflow `RESCORE` [#338](https://github.com/nf-core/mhcquant/pull/338)

### `Fixed`

Expand Down
69 changes: 67 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,17 @@ process {
]
}

withName: 'OPENMS_IDMERGER*' {
withName: 'OPENMS_IDMERGER|OPENMS_IDMERGER_QUANT' {
ext.args = [
"-annotate_file_origin true",
"-merge_proteins_add_PSMs"
].join(' ').trim()
publishDir = [
enabled: false
]
}
withName: 'OPENMS_IDMERGER_GLOBAL' {
label = 'process_high_memory'
ext.args = [
"-annotate_file_origin true",
"-merge_proteins_add_PSMs"
Expand All @@ -106,6 +116,36 @@ process {
]
}

withName: 'OPENMS_IDFILTER_Q_VALUE_GLOBAL' {
label = 'process_high_memory'
ext.prefix = {"${meta.id}_pout_filtered"}
ext.args = [
"-remove_decoys",
"-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'",
"-delete_unreferenced_peptide_hits",
(params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold
].join(' ').trim()
publishDir = [
enabled: false
]
}

withName: 'OPENMS_IDFILTER_GLOBAL' {
label = 'process_high_memory'
ext.prefix = {"${meta.id}_pout_filtered"}
ext.args = [
"-remove_decoys",
"-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'",
"-delete_unreferenced_peptide_hits",
(params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold
].join(' ').trim()
publishDir = [
path: {"${params.outdir}/intermediate_results/rescoring"},
mode: params.publish_dir_mode,
pattern: '*.idXML'
]
}

withName: 'OPENMS_IDFILTER_QUANT' {
ext.prefix = {"${meta.spectra}_fdr_filtered"}
ext.args = "-best:spectrum_per_peptide 'sequence+charge+modification'"
Expand Down Expand Up @@ -149,7 +189,7 @@ process {

process {
if (params.rescoring_engine == 'mokapot') {
withName: 'NFCORE_MHCQUANT:MHCQUANT:OPENMS_IDSCORESWITCHER' {
withName: 'NFCORE_MHCQUANT:MHCQUANT:RESCORE:OPENMS_IDSCORESWITCHER' {
ext.prefix = {"${meta.id}"}
ext.args = [
"-new_score q-value",
Expand Down Expand Up @@ -293,6 +333,22 @@ process {
]
}

withName: 'OPENMS_PERCOLATORADAPTER_GLOBAL' {
label = 'process_high'
ext.args = [
"-seed 4711",
"-trainFDR 0.05",
"-testFDR 0.05",
"-enzyme no_enzyme",
"-subset_max_train ${params.subset_max_train}",
"-post_processing_tdc",
(params.fdr_level != 'psm_level_fdrs') ? "-" + params.fdr_level : ""
].join(' ').trim()
publishDir = [
enabled: false
]
}

withName: 'OPENMS_PSMFEATUREEXTRACTOR' {
publishDir = [
path: {"${params.outdir}/intermediate_results/rescoring"},
Expand Down Expand Up @@ -339,6 +395,15 @@ process {
]
}

withName: 'OPENMS_TEXTEXPORTER_GLOBAL' {
label = 'process_high_memory'
publishDir = [
path: {"${params.outdir}/global_fdr"},
mode: params.publish_dir_mode,
pattern: '*.tsv'
]
}

withName: 'OPENMS_IDCONFLICTRESOLVER' {
publishDir = [
path: {"${params.outdir}/intermediate_results/features"},
Expand Down
2 changes: 2 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ This folder contains the intermediate results from various steps of the MHCquant
- `{Sample}_{Condition}_pout.idXML`: Unfiltered percolator output.
- `{Sample}_{Condition}_pout_filtered.idXML`: FDR-filtered percolator output.

- `global_fdr`: Contains global FDR-filtered list of all runs in a `tsv` file

- `features`: Holds information of quantified features in `featureXML` files as a result of the [FeatureFinderIdentification](https://openms.de/doxygen/release/3.0.0/html/TOPP_FeatureFinderIdentification.html) in the quantification mode.

- `ion_annotations`
Expand Down
54 changes: 28 additions & 26 deletions modules/local/openms_percolatoradapter.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process OPENMS_PERCOLATORADAPTER {
tag "$meta.id"
label 'process_low'
label 'process_medium'

conda "bioconda::openms-thirdparty=3.1.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -18,31 +18,33 @@ process OPENMS_PERCOLATORADAPTER {
task.ext.when == null || task.ext.when

script:
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''

"""
OMP_NUM_THREADS=$task.cpus \\
PercolatorAdapter -in $merged_with_features \\
-out ${prefix}.idXML \\
$args

cat <<-END_VERSIONS > versions.yml
"${task.process}":
openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//')
END_VERSIONS
"""
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''
"""
PercolatorAdapter \\
-in $merged_with_features \\
-out ${prefix}.idXML \\
-threads $task.cpus \\
$args

cat <<-END_VERSIONS > versions.yml
"${task.process}":
PercolatorAdapter: \$(PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
percolator: \$(percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g')
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''

"""
touch ${prefix}.idXML

cat <<-END_VERSIONS > versions.yml
"${task.process}":
openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//')
END_VERSIONS
"""
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''

"""
touch ${prefix}.idXML

cat <<-END_VERSIONS > versions.yml
"${task.process}":
PercolatorAdapter: \$(PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
percolator: \$(percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g')
END_VERSIONS
"""
}
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ params {
ms2pip_model_dir = null
deeplc_calibration_set_size = 0.15

// Percolator settings
// Rescoring engine settings
fdr_threshold = 0.01
fdr_level = 'peptide_level_fdrs'
subset_max_train = 0
global_fdr = false

// IDfilter settings
peptide_min_length = 8
Expand Down
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,13 @@
"default": 0.01,
"description": "Specify the false discovery rate threshold at which peptide hits should be selected."
},
"global_fdr": {
"type": "boolean",
"default": false,
"fa_icon": "fas fa-less-than",
"description": "Compute global FDR and backfilter sample-specific FDRs"
},

"subset_max_train": {
"type": "integer",
"hidden": true,
Expand Down
89 changes: 89 additions & 0 deletions subworkflows/local/rescore.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Prepares the raw or compressed data holding spectra information for the subsequent database search.
*/

//
// MODULE: Loaded from modules/local/
//

include { MS2RESCORE } from '../../modules/local/ms2rescore'
include { OPENMS_PSMFEATUREEXTRACTOR } from '../../modules/local/openms_psmfeatureextractor'
include { OPENMS_PERCOLATORADAPTER;
OPENMS_PERCOLATORADAPTER as OPENMS_PERCOLATORADAPTER_GLOBAL } from '../../modules/local/openms_percolatoradapter'
include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_GLOBAL } from '../../modules/local/openms_textexporter'
//
// MODULE: Installed directly from nf-core/modules
//

include { OPENMS_IDMERGER as OPENMS_IDMERGER_GLOBAL } from '../../modules/nf-core/openms/idmerger/main'
include { OPENMS_IDSCORESWITCHER } from '../../modules/nf-core/openms/idscoreswitcher/main.nf'
include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE;
OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE_GLOBAL;
OPENMS_IDFILTER as OPENMS_IDFILTER_GLOBAL } from '../../modules/nf-core/openms/idfilter/main'

workflow RESCORE {

take:
ch_merged_runs

main:
ch_versions = Channel.empty()

// Compute features via ms2rescore
MS2RESCORE(ch_merged_runs)
ch_versions = ch_versions.mix(MS2RESCORE.out.versions)

if (params.rescoring_engine == 'mokapot') {
log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore."
if (params.global_fdr) {
log.warn "Global FDR is currently not supported by mokapot. The global_fdr parameter will be ignored."
}
// Switch comet e-value to mokapot q-value
OPENMS_IDSCORESWITCHER(MS2RESCORE.out.idxml)
ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions)
ch_rescored_runs = OPENMS_IDSCORESWITCHER.out.idxml

// Filter by mokapot q-value
OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]})
ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions)
ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered

} else {
// Extract PSM features for Percolator
OPENMS_PSMFEATUREEXTRACTOR(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names))
ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions)

// Run Percolator with local FDR
OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml)
ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions)
ch_pout = OPENMS_PERCOLATORADAPTER.out.idxml

if (params.global_fdr) {
// Merge all samples into one group
OPENMS_IDMERGER_GLOBAL(OPENMS_PSMFEATUREEXTRACTOR.out.idxml.map {group_meta, idxml -> [[id:'global'], idxml] }.groupTuple())
// Run Percolator with global FDR
OPENMS_PERCOLATORADAPTER_GLOBAL(OPENMS_IDMERGER_GLOBAL.out.idxml)
ch_rescored_runs = OPENMS_PERCOLATORADAPTER_GLOBAL.out.idxml
// Filter by global percolator q-value
OPENMS_IDFILTER_Q_VALUE_GLOBAL(ch_rescored_runs.map {id, idxml -> [id, idxml, []]})
ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE_GLOBAL.out.versions)
// Backfilter sample_condition runs according to global FDR
OPENMS_IDFILTER_GLOBAL(ch_pout.combine(OPENMS_IDFILTER_Q_VALUE_GLOBAL.out.filtered.map{ it[1] }))
ch_filter_q_value = OPENMS_IDFILTER_GLOBAL.out.filtered
// Save globally merged runs in tsv
OPENMS_TEXTEXPORTER_GLOBAL(OPENMS_PERCOLATORADAPTER_GLOBAL.out.idxml)

} else {
ch_rescored_runs = ch_pout
// Filter by percolator q-value
OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]})
ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions)
ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered
}
}

emit:
rescored_runs = ch_rescored_runs
fdr_filtered = ch_filter_q_value
versions = ch_versions
}
Loading
Loading