Skip to content

Commit

Permalink
Merge pull request #349 from jonasscheid/generate_speclib
Browse files Browse the repository at this point in the history
Generate spectrum library using EasyPQP
  • Loading branch information
jonasscheid authored Jan 22, 2025
2 parents e2a6509 + 4f65a3d commit 96d1490
Show file tree
Hide file tree
Showing 14 changed files with 30,827 additions and 13 deletions.
6 changes: 6 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ indent_style = unset
[/assets/email*]
indent_size = unset

[/assets/250120_unimod_tables.xml]
indent_style = unset
indent_size = unset
end_of_line = unset
trim_trailing_whitespace = unset

# ignore python and markdown
[*.{py,md}]
indent_style = unset
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ jobs:
# Test latest edge release of Nextflow
- NXF_VER: ""
NXF_EDGE: "1"
tests: ["test_mokapot", "test_percolator", "test_ionannotator"]
tests: ["test_mokapot", "test_percolator", "test_ionannotator", "test_speclib"]
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
lint:
files_unchanged:
- .github/CONTRIBUTING.md
- assets/250120_unimod_tables.xml
nf_core_version: 3.1.1
repository_type: pipeline
template:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `OPENMS_IDMASSACCURACY` and `DATAMASH_HISTOGRAM` to compute fragment mass errors and visualize them in multiQC report [#332](https://github.com/nf-core/mhcquant/pull/332)
- Added global fdr evaluation in new local subworkflow `RESCORE` [#338](https://github.com/nf-core/mhcquant/pull/338)
- Added `-weights` parameter in `OPENMS_PERCOLATORADAPTER` and visualize the median feature weights in multiQC report [#347](https://github.com/nf-core/mhcquant/pull/347)
- Added flag `generate_speclib` that will generate a spectrum library for DIA searches with EasyPQP [#349](https://github.com/nf-core/mhcquant/pull/349)
- Replace local with nf-core modules [#350](https://github.com/nf-core/mhcquant/pull/350)

### `Fixed`
Expand Down
30,542 changes: 30,542 additions & 0 deletions assets/250120_unimod_tables.xml

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,32 @@ process {
]
}

// Settings for the OPENMS_IDFILTER alias used only for spectrum library generation.
withName: 'OPENMS_IDFILTER_FOR_SPECLIB' {
// Closure is evaluated per task; presumably consumed by the module as the output basename - confirm in the module
ext.prefix = {"${meta.id}_comet_fdr_filtered"}
// Intermediate result: nothing from this process is published to the output directory
publishDir = [
enabled: false
]
}

// EASYPQP_CONVERT output (*.psmpkl / *.peakpkl) is intermediate and therefore not published.
withName: 'EASYPQP_CONVERT' {
publishDir = [
enabled: false
]
}

// Options for `easypqp library`: RT and IM calibration are switched off and
// the tool's own FDR step is skipped via --nofdr.
// Only files matching '*speclib.tsv' are published, into <outdir>/spectrum_library.
withName: 'EASYPQP_LIBRARY' {
    ext.args   = "--perform_rt_calibration False --perform_im_calibration False --nofdr"
    publishDir = [
        path   : { "${params.outdir}/spectrum_library" },
        mode   : params.publish_dir_mode,
        pattern: '*speclib.tsv'
    ]
}

withName: 'OPENMS_IDFILTER_QUANT' {
ext.prefix = {"${meta.spectra}_fdr_filtered"}
ext.args = "-best:spectrum_per_peptide 'sequence+charge+modification'"
Expand Down
30 changes: 30 additions & 0 deletions conf/test_speclib.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/mhcquant -profile test_speclib,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

// Upper bounds applied to every process of this test profile via the
// `resourceLimits` directive: at most 2 CPUs, 6 GB memory and 2 h runtime.
process {
resourceLimits = [
cpus: 2,
memory: '6.GB',
time: '2.h'
]
}

// Parameters of the spectrum library test profile.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Input data
// Both paths are resolved relative to params.pipelines_testdata_base_path
input = params.pipelines_testdata_base_path + 'mhcquant/testdata/HepG2_sample_sheet.tsv'
fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606.fasta'
// Switches on the spectrum library generation that this profile is meant to test
generate_speclib = true

}
5 changes: 5 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ This folder contains the intermediate results from various steps of the MHCquant

- `features`: Holds information of quantified features in `featureXML` files as a result of the [FeatureFinderIdentification](https://openms.de/doxygen/release/3.0.0/html/TOPP_FeatureFinderIdentification.html) in the quantification mode.

- `spectrum_library`

- `{Sample}_{Condition}_speclib.tsv`: FDR-filtered spectrum library for sample-condition pair
- `global_speclib.tsv`: Global FDR-filtered spectrum library for all MS runs in the samplesheet. This file is only written if `--global_fdr` is specified.

- `ion_annotations`

- `{Sample}_{Condition}_all_peaks.tsv`: Contains metadata of all measured ions of peptides reported after peptide identification.
Expand Down
58 changes: 58 additions & 0 deletions modules/local/easypqp/convert.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Runs `easypqp convert` on one identification file (passed via --pepxml) together with
// its spectra file and a UniMod table, and collects the resulting *.psmpkl / *.peakpkl files.
process EASYPQP_CONVERT {
tag "$meta.id"
label 'process_single'

// EasyPQP is pinned to 0.1.50 for conda as well as for both container flavours
conda "bioconda::easypqp=0.1.50"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/easypqp:0.1.50--pyhdfd78af_1' :
'biocontainers/easypqp:0.1.50--pyhdfd78af_1' }"

input:
// meta: sample metadata map (meta.id is used as tag and as stub prefix)
tuple val(meta), path(pepxml), path(spectra)
// UniMod modification table handed to `--unimod`
path unimod

output:
tuple val(meta), path("*.psmpkl") , emit: psmpkl
tuple val(meta), path("*.peakpkl"), emit: peakpkl
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Extra command line options can be injected through ext.args in the config
def args = task.ext.args ?: ''

// NOTE(review): MPLCONFIGDIR / XDG_CACHE_HOME are redirected to /tmp, presumably because the
// default matplotlib / fontconfig cache locations are not writable in the container - confirm.
// Output file names are not set explicitly here; they are whatever `easypqp convert` writes by default.
"""
export MPLCONFIGDIR=/tmp/matplotlib
export XDG_CACHE_HOME=/tmp/fontconfig-cache
mkdir -p \$MPLCONFIGDIR \$XDG_CACHE_HOME
easypqp convert \\
--pepxml $pepxml \\
--spectra $spectra \\
--unimod $unimod \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
easypqp: \$(easypqp --version 2>&1 | sed 's/easypqp, version //; s/Using.*\$//')
END_VERSIONS
"""

stub:
// Stub run: only creates empty placeholder outputs named after ext.prefix (default: meta.id)
def prefix = task.ext.prefix ?: "${meta.id}"

"""
export MPLCONFIGDIR=/tmp/matplotlib
export XDG_CACHE_HOME=/tmp/fontconfig-cache
mkdir -p \$MPLCONFIGDIR \$XDG_CACHE_HOME
touch "${prefix}.psmpkl"
touch "${prefix}.peakpkl"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
easypqp: \$(easypqp --version 2>&1 | sed 's/easypqp, version //; s/Using.*\$//')
END_VERSIONS
"""
}
56 changes: 56 additions & 0 deletions modules/local/easypqp/library.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Builds a spectrum library with `easypqp library` from the *.psmpkl / *.peakpkl files
// of one group and writes it to `<prefix>_speclib.tsv`.
process EASYPQP_LIBRARY {
    tag "$meta.id"
    label 'process_single'

    // EasyPQP is pinned to 0.1.50 for conda as well as for both container flavours
    conda "bioconda::easypqp=0.1.50"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/easypqp:0.1.50--pyhdfd78af_1' :
        'biocontainers/easypqp:0.1.50--pyhdfd78af_1' }"

    input:
    // psmpkl / peakpkl may each hold several files; all of them are passed positionally to the tool
    tuple val(meta), path(psmpkl), path(peakpkl)

    output:
    tuple val(meta), path("*.tsv") , emit: tsv
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command line options (ext.args) and output basename (ext.prefix, default: meta.id)
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // NOTE(review): MPLCONFIGDIR / XDG_CACHE_HOME are redirected to /tmp, presumably because the
    // default matplotlib / fontconfig cache locations are not writable in the container - confirm.
    // Line continuations are written as `\\` (as in EASYPQP_CONVERT) so that a literal backslash
    // reaches the shell script; a single backslash would be consumed by Groovy and collapse the
    // command into one line in .command.sh.
    """
    export MPLCONFIGDIR=/tmp/matplotlib
    export XDG_CACHE_HOME=/tmp/fontconfig-cache
    mkdir -p \$MPLCONFIGDIR \$XDG_CACHE_HOME

    easypqp library \\
        --out ${prefix}_speclib.tsv \\
        $args \\
        $psmpkl $peakpkl

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        easypqp: \$(easypqp --version 2>&1 | sed 's/easypqp, version //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    // Stub run: only creates an empty placeholder library file
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    export MPLCONFIGDIR=/tmp/matplotlib
    export XDG_CACHE_HOME=/tmp/fontconfig-cache
    mkdir -p \$MPLCONFIGDIR \$XDG_CACHE_HOME

    touch "${prefix}_speclib.tsv"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        easypqp: \$(easypqp --version 2>&1 | sed 's/easypqp, version //; s/Using.*\$//')
    END_VERSIONS
    """
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ params {
skip_decoy_generation = false
run_centroidisation = false
filter_mzml = false
generate_speclib = false
quantify = false
annotate_ions = false

Expand Down Expand Up @@ -219,6 +220,7 @@ profiles {
test_mokapot { includeConfig 'conf/test_mokapot.config' }
test_percolator { includeConfig 'conf/test_percolator.config' }
test_ionannotator { includeConfig 'conf/test_ionannotator.config' }
test_speclib { includeConfig 'conf/test_speclib.config' }
test_timstof { includeConfig 'conf/test_timstof.config' }
test_full { includeConfig 'conf/test_full.config' }
}
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,12 @@
"default": 12,
"description": "Specify the maximum length of peptides to be considered after processing"
},
"generate_speclib": {
"type": "boolean",
"default": false,
"fa_icon": "fas fa-database",
"description": "Generate a spectral library from the search results. If `global_fdr` is specified, an additional global FDR-filtered library is generated from all MS runs in the samplesheet."
},
"annotate_ions": {
"type": "boolean",
"default": false,
Expand Down
61 changes: 61 additions & 0 deletions subworkflows/local/speclib.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Generates spectrum library for DIA-based searches
*/

//
// MODULE: Loaded from modules/local/
//

include { EASYPQP_CONVERT } from '../../modules/local/easypqp/convert'
include { EASYPQP_LIBRARY;
EASYPQP_LIBRARY as EASYPQP_LIBRARY_GLOBAL } from '../../modules/local/easypqp/library'

//
// MODULE: Installed directly from nf-core/modules
//

workflow SPECLIB {

    take:
    fdrfiltered_comet_idxml // channel: [ val(meta), identification file ] - joined with `mzml` on the first element
    mzml                    // channel: [ val(meta), spectra file ]

    main:
    ch_versions = Channel.empty()

    // UniMod table bundled with the pipeline; fail early if the asset is missing
    unimod = file("$projectDir/assets/250120_unimod_tables.xml", checkIfExists: true)

    // Convert psms and spectra to pickle files
    EASYPQP_CONVERT(fdrfiltered_comet_idxml.join(mzml), unimod)
    ch_versions = ch_versions.mix(EASYPQP_CONVERT.out.versions)

    // Collect the pickle files of all runs that belong to the same sample-condition pair.
    // groupKey carries meta.group_count as the expected group size.
    EASYPQP_CONVERT.out.psmpkl
        .map { meta, psmpkl -> [groupKey([id: "${meta.sample}_${meta.condition}"], meta.group_count), psmpkl] }
        .groupTuple()
        .set { ch_psmpkl }
    EASYPQP_CONVERT.out.peakpkl
        .map { meta, peakpkl -> [groupKey([id: "${meta.sample}_${meta.condition}"], meta.group_count), peakpkl] }
        .groupTuple()
        .set { ch_peakpkl }

    // Generate spectrum library for each sample-condition pair
    EASYPQP_LIBRARY(ch_psmpkl.join(ch_peakpkl))
    ch_versions = ch_versions.mix(EASYPQP_LIBRARY.out.versions)

    // Generate spectrum library for all MSruns in the samplesheet
    if (params.global_fdr) {
        // All runs are collapsed under the single key [id: "global"]
        EASYPQP_CONVERT.out.psmpkl
            .map { meta, psmpkl -> [[id: "global"], psmpkl] }
            .groupTuple()
            .set { ch_global_psmpkl }
        EASYPQP_CONVERT.out.peakpkl
            .map { meta, peakpkl -> [[id: "global"], peakpkl] }
            .groupTuple()
            .set { ch_global_peakpkl }
        EASYPQP_LIBRARY_GLOBAL(ch_global_psmpkl.join(ch_global_peakpkl))
        // Report the tool version of the aliased process as well
        ch_versions = ch_versions.mix(EASYPQP_LIBRARY_GLOBAL.out.versions)
    }

    emit:
    versions = ch_versions
}
44 changes: 32 additions & 12 deletions workflows/mhcquant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabex
//
include { PREPARE_SPECTRA } from '../subworkflows/local/prepare_spectra'
include { RESCORE } from '../subworkflows/local/rescore'
include { SPECLIB } from '../subworkflows/local/speclib'
include { QUANT } from '../subworkflows/local/quant'

/*
Expand All @@ -30,17 +31,18 @@ include { QUANT } from '../subworkflows/local/quant'
//
// MODULE: Installed directly from nf-core/modules
//
include { OPENMS_FILEFILTER } from '../modules/nf-core/openms/filefilter/main'
include { OPENMS_DECOYDATABASE } from '../modules/nf-core/openms/decoydatabase/main'
include { OPENMS_IDMASSACCURACY } from '../modules/nf-core/openms/idmassaccuracy/main'
include { OPENMSTHIRDPARTY_COMETADAPTER } from '../modules/nf-core/openmsthirdparty/cometadapter/main'
include { OPENMS_PEPTIDEINDEXER } from '../modules/nf-core/openms/peptideindexer/main'
include { OPENMS_IDMERGER } from '../modules/nf-core/openms/idmerger/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-schema'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mhcquant_pipeline'
include { OPENMS_FILEFILTER } from '../modules/nf-core/openms/filefilter/main'
include { OPENMS_DECOYDATABASE } from '../modules/nf-core/openms/decoydatabase/main'
include { OPENMS_IDMASSACCURACY } from '../modules/nf-core/openms/idmassaccuracy/main'
include { OPENMSTHIRDPARTY_COMETADAPTER } from '../modules/nf-core/openmsthirdparty/cometadapter/main'
include { OPENMS_PEPTIDEINDEXER } from '../modules/nf-core/openms/peptideindexer/main'
include { OPENMS_IDMERGER } from '../modules/nf-core/openms/idmerger/main'
include { OPENMS_IDFILTER as OPENMS_IDFILTER_FOR_SPECLIB } from '../modules/nf-core/openms/idfilter/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-schema'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mhcquant_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -103,7 +105,6 @@ workflow MHCQUANT {
ch_multiqc_files = ch_multiqc_files.mix(DATAMASH_HISTOGRAM.out.binned_tsv.map{ meta, frag_err_hist -> frag_err_hist })

// Save indexed runs for later use to keep meta-run information. Sort based on file id
OPENMS_PEPTIDEINDEXER.out.id_file_pi.view()
OPENMS_PEPTIDEINDEXER.out.id_file_pi
.map { meta, idxml -> [ groupKey([id: "${meta.sample}_${meta.condition}"], meta.group_count), meta] }
.groupTuple()
Expand Down Expand Up @@ -132,6 +133,25 @@ workflow MHCQUANT {
RESCORE( ch_rescore_in, ch_multiqc_files )
ch_versions = ch_versions.mix(RESCORE.out.versions)
ch_multiqc_files = ch_multiqc_files.mix(RESCORE.out.multiqc_files)

// GENERATE SPECTRUM LIBRARY
if (params.generate_speclib) {
    // Pair every Comet idXML with the FDR-filtered result that shares its
    // "<sample>_<condition>" key, then drop the key again.
    OPENMSTHIRDPARTY_COMETADAPTER.out.idxml
        .map { meta, idxml -> [ [id: "${meta.sample}_${meta.condition}"], meta, idxml] }
        .combine(RESCORE.out.fdr_filtered, by:0)
        .map { groupKey, meta, comet_idxml, fdr_filtered_idxml -> [meta, comet_idxml, fdr_filtered_idxml] }
        .set { ch_fdrfilter_comet_idxml }

    // Backfilter Comet identifications with FDR threshold
    OPENMS_IDFILTER_FOR_SPECLIB(ch_fdrfilter_comet_idxml)
    // Track the tool version of the filter step like for every other process
    ch_versions = ch_versions.mix(OPENMS_IDFILTER_FOR_SPECLIB.out.versions)

    //
    // SUBWORKFLOW: SPECLIB
    //
    SPECLIB(OPENMS_IDFILTER_FOR_SPECLIB.out.filtered, ch_clean_mzml_file)
    ch_versions = ch_versions.mix(SPECLIB.out.versions)
}

//
// SUBWORKFLOW: QUANT
//
Expand Down

0 comments on commit 96d1490

Please sign in to comment.