diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..4ecfbfe3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 85b8f2be..3b7c77be 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/mhcquant then the best place to ask is on the nf-core Slack [#mhcquant](https://nfcore.slack.com/channels/mhcquant) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/mhcquant then the best place to ask is on the nf-core Slack [#mhcquant](https://nfcore.slack.com/channels/mhcquant) channel ([join our Slack here](https://nf-co.re/join/slack)). 
+::: ## Contribution workflow diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 822c1c48..5527a539 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,27 +37,37 @@ jobs: - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --spectrum_batch_size 5000 --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - test_additional_params: - name: Run pipeline with additional params - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/mhcquant') }}" + profile: + name: Run profile tests + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/mhcquant') }} runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false strategy: matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" + include: + # Test pipeline minimum Nextflow version + - NXF_VER: "23.04.0" + NXF_EDGE: "" + # Test latest edge release of Nextflow + - NXF_VER: "" + NXF_EDGE: "1" + tests: ["test_deeplc", "test_ms2pip", "test_ionannotator", "test_quant", "test_full"] steps: - name: Check out pipeline code - uses: actions/checkout@v3 - + uses: actions/checkout@v2 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Run pipeline with additional params + env: + NXF_VER: ${{ matrix.NXF_VER }} + # Uncomment only if the edge release is more recent than the latest stable release + # See https://github.com/nextflow-io/nextflow/issues/2467 + # NXF_EDGE: ${{ matrix.NXF_EDGE }} + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Run pipeline with profile ${{ matrix.tests }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --predict_class_1 --predict_class_2 --predict_RT --spectrum_batch_size 2000 
--use_deeplc --use_ms2pip --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.tests }},docker --max_memory '6.GB' --max_cpus 2 --spectrum_batch_size 5000 --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4bc..b8bdd214 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml new file mode 100644 index 00000000..6ad33927 --- /dev/null +++ b/.github/workflows/release-announcments.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/CHANGELOG.md b/CHANGELOG.md index 18a6bc0f..37bcfb91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,30 +3,34 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v2.4.2dev - [date] +## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/04 ### `Added` - Support for brukers tdf format by adding tdf2mzml converter [#263](https://github.com/nf-core/mhcquant/issues/263) - DeepLC retention time prediction - MS2PIP peak intensity prediction -- Added OpenMS FileFilter to clean mzml after parsing to remove artifacts like empty spectra or precursors with charge 0 +- Added OpenMS FileFilter to clean mzml after parsing to remove artifacts like empty spectra or precursors with charge 0 (optional) - Made file extension check case insensitive - Added option to provide a default comet parameters file - Optimize resource allocations -- Template update 2.9. [#274](https://github.com/nf-core/mhcquant/pull/274) +- Template update 2.9 [#274](https://github.com/nf-core/mhcquant/pull/274) +- Improved quantification such that merged FDR-filtered runs can be quantified properly +- Template update 2.10 [#282](https://github.com/nf-core/mhcquant/pull/282) ### `Fixed` -- [#266](https://github.com/nf-core/mhcquant/pull/266) New OpenMS version 2.9.1 fixes duplicated ID bug [#250](https://github.com/nf-core/mhcquant/issues/250) +- [#266](https://github.com/nf-core/mhcquant/pull/266) New OpenMS version 3.0.0 fixes duplicated ID bug [#250](https://github.com/nf-core/mhcquant/issues/250) ### `Dependencies` -- [#266](https://github.com/nf-core/mhcquant/pull/266) Switched from OpenMS version 2.8.0 to newest version 2.9.1 [#265](https://github.com/nf-core/mhcquant/issues/265) +- [#266](https://github.com/nf-core/mhcquant/pull/266) Switched from OpenMS version 2.8.0 to newest version 3.0.0 [#265](https://github.com/nf-core/mhcquant/issues/265) - [#266](https://github.com/nf-core/mhcquant/pull/266) Bumped ThermoRawFileParser version from 1.4.0 to 1.4.2 ### `Deprecated` +- OpenMS RT prediction + ## v2.4.1 nfcore/mhcquant "Young Shark" (patch) - 2023/04/04 ### `Added` diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- 
a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. 
This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. 
+We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. 
+Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. 
## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. 
Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. 
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. 
Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. 
+ +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. 
+- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/README.md b/README.md index 7b40e163..db37dbc0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # ![nf-core/mhcquant](docs/images/nf-core-mhcquant_logo_light.png#gh-light-mode-only) ![nf-core/mhcquant](docs/images/nf-core-mhcquant_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1569909-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1569909) +[![GitHub Actions CI Status](https://github.com/nf-core/mhcquant/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/mhcquant/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/mhcquant/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/mhcquant/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1569909-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1569909) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -22,11 +23,11 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/mhcquant/results). -![overview](assets/mhcquant_web.png) +![overview](docs/images/mhcquant_subway.png) ## Usage -> **Note** +> [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. @@ -58,10 +59,11 @@ nextflow run nf-core/mhcquant \ --outdir ``` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +:::warning +Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +::: For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mhcquant/usage) and the [parameter documentation](https://nf-co.re/mhcquant/parameters). @@ -82,15 +84,34 @@ nextflow run nf-core/mhcquant -profile test, > Käll L. et al, _Nat Methods_ 2007 Nov;4(11):923-5. doi: [10.1038/nmeth1113](https://www.nature.com/articles/nmeth1113). Epub 2007 Oct 21. > +> **Retention time prediction** +> +> Bouwmeester R. et al, _Nature Methods_ 2021 Oct;18(11):1363-1369. doi: [10.1038/s41592-021-01301-5](https://www.nature.com/articles/s41592-021-01301-5) +> +> **MS2 Peak intensity prediction** +> +> Gabriels R. et al, _Nucleic Acids Research_ 2019 Jul;47(W1):W295-9. 
doi: [10.1093/nar/gkz299](https://academic.oup.com/nar/article/47/W1/W295/5480903) +> > **Identification based RT Alignment** > > Weisser H. et al, _J Proteome Res._ 2013 Apr 5;12(4):1628-44. doi: [10.1021/pr300992u](https://pubs.acs.org/doi/10.1021/pr300992u). Epub 2013 Feb 22. diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 09392396..04621a17 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,11 @@ +custom_logo: "nf-core-mhcquant_logo_light.png" +custom_logo_url: https://github.com/nf-core/mhcquant +custom_logo_title: "nf-core/mhcquant" + report_comment: > - This report has been generated by the nf-core/mhcquant + This report has been generated by the nf-core/mhcquant analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-mhcquant-methods-description": order: -1000 diff --git a/bin/IDFilter.py b/bin/IDFilter.py new file mode 100755 index 00000000..47d6db80 --- /dev/null +++ b/bin/IDFilter.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# Written by Jonas Scheid under the MIT license + +from pyopenms import * +import pandas as pd +import os +import argparse + + +def parse_args() -> argparse.Namespace: + """ + Parse command line arguments. + + :return: parsed arguments + :rtype: argparse.Namespace + """ + parser = argparse.ArgumentParser(description="Filter idXML by a given whitelist of peptides.") + parser.add_argument("--input", required=True, type=str, help="Input idXML file.") + parser.add_argument( + "--whitelist", required=True, type=str, help="IdXML file, which peptide IDs are used as whitelist filter." + ) + parser.add_argument("--output", required=True, type=str, help="Filtered idXML file.") + + return parser.parse_args() + + +def parse_idxml(path: str) -> tuple[list, list]: + """ + Parse idXML file and return PeptideIdentification and ProteinIdentification objects. 
+ + :param path: path to idXML file + :type path: str + :return: ProteinIdentification and PeptideIdentification objects + :rtype: (list, list) + """ + protein_ids = [] + peptide_ids = [] + IdXMLFile().load(path, protein_ids, peptide_ids) + + return protein_ids, peptide_ids + + +def filter_run(protein_ids, peptide_ids, whitelist) -> tuple[list, list]: + """ + Filter Protein and PeptideIdentifications of one run by a whitelist of PeptideIdentifications. + + :param protein_ids: ProteinIdentification objects + :type protein_ids: list + :param peptide_ids: PeptideIdentification objects + :type peptide_ids: list + :param whitelist: PeptideIdentification objects to keep in the run + :type whitelist: list + """ + filter = IDFilter() + ids_to_keep = [ + peptide_id + for peptide_id in peptide_ids + for hit in peptide_id.getHits() + if hit.getSequence().toString() in whitelist + ] + filter.keepPeptidesWithMatchingSequences(peptide_ids, ids_to_keep, ignore_mods=False) + # We only want to have unique peptide sequences + filter.keepBestPerPeptide(peptide_ids, ignore_mods=False, ignore_charges=False, nr_best_spectrum=1) + filter.removeEmptyIdentifications(peptide_ids) + # We only want to have protein accessions that are referenced by the fdr-filtered peptide hits + filter.removeUnreferencedProteins(protein_ids, peptide_ids) + + return protein_ids, peptide_ids + + +def main(): + args = parse_args() + + # Read idXML files of runs + protein_ids, peptide_ids = parse_idxml(args.input) + + # Read file containing peptides to keep + whitelist_protein_ids, whitelist_peptide_ids = parse_idxml(args.whitelist) + # Get string representation of peptide sequences in fdr_filtered_peptides + whitelist_peptides = [hit.getSequence().toString() for id in whitelist_peptide_ids for hit in id.getHits()] + + # Filter runs for peptides only in the fdr_filtered_peptides list + protein_id_filtered, peptide_ids_filtered = filter_run(protein_ids, peptide_ids, whitelist_peptides) + + # Write filtered run to 
idXML file + IdXMLFile().store(args.output, protein_id_filtered, peptide_ids_filtered) + + +if __name__ == "__main__": + main() diff --git a/bin/get_ion_annotations.py b/bin/get_ion_annotations.py index 35bffff0..e6615479 100755 --- a/bin/get_ion_annotations.py +++ b/bin/get_ion_annotations.py @@ -7,7 +7,6 @@ import pandas as pd import numpy as np import argparse -from pyopenms.Plotting import * def parse_arguments() -> Tuple[argparse.ArgumentParser, argparse.Namespace]: diff --git a/conf/modules.config b/conf/modules.config index 8a9fd722..62f0f4e1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,148 +13,216 @@ process { publishDir = [ - path: {"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}"}, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: SAMPLESHEET_CHECK { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: {filename -> filename.equals('versions.yml') ? null : filename}, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + } - withName: 'SAMPLESHEET_CHECK' { - publishDir = [ - path: {"${params.outdir}/pipeline_info"}, - mode: params.publish_dir_mode - ] - } - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - publishDir = [ - path: {"${params.outdir}/pipeline_info"}, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } - withName: 'MULTIQC' { - publishDir = [ - path: {"${params.outdir}/multiqc"}, - mode: params.publish_dir_mode, - enabled: true - ] - } + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } process { + withName: 'THERMORAWFILEPARSER' { + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + enabled: false + ] + } + + withName: 'TDF2MZML' { + publishDir = [ + enabled: false + ] + } + withName: 'GENERATE_PROTEINS_FROM_VCF' { - ext.args = [ - "-t ${params.variant_annotation_style}", - "-r ${params.variant_reference}", - params.variant_indel_filter ? "-fINDEL" : "", - params.variant_frameshift_filter ? "-fFS" : "", - params.variant_snp_filter ? "-fSNP" : "" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}"}, - mode: params.publish_dir_mode, - pattern: '*.fasta' - ] - } + ext.args = [ + "-t ${params.variant_annotation_style}", + "-r ${params.variant_reference}", + params.variant_indel_filter ? "-fINDEL" : "", + params.variant_frameshift_filter ? "-fFS" : "", + params.variant_snp_filter ? 
"-fSNP" : "" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + pattern: '*.fasta' + ] + } + + withName: 'OPENMS_DECOYDATABASE' { + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_FILEFILTER' { + publishDir = [ + enabled: false + ] + } withName: 'OPENMS_MAPALIGNERIDENTIFICATION' { - ext.args = [ - "-model:type linear", - "-algorithm:max_rt_shift ${params.max_rt_alignment_shift}" - ].join(' ').trim() - } + ext.args = [ + "-model:type linear", + "-algorithm:max_rt_shift ${params.max_rt_alignment_shift}" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/alignment"}, + mode: params.publish_dir_mode, + pattern: '*.trafoXML' + ] + } + + withName: 'OPENMS_MAPRTTRANSFORMERMZML|OPENMS_MAPRTTRANSFORMERIDXML' { + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_IDMERGER*' { + publishDir = [ + enabled: false + ] + } withName: 'OPENMS_COMETADAPTER' { - ext.args = [ - "-precursor_mass_tolerance ${params.precursor_mass_tolerance}", - "-fragment_mass_tolerance ${params.fragment_mass_tolerance}", - "-fragment_bin_offset ${params.fragment_bin_offset}", - "-instrument ${params.instrument}", - "-num_hits ${params.num_hits}", - "-digest_mass_range ${params.digest_mass_range}", - "-max_variable_mods_in_peptide ${params.number_mods}", - "-missed_cleavages 0", - "-precursor_charge ${params.prec_charge}", - "-activation_method ${params.activation_method}", - "-variable_modifications ${params.variable_mods.tokenize(',').collect {"'${it}'"}.join(" ")}", - "-enzyme '${params.enzyme}'", - "-spectrum_batch_size ${params.spectrum_batch_size}" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/comet"}, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - } + ext.args = [ + "-precursor_mass_tolerance ${params.precursor_mass_tolerance}", + "-fragment_mass_tolerance ${params.fragment_mass_tolerance}", + "-fragment_bin_offset 
${params.fragment_bin_offset}", + "-instrument ${params.instrument}", + "-num_hits ${params.num_hits}", + "-digest_mass_range ${params.digest_mass_range}", + "-max_variable_mods_in_peptide ${params.number_mods}", + "-missed_cleavages 0", + "-precursor_charge ${params.prec_charge}", + "-activation_method ${params.activation_method}", + "-variable_modifications ${params.variable_mods.tokenize(',').collect {"'${it}'"}.join(" ")}", + "-enzyme '${params.enzyme}'", + "-spectrum_batch_size ${params.spectrum_batch_size}" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/comet"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } - withName: 'OPENMS_IDFILTER_FOR_ALIGNMENT' { - ext.args = [ - "-remove_decoys", - "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", - "-delete_unreferenced_peptide_hits", - (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/alignment"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + withName: 'OPENMS_PEPTIDEINDEXER' { + publishDir = [ + enabled: false + ] + } withName: 'OPENMS_IDFILTER_Q_VALUE' { - ext.prefix = {"${meta.id}_fdr_filtered"} - ext.args = [ - "-remove_decoys", - "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", - "-delete_unreferenced_peptide_hits", - (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/alignment"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + ext.prefix = {"${meta.id}_pout_filtered"} + ext.args = [ + "-remove_decoys", + "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", + "-delete_unreferenced_peptide_hits", + (params.fdr_threshold == '0.01') ? 
"-score:pep 0.05" : "-score:pep " + params.fdr_threshold + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/percolator"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } withName: 'OPENMS_PERCOLATORADAPTER' { - ext.prefix = {"${meta.id}_all_ids_merged_psm_perc"} - ext.args = [ - "-seed 4711", - "-trainFDR 0.05", - "-testFDR 0.05", - "-enzyme no_enzyme", - "-subset_max_train ${params.subset_max_train}", - "-doc ${params.description_correct_features} ", - "-post_processing_tdc", - (params.fdr_level != 'psm_level_fdrs') ? "-" + params.fdr_level : "" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/percolator"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + ext.args = [ + "-seed 4711", + "-trainFDR 0.05", + "-testFDR 0.05", + "-enzyme no_enzyme", + "-subset_max_train ${params.subset_max_train}", + "-doc ${params.description_correct_features} ", + "-post_processing_tdc", + (params.fdr_level != 'psm_level_fdrs') ? 
"-" + params.fdr_level : "" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/percolator"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } withName: 'OPENMS_PSMFEATUREEXTRACTOR' { - publishDir = [ - path: {"${params.outdir}/intermediate_results/features"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } + publishDir = [ + path: {"${params.outdir}/intermediate_results/percolator"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } + + withName: 'OPENMS_MZTABEXPORTER' { + ext.prefix = {"${meta.id}"} + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + pattern: '*.mzTab' + ] + } + + withName: 'OPENMS_IDRIPPER' { + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } - withName: 'OPENMS_MZTABEXPORTER_QUANT' { - publishDir = [ - path: {"${params.outdir}/intermediate_results/features"}, - mode: params.publish_dir_mode, - pattern: '*.mzTab' - ] + withName: 'OPENMS_IDSCORESWITCHER' { + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } + + withName: 'PYOPENMS_IDFILTER' { + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] } withName: 'OPENMS_FEATUREFINDERIDENTIFICATION' { + ext.args = [ + "-extract:mz_window ${params.quantification_mz_window}", + "-extract:rt_window ${params.quantification_rt_window}", + "-detect:mapping_tolerance ${params.quantification_mapping_tolerance}", + "-detect:peak_width ${params.quantification_peak_width}", + "-detect:min_peak_width ${params.quantification_min_peak_width}" + ].join(' ').trim() publishDir = [ path: {"${params.outdir}/intermediate_results/features"}, mode: params.publish_dir_mode, @@ -162,29 +230,27 @@ process { ] } - withName: 'OPENMS_TEXTEXPORTER_UNQUANTIFIED|OPENMS_TEXTEXPORTER_QUANTIFIED' { - publishDir = [ - path: {"${params.outdir}/"}, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - } + 
withName: 'OPENMS_FEATURELINKERUNLABELEDKD' { + publishDir = [ + enabled: false + ] + } - withName: 'OPENMS_TEXTEXPORTER_COMET' { - ext.prefix = {"${meta.sample}_${meta.condition}_${meta.id}"} - publishDir = [ - path: {"${params.outdir}/intermediate_results/comet"}, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - } + withName: 'OPENMS_TEXTEXPORTER' { + publishDir = [ + path: {"${params.outdir}"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } withName: 'OPENMS_IDCONFLICTRESOLVER' { - publishDir = [ - path: {"${params.outdir}/intermediate_results/features"}, - mode: params.publish_dir_mode, - pattern: '*.consensusXML' - ] + publishDir = [ + path: {"${params.outdir}/intermediate_results/features"}, + mode: params.publish_dir_mode, + pattern: '*.consensusXML', + enabled: false + ] } } @@ -384,42 +450,11 @@ process { } } -process { - - if (params.predict_RT) { - withName: 'OPENMS_RTMODEL' { - publishDir = [ - path: {"${params.outdir}/RT_prediction"}, - mode: params.publish_dir_mode, - pattern: '*.txt|*.paramXML' - ] - } - - withName: 'OPENMS_RTPREDICT_FOUND_PEPTIDES' { - ext.prefix = {"${meta.sample}_id_files_for_rt_prediction_RTpredicted"} - publishDir = [ - path: {"${params.outdir}/RT_prediction"}, - mode: params.publish_dir_mode, - pattern: '*.csv' - ] - } - - withName: 'OPENMS_RTPREDICT_NEOEPITOPES' { - ext.prefix = {"${meta.sample}_txt_file_for_rt_prediction_RTpredicted"} - publishDir = [ - path: {"${params.outdir}/RT_prediction"}, - mode: params.publish_dir_mode, - pattern: '*.csv' - ] - } - } -} process { if (params.annotate_ions) { withName: 'PYOPENMS_IONANNOTATOR' { - ext.prefix = {"${meta.sample}"} ext.args = [ "--precursor_charge ${params.prec_charge}", "--fragment_mass_tolerance ${params.fragment_mass_tolerance}", @@ -430,27 +465,21 @@ process { mode: params.publish_dir_mode, pattern: '*.tsv' ] - } } + } } process { if (params.use_deeplc) { withName: 'DEEPLC' { - publishDir = [ - path: {"${params.outdir}/DeepLC"}, - mode: 
params.publish_dir_mode, - pattern: '*.idXML' - ] - } - // DeepLC settings - use_deeplc = false - deeplc_calibration_mode = 'rt_bin' - deeplc_calibration_bins = 20 - deeplc_add_abs_rt_error = false - deeplc_add_sqr_rt_error = false - deeplc_add_log_rt_error = false + publishDir = [ + path: {"${params.outdir}/DeepLC"}, + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } } } @@ -461,11 +490,9 @@ process { publishDir = [ path: {"${params.outdir}/MS2PIP"}, mode: params.publish_dir_mode, - pattern: '*.idXML' + pattern: '*.idXML', + enabled: false ] } - // MS2PIP settings - use_ms2pip = false - ms2pip_model_name = 'Immuno-HCD' } } diff --git a/conf/test_deeplc.config b/conf/test_deeplc.config new file mode 100644 index 00000000..687b0135 --- /dev/null +++ b/conf/test_deeplc.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests with DeepLC +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/mhcquant -profile test_deeplc, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test DeepLC profile' + config_profile_description = 'Minimal test dataset to check pipeline function with DeepLC' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true + spectrum_batch_size = 5000 + use_deeplc = true + deeplc_add_abs_rt_error = true + deeplc_add_sqr_rt_error = true + deeplc_add_log_rt_error = true +} diff --git a/conf/test_full.config b/conf/test_full.config index 1ba617ee..b5793cee 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,10 +14,15 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - predict_class_1 = true - // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/allele_sheet_full.tsv' + + predict_class_1 = true + predict_class_2 = true + use_deeplc = true + use_ms2pip = true + ms2pip_model_name = 'CID' + annotate_ions = true } diff --git a/conf/test_ionannotator.config b/conf/test_ionannotator.config new file mode 100644 index 00000000..2e52c0b5 --- /dev/null +++ b/conf/test_ionannotator.config @@ -0,0 +1,30 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running ion annotator tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/mhcquant -profile test_ionannotator, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test ion annotator profile' + config_profile_description = 'Minimal test dataset to check pipeline function with ion annotator' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true + spectrum_batch_size = 5000 + annotate_ions = true +} diff --git a/conf/test_ms2pip.config b/conf/test_ms2pip.config new file mode 100644 index 00000000..da3c23b5 --- /dev/null +++ b/conf/test_ms2pip.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests with MS2PIP +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/mhcquant -profile test_ms2pip, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test MS2PIP profile' + config_profile_description = 'Minimal test dataset to check pipeline function with MS2PIP' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' + + // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) + skip_quantification = true + spectrum_batch_size = 5000 + use_ms2pip = true + ms2pip_model_name = 'Immuno-HCD' +} diff --git a/conf/test_quant.config b/conf/test_quant.config new file mode 100644 index 00000000..105d4396 --- /dev/null +++ b/conf/test_quant.config @@ -0,0 +1,22 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running quantification tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/mhcquant -profile test_quant, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Quantification test profile' + config_profile_description = 'Quantification test dataset to check pipeline function' + + // Input data + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' + + spectrum_batch_size = 2000 +} diff --git a/docs/images/mhcquant_subway.png b/docs/images/mhcquant_subway.png new file mode 100644 index 00000000..2de45154 Binary files /dev/null and b/docs/images/mhcquant_subway.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e47..00000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb8..00000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf5..00000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index 1e74ea39..112aa819 100644 --- a/docs/output.md +++ b/docs/output.md @@ -8,21 +8,27 @@ The directories listed below will be created in the results directory after the ## General -### Quantification - -
Output files -- `*.tsv` : If `--skip_quantification` is not specified. +- `*.mzTab` +- `*.tsv` -
+The mzTab output file follows the [HUPO-PSI format](http://www.psidev.info/mztab) and combines all information of the sample-condition group extracted from a database search throughout the pipeline. A detailed explanation of the respective entries can be found [here](https://psidev.info/sites/default/files/2017-07/R2_The_ten_minute_guide_to_mzTab.pdf). MzTab files are compatible with the PRIDE Archive - proteomics data repository and can be uploaded as search files. -The CSV output file is a table containing all information extracted from a database search throughout the pipeline. See the [OpenMS](https://www.openms.de/) or PSI documentation for more information about [annotated scores and format](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_TextExporter.html). +MzTab files contain many columns annotating the most important information - here are a few of them: + +```bash +PEP sequence accession best_search_engine_score[1] retention_time charge mass_to_charge peptide_abundance_study_variable[1] +``` + +Most importantly, in this format the Comet XCorr of each peptide identification is annotated in the `best_search_engine_score[1]` column and peptide quantities in the `peptide_abundance_study_variable` columns. If `--skip_quantification` is specified, the `best_search_engine_score[1]` column holds the percolator q-value. + +The TSV output file is an alternative output of [OpenMS](https://www.openms.de/) comprising similar information to the mzTab output. A brief explanation of the structure is listed below. See the documentation of the format or the PSI documentation for more information about [annotated scores and format](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_TextExporter.html).
MAP contains information about the different mzML files that were provided initially ```bash -#MAP id filename label size +#MAP id filename label size ``` RUN contains information about the search that was performed on each run @@ -57,44 +63,44 @@ PEPTIDE contains information about peptide hits that were identified and corresp ### Intermediate results -
+
+ +This folder contains the intermediate results from various steps of the MHCquant pipeline (e.g. (un)filtered PSMs, aligned mzMLs, features) + Output files - `intermediate_results/` - - `alignment` - - `*filtered.idXML` : If `--skip_quantification` is not specified, then this file is generated in the `OPENMS_IDFILTER_Q_VALUE` - - `{ID}_-_{filename}_filtered` : An outcome file of `OPENMS_IDFILTER_FOR_ALIGNMENT`, this file is only generated when `--skip_quantification` is not specified - - `comet` - - `{raw filename}.tsv` : The outcome of `CometAdapter` containing more detailed information about all of the hits that have been found (no filtering has been applied) - - `{Sample}_{Condition}_{ID}.tsv` : Single files that hold information about the peptides sequences that have been identified (no filtering has been applied) - - `features` - - `*.mztab` : mztab file generated by the OpenMS MzTabExporter command which is present in the `PROCESS_FEATURE` step - - `*.idXML` : Outcome of `PSMFEATUREEXTRACTOR`, containing the computations of extra features for each input PSM - - `*.featureXML` : These files file is generated by the OpenMS `FeatureFinderIdentification` command - - `ion_annotations` - - `{Sample}_{Condition}_all_peaks.tsv`: Contains metadata of all measured ions of peptides reported after `OPENMS_IDFILTER_Q_VALUE`. - - `{Sample}_{Condition}_matching_ions.tsv`: Contains ion annotations and additional metadata of peptides reported after `OPENMS_IDFILTER_Q_VALUE`. + + - `alignment`: Contains the `trafoXML` files of each run that document the retention time shift after alignment in quantification mode. 
+ + - `comet`: Contains pin files generated by comet after database search - `percolator` - - `*all_ids_merged_psm_perc.idXML` : idXML files are generated with `OPENMS_PERCOLATORADAPTER` - - `refined_fdr` (Only if `--refine_fdr_on_predicted_subset` is specified) - - `*merged_psm_perc_filtered.mzTab` : This file export filtered percolator results (by q-value) as mztab - - `*_all_ids_merged.mzTab` : Exportas all of the psm results as mztab - - `*perc_subset.idXML` : This file is the outcome of a second OpenMS `PercolatorAdapter` run - - `*pred_filtered.idXML` : Contains filtered PSMs prediction results by shrinked search space (outcome mhcflurry). - - `{ID}_-_{filename}_filtered` : An outcome file of `OPENMS_IDFILTER_REFINED` -
+ - `{Sample}_{Condition}_psm.idXML`: File holding extra features that will be used by percolator. Created by [PSMFeatureExtractor](https://openms.de/doxygen/release/3.0.0/html/UTILS_PSMFeatureExtractor.html). + - `{Sample}_{Condition}_pout.idXML`: Unfiltered percolator output. + - `{Sample}_{Condition}_pout_filtered.idXML`: FDR-filtered percolator output. -This folder contains the intermediate results from various steps of the MHCquant pipeline (e.g. (un)filtered PSMs, aligned mzMLs, features) + - `features`: Holds information of quantified features in `featureXML` files as a result of the [FeatureFinderIdentification](https://openms.de/doxygen/release/3.0.0/html/TOPP_FeatureFinderIdentification.html) in the quantification mode. -The output mzTab contains many columns annotating the most important information - here are a few outpointed: +- `ion_annotations` -```bash -PEP sequence accession best_search_engine_score[1] retention_time charge mass_to_charge peptide_abundance_study_variable[1] -``` + - `{Sample}_{Condition}_all_peaks.tsv`: Contains metadata of all measured ions of peptides reported after peptide identification. + + - `{Sample}_{Condition}_matching_ions.tsv`: Contains ion annotations and additional metadata of peptides reported after peptide identification. + +- `refined_fdr` (Only if `--refine_fdr_on_predicted_subset` is specified) + + - `*merged_psm_perc_filtered.mzTab` : This file exports the percolator results filtered by q-value as mzTab. + + - `*_all_ids_merged.mzTab` : Exports all of the PSM results as mzTab. -Most important to know is that in this format we annotated the q-value of each peptide identification in the `best_search_engine_score[1]` column and peptide quantities in the peptide_abundance_study_variable` columns. -[mzTab](http://www.psidev.info/mztab) is a light-weight format to report mass spectrometry search results.
It provides all important information about identified peptide hits and is compatible with the PRIDE Archive - proteomics data repository. + - `*perc_subset.idXML` : This file is the outcome of a second OpenMS `PercolatorAdapter` run. + + - `*pred_filtered.idXML` : Contains the PSM prediction results filtered by the shrunken search space (outcome of MHCflurry). + + - `{ID}_-_{filename}_filtered` : An outcome file of `OPENMS_IDFILTER_REFINED`. + +
## VCF @@ -118,8 +124,11 @@ These CSV files list all of the theoretically possible neoepitope sequences from Output files - `class_1_bindings/` + - `*found_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified + - `class_2_bindings/` + - `*found_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified @@ -137,9 +146,12 @@ peptide sequence geneID Output files - `class_1_bindings/` - - `*vcf_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified + +- `*vcf_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified + - `class_2_bindings/` - - `*vcf_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified + +- `*vcf_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified @@ -158,9 +170,12 @@ Sequence Antigen ID Variants Output files - `class_1_bindings/` - - `*predicted_peptides_class_1.csv`: If `--predict_class_1` is specified, then this CSV is generated + +- `*predicted_peptides_class_1.csv`: If `--predict_class_1` is specified, then this CSV is generated + - `class_2_bindings/` - - `*predicted_peptides_class_2.csv`: If `--predict_class_2` is specified, then this CSV is generated + +- `*predicted_peptides_class_2.csv`: If `--predict_class_2` is specified, then this CSV is generated @@ -171,26 +186,18 @@ The prediction outputs are comma-separated table (CSV) for each allele, listing peptide allele prediction prediction_low prediction_high prediction_percentile ``` -## Retention time prediction +### MultiQC
Output files -- `RT_prediction` - - `*id_RTpredicted.csv`: If `--predict_RT` is specified, the retention time found peptides are provided - - `*txt_RTpredicted.csv`: If `--predict_RT` is specified, the retention time predicted neoepitopes are provided +- `multiqc/` -
+- `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. -### MultiQC +- `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. -
-Output files - -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `multiqc_plots/`: directory containing static images from the report in various formats.
@@ -204,9 +211,11 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ Output files - `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.html`. - Reports generated by the pipeline: `software_versions.yml`. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. diff --git a/docs/usage.md b/docs/usage.md index b1892f10..e454a066 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -62,7 +62,7 @@ An [example samplesheet](../assets/samplesheet.tsv) has been provided with the p The typical command for running the pipeline is as follows: ```console -nextflow run nf-core/mhcquant --input 'samples.tsv' --outdir --fasta 'SWISSPROT_2020.fasta' --allele_sheet 'alleles.tsv' -profile docker +nextflow run nf-core/mhcquant --input 'samples.tsv' --outdir --fasta 'SWISSPROT_2020.fasta' --use_deeplc --use_ms2pip -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -80,7 +80,9 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +:::warning +Do not use `-c ` to specify parameters as this will result in errors. 
Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: The above pipeline run specified with a params file in yaml format: @@ -117,11 +119,15 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` @@ -129,7 +135,9 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
+::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 408951ae..01b8653d 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -222,6 +223,21 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // diff --git a/lib/WorkflowMhcquant.groovy b/lib/WorkflowMhcquant.groovy index cff57856..a99924b3 100644 --- a/lib/WorkflowMhcquant.groovy +++ b/lib/WorkflowMhcquant.groovy @@ -145,7 +145,7 @@ class WorkflowMhcquant { public static String toolCitationText(params) { - // TODO Optionally add in-text citation tools to this list. + // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 
2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ diff --git a/modules.json b/modules.json index b1cceb7c..f9875084 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,12 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", - "installed_by": ["modules"] - }, - "fastqc": { - "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] } } diff --git a/modules/local/deeplc.nf b/modules/local/deeplc.nf index 800c510f..5f7ea37d 100644 --- a/modules/local/deeplc.nf +++ b/modules/local/deeplc.nf @@ -3,7 +3,9 @@ process DEEPLC { label 'process_medium' conda "bioconda::deeplc=2.2.0 bioconda::pyopenms=2.9.1" - container 'ghcr.io/jonasscheid/mhcquant:deeplc' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' : + 'biocontainers/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' }" input: tuple val(meta), path(idxml_in) diff --git a/modules/local/mhcflurry_predictneoepitopesclass1.nf b/modules/local/mhcflurry_predictneoepitopesclass1.nf index f0b72fd2..9c84ea9f 100644 --- a/modules/local/mhcflurry_predictneoepitopesclass1.nf +++ b/modules/local/mhcflurry_predictneoepitopesclass1.nf @@ -1,5 +1,5 @@ process MHCFLURRY_PREDICTNEOEPITOPESCLASS1 { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" @@ -18,7 +18,7 @@ process MHCFLURRY_PREDICTNEOEPITOPESCLASS1 { task.ext.when == null || task.ext.when script: - def prefix = task.ext.suffix ?: "${neoepitopes}_${meta}_predicted_neoepitopes_class_1" + def prefix = task.ext.suffix ?: "${neoepitopes}_${meta.id}_predicted_neoepitopes_class_1" """ mhcflurry-downloads --quiet fetch models_class1 diff --git a/modules/local/mhcflurry_predictpeptidesclass1.nf b/modules/local/mhcflurry_predictpeptidesclass1.nf index aa45acd3..d828542a 100644 --- a/modules/local/mhcflurry_predictpeptidesclass1.nf +++ b/modules/local/mhcflurry_predictpeptidesclass1.nf @@ -1,5 +1,5 @@ process MHCFLURRY_PREDICTPEPTIDESCLASS1 { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" diff --git a/modules/local/mhcflurry_predictpsms.nf b/modules/local/mhcflurry_predictpsms.nf index 87159089..f5e92fc1 100644 --- a/modules/local/mhcflurry_predictpsms.nf +++ b/modules/local/mhcflurry_predictpsms.nf @@ -1,5 +1,5 @@ process MHCFLURRY_PREDICTPSMS { - tag "$meta" + tag "$meta.id" label 'process_medium' conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" diff --git 
a/modules/local/mhcnuggets_neoepitopesclass2post.nf b/modules/local/mhcnuggets_neoepitopesclass2post.nf index 088a491a..721b6cfe 100644 --- a/modules/local/mhcnuggets_neoepitopesclass2post.nf +++ b/modules/local/mhcnuggets_neoepitopesclass2post.nf @@ -1,5 +1,5 @@ process MHCNUGGETS_NEOEPITOPESCLASS2POST { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::mhcnuggets=2.3.2" diff --git a/modules/local/mhcnuggets_neoepitopesclass2pre.nf b/modules/local/mhcnuggets_neoepitopesclass2pre.nf index 597970aa..fd68fec4 100644 --- a/modules/local/mhcnuggets_neoepitopesclass2pre.nf +++ b/modules/local/mhcnuggets_neoepitopesclass2pre.nf @@ -1,5 +1,5 @@ process MHCNUGGETS_NEOEPITOPESCLASS2PRE { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::mhcnuggets=2.3.2" @@ -18,7 +18,7 @@ process MHCNUGGETS_NEOEPITOPESCLASS2PRE { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta}_mhcnuggets_preprocessed" + def prefix = task.ext.prefix ?: "${meta.id}_mhcnuggets_preprocessed" """ preprocess_neoepitopes_mhcnuggets.py \\ diff --git a/modules/local/mhcnuggets_peptidesclass2post.nf b/modules/local/mhcnuggets_peptidesclass2post.nf index fe94dc20..fd114f36 100644 --- a/modules/local/mhcnuggets_peptidesclass2post.nf +++ b/modules/local/mhcnuggets_peptidesclass2post.nf @@ -1,5 +1,5 @@ process MHCNUGGETS_PEPTIDESCLASS2POST { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::mhcnuggets=2.3.2" @@ -18,7 +18,7 @@ process MHCNUGGETS_PEPTIDESCLASS2POST { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_postprocessed" + def prefix = task.ext.prefix ?: "${meta.id}_postprocessed" """ postprocess_peptides_mhcnuggets.py --input $peptides \\ diff --git a/modules/local/mhcnuggets_peptidesclass2pre.nf b/modules/local/mhcnuggets_peptidesclass2pre.nf index 8f8a6a83..a3b140aa 100644 --- a/modules/local/mhcnuggets_peptidesclass2pre.nf +++ 
b/modules/local/mhcnuggets_peptidesclass2pre.nf @@ -1,5 +1,5 @@ process MHCNUGGETS_PEPTIDESCLASS2PRE { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::mhcnuggets=2.3.2" @@ -19,7 +19,7 @@ process MHCNUGGETS_PEPTIDESCLASS2PRE { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_preprocessed_mhcnuggets_peptides" + def prefix = task.ext.prefix ?: "${meta.id}_preprocessed_mhcnuggets_peptides" """ preprocess_peptides_mhcnuggets.py --mztab $mztab \\ diff --git a/modules/local/mhcnuggets_predictneoepitopesclass2.nf b/modules/local/mhcnuggets_predictneoepitopesclass2.nf index 03b99d81..d2c35eef 100644 --- a/modules/local/mhcnuggets_predictneoepitopesclass2.nf +++ b/modules/local/mhcnuggets_predictneoepitopesclass2.nf @@ -1,5 +1,5 @@ process MHCNUGGETS_PREDICTNEOEPITOPESCLASS2 { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" @@ -18,7 +18,7 @@ process MHCNUGGETS_PREDICTNEOEPITOPESCLASS2 { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta}_predicted_neoepitopes_class_2" + def prefix = task.ext.prefix ?: "${meta.id}_predicted_neoepitopes_class_2" """ mhcnuggets_predict_peptides.py --peptides $neoepitopes \\ diff --git a/modules/local/mhcnuggets_predictpeptidesclass2.nf b/modules/local/mhcnuggets_predictpeptidesclass2.nf index 69364007..d275e7c2 100644 --- a/modules/local/mhcnuggets_predictpeptidesclass2.nf +++ b/modules/local/mhcnuggets_predictpeptidesclass2.nf @@ -1,5 +1,5 @@ process MHCNUGGETS_PREDICTPEPTIDESCLASS2 { - tag "$meta" + tag "$meta.id" label 'process_low' conda "bioconda::fred2=2.0.7 bioconda::mhcflurry=1.4.3 bioconda::mhcnuggets=2.3.2" @@ -18,7 +18,7 @@ process MHCNUGGETS_PREDICTPEPTIDESCLASS2 { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_predicted_peptides_class_2" + def prefix = task.ext.prefix ?: 
"${meta.id}_predicted_peptides_class_2" """ mhcnuggets_predict_peptides.py --peptides $peptides \\ diff --git a/modules/local/ms2pip.nf b/modules/local/ms2pip.nf index 911e3ce9..c66cb357 100644 --- a/modules/local/ms2pip.nf +++ b/modules/local/ms2pip.nf @@ -3,7 +3,9 @@ process MS2PIP { label 'process_low' conda "bioconda::ms2pip=3.11.0 bioconda::pyopenms=2.9.1" - container 'ghcr.io/jonasscheid/mhcquant:ms2pip' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' : + 'biocontainers/mulled-v2-beb85d5ee68ba9251d26079ca28797d51ea3c49a:857e5e7908422b6ea5016a3c313f67087fbe2f8b-0' }" input: tuple val(meta), path(idxml_in), path(mzml) diff --git a/modules/local/openms_cometadapter.nf b/modules/local/openms_cometadapter.nf index ab531a9d..d3f000d0 100644 --- a/modules/local/openms_cometadapter.nf +++ b/modules/local/openms_cometadapter.nf @@ -2,10 +2,10 @@ process OPENMS_COMETADAPTER { tag "$meta.id" label 'process_high' - conda "bioconda::openms-thirdparty=2.9.1" + conda "bioconda::openms-thirdparty=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.9.1--h9ee0642_1' : - 'biocontainers/openms-thirdparty:2.9.1--h9ee0642_1' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.0.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.0.0--h9ee0642_1' }" input: tuple val(meta), path(mzml), path(fasta) @@ -36,7 +36,7 @@ process OPENMS_COMETADAPTER { -out ${prefix}.idXML \\ -database $fasta \\ -threads $task.cpus \\ - -pin_out ${prefix}.tsv \\ + -pin_out ${prefix}_pin.tsv \\ $params_file \\ $args \\ $mods \\ diff --git a/modules/local/openms_decoydatabase.nf b/modules/local/openms_decoydatabase.nf index 3c4970d6..6994057f 100644 --- a/modules/local/openms_decoydatabase.nf +++ b/modules/local/openms_decoydatabase.nf @@ -2,10 +2,10 @@ process OPENMS_DECOYDATABASE { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/local/openms_falsediscoveryrate.nf b/modules/local/openms_falsediscoveryrate.nf index 8ff40dd8..048dda8b 100644 --- a/modules/local/openms_falsediscoveryrate.nf +++ b/modules/local/openms_falsediscoveryrate.nf @@ -2,10 +2,10 @@ process OPENMS_FALSEDISCOVERYRATE { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(idxml) diff --git a/modules/local/openms_featurefinderidentification.nf b/modules/local/openms_featurefinderidentification.nf index c4f3bd65..bbdfb26d 100644 --- a/modules/local/openms_featurefinderidentification.nf +++ b/modules/local/openms_featurefinderidentification.nf @@ -2,13 +2,13 @@ process OPENMS_FEATUREFINDERIDENTIFICATION { tag "$meta.id" label 'process_medium' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(id_quant_int), path(mzml), path(id_quant) + tuple val(meta), path(mzml), path(id_int), path(id_ext) output: tuple val(meta), path("*.featureXML"), emit: featurexml @@ -18,14 +18,16 @@ process OPENMS_FEATUREFINDERIDENTIFICATION { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_${meta.id}" - def arguments = params.quantification_fdr ? "-id $id_quant_int -id_ext $id_quant -svm:min_prob ${params.quantification_min_prob}" : "-id $id_quant" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}" + def args = task.ext.args ?: '' + def quant_fdr = params.quantification_fdr ? 
"-id $id_int -id_ext $id_ext -svm:min_prob ${params.quantification_min_prob}" : "-id $id_ext" + args = args + " $quant_fdr" """ FeatureFinderIdentification -in $mzml \\ -out ${prefix}.featureXML \\ -threads $task.cpus \\ - ${arguments} + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/openms_featurelinkerunlabeledkd.nf b/modules/local/openms_featurelinkerunlabeledkd.nf index e4976128..2765836a 100644 --- a/modules/local/openms_featurelinkerunlabeledkd.nf +++ b/modules/local/openms_featurelinkerunlabeledkd.nf @@ -2,10 +2,10 @@ process OPENMS_FEATURELINKERUNLABELEDKD { tag "$meta.id" label 'process_single' - conda "bioconda::openms-thirdparty=2.9.1" + conda "bioconda::openms-thirdparty=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.9.1--h9ee0642_1' : - 'biocontainers/openms-thirdparty:2.9.1--h9ee0642_1' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.0.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.0.0--h9ee0642_1' }" input: tuple val(meta), path(features) diff --git a/modules/local/openms_filefilter.nf b/modules/local/openms_filefilter.nf index 27da384c..e526e959 100644 --- a/modules/local/openms_filefilter.nf +++ b/modules/local/openms_filefilter.nf @@ -2,10 +2,10 @@ process OPENMS_FILEFILTER { tag "$meta.id" label 'process_low' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(mzml) @@ -18,7 +18,7 @@ process OPENMS_FILEFILTER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${mzml.baseName}_cleaned" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}" """ FileFilter -in $mzml \\ -out ${prefix}.mzML \\ diff --git a/modules/local/openms_idconflictresolver.nf b/modules/local/openms_idconflictresolver.nf index b089820a..7a1f795f 100644 --- a/modules/local/openms_idconflictresolver.nf +++ b/modules/local/openms_idconflictresolver.nf @@ -2,10 +2,10 @@ process OPENMS_IDCONFLICTRESOLVER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(consensus) diff --git a/modules/local/openms_idfilter.nf b/modules/local/openms_idfilter.nf index 9c807c15..fb946789 100644 --- a/modules/local/openms_idfilter.nf +++ b/modules/local/openms_idfilter.nf @@ -2,13 +2,13 @@ process OPENMS_IDFILTER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(idxml), file(peptide_filter) + tuple val(meta), path(idxml), val(peptide_filter) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,20 +18,18 @@ process OPENMS_IDFILTER { task.ext.when == null || task.ext.when script: - def whitelist = "$peptide_filter" - def prefix = task.ext.prefix ?: "${meta.id}_-_${idxml.baseName}_filtered" + def prefix = task.ext.prefix ?: "${meta.id}_filtered" def args = task.ext.args ?: '' - if (whitelist == "input.2") { - whitelist = " " + if (peptide_filter != null) { + args += "-whitelist:peptides $peptide_filter" } """ IDFilter -in $idxml \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ - $args \\ - $whitelist + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/openms_idmerger.nf b/modules/local/openms_idmerger.nf index 08db7778..740ecfcf 100644 --- a/modules/local/openms_idmerger.nf +++ b/modules/local/openms_idmerger.nf @@ -2,13 +2,13 @@ process OPENMS_IDMERGER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(aligned) + tuple val(meta), path(idxmls) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,10 +18,10 @@ process OPENMS_IDMERGER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_${meta.condition}_all_ids_merged" + def prefix = task.ext.prefix ?: "${meta.id}" """ - IDMerger -in $aligned \\ + IDMerger -in $idxmls \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ -annotate_file_origin true \\ diff --git a/modules/local/openms_idripper.nf b/modules/local/openms_idripper.nf new file mode 100644 index 00000000..64d3631c --- /dev/null +++ b/modules/local/openms_idripper.nf @@ -0,0 +1,34 @@ +process OPENMS_IDRIPPER { + tag "${meta.id}" + label 'process_single' + + conda "bioconda::openms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(merged_idxml) + + output: + tuple val(meta), path("*.idXML"), emit: ripped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + IDRipper -in $merged_idxml \\ + -out . 
\\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/openms_idscoreswitcher.nf b/modules/local/openms_idscoreswitcher.nf new file mode 100644 index 00000000..1df324f0 --- /dev/null +++ b/modules/local/openms_idscoreswitcher.nf @@ -0,0 +1,38 @@ +process OPENMS_IDSCORESWITCHER { + tag "$meta.id" + label 'process_single' + + conda "bioconda::openms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(idxml), path(whitelist) + + output: + tuple val(meta), path("*.idXML"), path(whitelist), emit: switched_idxml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_switched" + def args = task.ext.args ?: '' + + """ + IDScoreSwitcher -in $idxml \\ + -out ${prefix}.idXML \\ + -threads $task.cpus \\ + -new_score 'COMET:xcorr' \\ + -new_score_orientation 'higher_better' \\ + -old_score 'q-value' \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/openms_mapaligneridentification.nf b/modules/local/openms_mapaligneridentification.nf index e4f0e91b..550f59d2 100644 --- a/modules/local/openms_mapaligneridentification.nf +++ b/modules/local/openms_mapaligneridentification.nf @@ -2,13 +2,13 @@ process OPENMS_MAPALIGNERIDENTIFICATION { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(idxml) + tuple val(meta), path(idxmls) output: tuple val(meta), path("*.trafoXML"), emit: trafoxml @@ -18,11 +18,11 @@ process OPENMS_MAPALIGNERIDENTIFICATION { task.ext.when == null || task.ext.when script: - def out_names = idxml.collect { it.baseName+'.trafoXML' }.join(' ') + def out_names = idxmls.collect { it.baseName+'.trafoXML' }.join(' ') def args = task.ext.args ?: '' """ - MapAlignerIdentification -in $idxml \\ + MapAlignerIdentification -in $idxmls \\ -trafo_out ${out_names} \\ $args diff --git a/modules/local/openms_maprttransformer.nf b/modules/local/openms_maprttransformer.nf index 0026990d..afe6d007 100644 --- a/modules/local/openms_maprttransformer.nf +++ b/modules/local/openms_maprttransformer.nf @@ -2,10 +2,10 @@ process OPENMS_MAPRTTRANSFORMER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(alignment_file), path(trafoxml) @@ -18,7 +18,7 @@ process OPENMS_MAPRTTRANSFORMER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}_aligned" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_aligned" def fileExt = alignment_file.collect { it.name.tokenize("\\.")[1] }.join(' ') """ diff --git a/modules/local/openms_mztabexporter.nf b/modules/local/openms_mztabexporter.nf index f87aa53c..16056675 100644 --- a/modules/local/openms_mztabexporter.nf +++ b/modules/local/openms_mztabexporter.nf @@ -2,13 +2,13 @@ process OPENMS_MZTABEXPORTER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(mztab) + tuple val(meta), path(in_file) output: tuple val(meta), path("*.mzTab"), emit: mztab @@ -18,11 +18,11 @@ process OPENMS_MZTABEXPORTER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.sample}_${meta.condition}" + def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' """ - MzTabExporter -in $mztab \\ + MzTabExporter -in $in_file \\ -out ${prefix}.mzTab \\ -threads $task.cpus \\ $args diff --git a/modules/local/openms_peakpickerhires.nf b/modules/local/openms_peakpickerhires.nf index 588ba8c3..e8ca0afa 100644 --- a/modules/local/openms_peakpickerhires.nf +++ b/modules/local/openms_peakpickerhires.nf @@ -2,10 +2,10 @@ process OPENMS_PEAKPICKERHIRES { tag "$meta.id" label 'process_medium' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(mzml) diff --git a/modules/local/openms_peptideindexer.nf b/modules/local/openms_peptideindexer.nf index d6dcc63b..48a21cd6 100644 --- a/modules/local/openms_peptideindexer.nf +++ b/modules/local/openms_peptideindexer.nf @@ -2,10 +2,10 @@ process OPENMS_PEPTIDEINDEXER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: tuple val(meta), path(idxml), path(fasta) @@ -18,7 +18,7 @@ process OPENMS_PEPTIDEINDEXER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${idxml.baseName}_-_idx" + def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_idx" """ PeptideIndexer -in $idxml \\ diff --git a/modules/local/openms_percolatoradapter.nf b/modules/local/openms_percolatoradapter.nf index f2057d34..b45e41cc 100644 --- a/modules/local/openms_percolatoradapter.nf +++ b/modules/local/openms_percolatoradapter.nf @@ -2,13 +2,13 @@ process OPENMS_PERCOLATORADAPTER { tag "$meta.id" label 'process_low' - conda "bioconda::openms-thirdparty=2.9.1" + conda "bioconda::openms-thirdparty=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.9.1--h9ee0642_1' : - 'biocontainers/openms-thirdparty:2.9.1--h9ee0642_1' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.0.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.0.0--h9ee0642_1' }" input: - tuple val(meta), path(psm) + tuple val(meta), path(merged_with_features) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,13 +18,13 @@ process OPENMS_PERCOLATORADAPTER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}_pout" def args = task.ext.args ?: '' def klammer = (params.description_correct_features > 0 && params.klammer) ? 
"-klammer" : "" """ OMP_NUM_THREADS=$task.cpus \\ - PercolatorAdapter -in $psm \\ + PercolatorAdapter -in $merged_with_features \\ -out ${prefix}.idXML \\ $klammer \\ $args diff --git a/modules/local/openms_psmfeatureextractor.nf b/modules/local/openms_psmfeatureextractor.nf index e11c914b..7ec54ce7 100644 --- a/modules/local/openms_psmfeatureextractor.nf +++ b/modules/local/openms_psmfeatureextractor.nf @@ -2,13 +2,13 @@ process OPENMS_PSMFEATUREEXTRACTOR { tag "$meta.id" label 'process_low' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(merged) + tuple val(meta), path(idxml) output: tuple val(meta), path("*.idXML"), emit: idxml @@ -18,7 +18,7 @@ process OPENMS_PSMFEATUREEXTRACTOR { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${merged.baseName}_psm" + def prefix = task.ext.prefix ?: "${meta.id}_psm" def args = task.ext.args ?: '' def extra_features = "" if(params.use_deeplc || params.use_ms2pip){ @@ -40,7 +40,7 @@ process OPENMS_PSMFEATUREEXTRACTOR { } """ - PSMFeatureExtractor -in $merged \\ + PSMFeatureExtractor -in $idxml \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ $extra_features \\ diff --git a/modules/local/openms_rtmodel.nf b/modules/local/openms_rtmodel.nf deleted file mode 100644 index 5a9aa85a..00000000 --- a/modules/local/openms_rtmodel.nf +++ /dev/null @@ -1,35 +0,0 @@ -process OPENMS_RTMODEL { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms=2.9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'biocontainers/openms:2.9.1--h135471a_1' }" - - input: - tuple val(meta), path(rt_training) - - output: - tuple val(meta), path("*_rt_training.txt"), path("*.paramXML"), path("*_trainset.txt"), emit: complete - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.sample}" - - """ - RTModel -in $rt_training \\ - -cv:skip_cv \\ - -out ${prefix}_rt_training.txt \\ - -out_oligo_params ${prefix}_params.paramXML \\ - -out_oligo_trainset ${prefix}_trainset.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_rtpredict.nf b/modules/local/openms_rtpredict.nf deleted file mode 100644 index 933823b0..00000000 --- a/modules/local/openms_rtpredict.nf +++ /dev/null @@ -1,35 +0,0 @@ -process OPENMS_RTPREDICT { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms-thirdparty=2.9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.9.1--h9ee0642_1' : - 'biocontainers/openms-thirdparty:2.9.1--h9ee0642_1' }" - - input: - tuple val(meta), path(idxml), path(rt_model), path(rt_params), path(trainset) - - output: - tuple val(meta), path("*.csv"), emit: csv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.sample}_RTpredicted" - - """ - RTPredict -in_id $idxml \\ - -svm_model $rt_model \\ - -in_oligo_params $rt_params \\ - -in_oligo_trainset $trainset \\ - -out_text:file ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_textexporter.nf b/modules/local/openms_textexporter.nf index 09d53349..fc16d59e 100644 --- a/modules/local/openms_textexporter.nf +++ b/modules/local/openms_textexporter.nf @@ -2,13 +2,13 @@ process OPENMS_TEXTEXPORTER { tag "$meta.id" label 'process_single' - conda "bioconda::openms=2.9.1" + conda "bioconda::openms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_1' : - 'quay.io/biocontainers/openms:2.9.1--h135471a_1' }" + 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'quay.io/biocontainers/openms:3.0.0--h8964181_1' }" input: - tuple val(meta), path(consensus_resolved) + tuple val(meta), path(file) output: tuple val(meta), path("*.tsv"), emit: tsv @@ -22,7 +22,7 @@ process OPENMS_TEXTEXPORTER { def args = task.ext.args ?: '' """ - TextExporter -in $consensus_resolved \\ + TextExporter -in $file \\ -out ${prefix}.tsv \\ -threads $task.cpus \\ -id:add_hit_metavalues 0 \\ diff --git a/modules/local/pyopenms_idfilter.nf b/modules/local/pyopenms_idfilter.nf new file mode 100644 index 00000000..cabb8e7b --- /dev/null +++ b/modules/local/pyopenms_idfilter.nf @@ -0,0 +1,34 @@ +process PYOPENMS_IDFILTER { + tag "$meta.id" + label 'process_low' + + conda "bioconda::pyopenms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pyopenms:3.0.0--py311h9b8898c_0' : + 'biocontainers/pyopenms:3.0.0--py311h9b8898c_0' }" + + input: + tuple val(meta), path(idxml), path(whitelist) + + output: + tuple val(meta), path("*_fdr_filtered.idXML") , emit: filtered + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_fdr_filtered" + + """ + IDFilter.py \\ + --input $idxml \\ + --whitelist $whitelist \\ + --output ${prefix}.idXML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyopenms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/pyopenms_ionannotator.nf b/modules/local/pyopenms_ionannotator.nf index 8fdb9f83..d2f4964e 100644 --- a/modules/local/pyopenms_ionannotator.nf +++ b/modules/local/pyopenms_ionannotator.nf @@ -1,24 +1,24 @@ process PYOPENMS_IONANNOTATOR { - tag "$sample" + tag "$meta.id" label 'process_high' - conda "bioconda::pyopenms=2.8.0" + conda "bioconda::pyopenms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pyopenms:2.8.0--py310h3dc0cdb_1' : - 'biocontainers/pyopenms:2.8.0--py310h3dc0cdb_1' }" + 'https://depot.galaxyproject.org/singularity/pyopenms:3.0.0--py311h9b8898c_0' : + 'biocontainers/pyopenms:3.0.0--py311h9b8898c_0' }" input: - tuple val(sample), path(mzml), path(fdr_filtered_idxml) + tuple val(meta), path(mzml), path(fdr_filtered_idxml) output: - tuple val(sample), path("*.tsv"), path("*.tsv"), emit: tsv + tuple val(meta), path("*.tsv") , emit: tsv path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${mzml.baseName}" + def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' def xions = params.use_x_ions ? "-use_x_ions" : "" @@ -30,7 +30,7 @@ process PYOPENMS_IONANNOTATOR { get_ion_annotations.py \\ --input $mzml \\ -idxml $fdr_filtered_idxml \\ - --prefix $sample \\ + --prefix $meta.id \\ $args \\ $xions \\ $zions \\ diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a6099..ebc87273 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -5,7 +5,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b604749..1fc387be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -4,7 +4,7 @@ process MULTIQC { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow.config b/nextflow.config index 702fb6c3..2992acaf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,11 +44,15 @@ params { peptide_min_length = 8 peptide_max_length = 12 pick_ms_levels = 2 - predict_RT = false prec_charge = '2:3' precursor_mass_tolerance = 5 quantification_fdr = null quantification_min_prob = 0 + quantification_mz_window = 5 + quantification_rt_window = 0 + quantification_peak_width = 60 + quantification_min_peak_width = 0.2 + quantification_mapping_tolerance= 0 refine_fdr_on_predicted_subset = false remove_precursor_peak = false run_centroidisation = false @@ -63,6 +67,7 @@ params { variable_mods = 'Oxidation (M)' vcf_sheet = null annotate_ions = false + filter_mzml = false // DeepLC settings use_deeplc = false @@ -112,7 +117,7 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' + validationSchemaIgnoreParams = 'genomes,igenomes_base' validationShowHiddenParams = false validate_params = true @@ -212,6 +217,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false docker.enabled = false singularity.enabled = false @@ -221,11 +227,15 @@ profiles { } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_deeplc { includeConfig 'conf/test_deeplc.config' } + test_ms2pip { includeConfig 'conf/test_ms2pip.config' } + test_ionannotator { includeConfig 'conf/test_ionannotator.config' } + test_quant { includeConfig 
'conf/test_quant.config' } + test_full { includeConfig 'conf/test_full.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -280,7 +290,7 @@ manifest { description = """Identify and quantify peptides from mass spectrometry raw data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.4.2dev' + version = '2.5.0' doi = '10.1021/acs.jproteome.9b00313' } diff --git a/nextflow_schema.json b/nextflow_schema.json index ab7f6fb3..31135be5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -244,6 +244,16 @@ "default": "high_res", "fa_icon": "fas fa-wrench", "description": "Comets theoretical_fragment_ions parameter: theoretical fragment ion peak representation, high-res: sum of intensities plus flanking bins, ion trap (low-res) ms/ms: sum of intensities of central M bin only" + }, + "default_params_file_comet": { + "type": "string", + "fa_icon": "fas fa-file-code", + "description": "Default Comet params file. All parameters of this take precedence." + }, + "filter_mzml": { + "type": "boolean", + "fa_icon": "fas fa-file-code", + "description": "Clean up mzml files and remove artificial charge 0 peptides." } } }, @@ -338,11 +348,6 @@ "type": "string", "fa_icon": "fas fa-train-track", "description": "MS2pip model name defined (https://github.com/compomics/ms2pip#specialized-prediction-models)" - }, - "default_params_file_comet": { - "type": "string", - "fa_icon": "fas fa-file-code", - "description": "Default Comet params file. All parameters of this take precedence." } } }, @@ -359,7 +364,7 @@ "description": "Skip quantification and only yield peptide identifications" }, "quantification_fdr": { - "type": "string", + "type": "boolean", "fa_icon": "fas fa-less-than", "description": "Compute FDR for the targeted approach", "help_text": "(Weisser H. and Choudhary J.S. J Proteome Res. 
2017 Aug 4)" @@ -367,6 +372,26 @@ "quantification_min_prob": { "type": "number", "description": "Specify a cut off probability value for quantification events as a filter" + }, + "quantification_mz_window": { + "type": "number", + "description": "Specify a m/z window for matching between runs" + }, + "quantification_rt_window": { + "type": "number", + "description": "Specify a rt window for matching between runs" + }, + "quantification_mapping_tolerance": { + "type": "number", + "description": "Specify a rt mapping tolerance for mapping features between runs" + }, + "quantification_peak_width": { + "type": "number", + "description": "Specify a peak width for feature extraction" + }, + "quantification_min_peak_width": { + "type": "number", + "description": "Specify a minimum peak width for quantification" } } }, @@ -436,20 +461,6 @@ } } }, - "rt_prediction": { - "title": "RT Prediction", - "type": "object", - "fa_icon": "fas fa-timeline", - "description": "", - "default": "", - "properties": { - "predict_RT": { - "type": "boolean", - "fa_icon": "fas fa-wrench", - "description": "Set this option to predict retention times of all identified peptides and possible neoepitopes based on high scoring ids" - } - } - }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -551,14 +562,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -582,7 +591,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -597,7 +605,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true 
}, "hook_url": { @@ -636,7 +643,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -644,7 +650,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -652,7 +657,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } @@ -684,9 +688,6 @@ { "$ref": "#/definitions/variant_options" }, - { - "$ref": "#/definitions/rt_prediction" - }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/map_alignment.nf b/subworkflows/local/map_alignment.nf index e038f7e7..90efc013 100644 --- a/subworkflows/local/map_alignment.nf +++ b/subworkflows/local/map_alignment.nf @@ -1,9 +1,6 @@ /* - * Perform the quantification of the samples when the parameter --skip_quantification is not provided + * Align retention times of runs to be able to quantify them. 
*/ - -include { OPENMS_FALSEDISCOVERYRATE } from '../../modules/local/openms_falsediscoveryrate' -include { OPENMS_IDFILTER as OPENMS_IDFILTER_FOR_ALIGNMENT } from '../../modules/local/openms_idfilter' include { OPENMS_MAPALIGNERIDENTIFICATION } from '../../modules/local/openms_mapaligneridentification' include { OPENMS_MAPRTTRANSFORMER as OPENMS_MAPRTTRANSFORMERMZML @@ -12,58 +9,44 @@ include { workflow MAP_ALIGNMENT { take: - indexed_hits - mzml_files + runs_to_be_aligned + mzml + merge_meta_map main: ch_versions = Channel.empty() - // Calculate fdr for id based alignment - OPENMS_FALSEDISCOVERYRATE(indexed_hits) - ch_versions = ch_versions.mix(OPENMS_FALSEDISCOVERYRATE.out.versions.first().ifEmpty(null)) - // Filter fdr for id based alignment - OPENMS_IDFILTER_FOR_ALIGNMENT(OPENMS_FALSEDISCOVERYRATE.out.idxml - .flatMap { it -> [tuple(it[0], it[1], null)]}) - ch_versions = ch_versions.mix(OPENMS_IDFILTER_FOR_ALIGNMENT.out.versions.first().ifEmpty(null)) - // Group samples together if they are replicates - ch_grouped_fdr_filtered = OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml - .map { - meta, raw -> - [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] - } - .groupTuple(by: [0]) - // Compute alignment rt transformation - OPENMS_MAPALIGNERIDENTIFICATION(ch_grouped_fdr_filtered) + // Compute group-wise alignment rt transformation + OPENMS_MAPALIGNERIDENTIFICATION( runs_to_be_aligned ) ch_versions = ch_versions.mix(OPENMS_MAPALIGNERIDENTIFICATION.out.versions.first().ifEmpty(null)) - // Obtain the unique files that were present for the combined data - joined_trafos = OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml + + // Join run specific trafoXMLs with meta information + merge_meta_map + .join( OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml ) + .map { groupMeta, meta, trafoxml -> [meta, trafoxml] } .transpose() - .flatMap { - meta, trafoxml -> - ident = trafoxml.baseName.split('_-_')[0] - [[[id:ident, sample:meta.sample, 
condition:meta.condition, ext:meta.ext], trafoxml]] - } - // Intermediate step to join RT transformation files with mzml channels - joined_trafos_mzmls = mzml_files.join(joined_trafos) - // Intermediate step to join RT transformation files with idxml channels - joined_trafos_ids = indexed_hits.join(joined_trafos) + .set { joined_trafos } + + // Intermediate step to join RT transformation files with mzml channels -> [meta, mzml, trafoxml] + joined_trafos_mzmls = mzml.join(joined_trafos) + + // Intermediate step to join RT transformation files with idxml channels -> [meta, idxml, trafoxml] + runs_to_be_aligned + .join( merge_meta_map ) + .map { group_meta, idxml, meta -> [meta, idxml] } + .transpose() + .join( joined_trafos ) + .set { joined_trafos_ids } + // Align mzML files using trafoXMLs OPENMS_MAPRTTRANSFORMERMZML(joined_trafos_mzmls) ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERMZML.out.versions.first().ifEmpty(null)) - // Align unfiltered idXMLfiles using trafoXMLs + // Align idXMLfiles using trafoXMLs OPENMS_MAPRTTRANSFORMERIDXML(joined_trafos_ids) ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERIDXML.out.versions.first().ifEmpty(null)) - ch_proceeding_idx = OPENMS_MAPRTTRANSFORMERIDXML.out.aligned - .map { - meta, raw -> - [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] - } - .groupTuple(by: [0]) emit: - // Define the information that is returned by this workflow versions = ch_versions - ch_proceeding_idx - aligned_idfilter = OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml + aligned_idxml = OPENMS_MAPRTTRANSFORMERIDXML.out.aligned aligned_mzml = OPENMS_MAPRTTRANSFORMERMZML.out.aligned } diff --git a/subworkflows/local/predict_class1.nf b/subworkflows/local/predict_class1.nf index 70532717..948b8f91 100644 --- a/subworkflows/local/predict_class1.nf +++ b/subworkflows/local/predict_class1.nf @@ -16,18 +16,15 @@ workflow PREDICT_CLASS1 { main: ch_versions = Channel.empty() 
ch_predicted_possible_neoepitopes = Channel.empty() + alleles = peptides_class_1_alleles.map{ meta, alleles -> [[id:meta], alleles] } // If specified predict peptides using MHCFlurry - MHCFLURRY_PREDICTPEPTIDESCLASS1( - mztab - .map{ it -> [it[0].sample, it[0], it[1]] } - .combine( peptides_class_1_alleles, by:0) - .map( it -> [it[1], it[2], it[3]]) - ) + MHCFLURRY_PREDICTPEPTIDESCLASS1(mztab.join(alleles)) ch_versions = ch_versions.mix(MHCFLURRY_PREDICTPEPTIDESCLASS1.out.versions.first().ifEmpty(null)) + if ( params.include_proteins_from_vcf ) { // Predict all possible neoepitopes from vcf - PREDICT_POSSIBLE_CLASS1_NEOEPITOPES(peptides_class_1_alleles.combine(ch_vcf_from_sheet, by:0)) + PREDICT_POSSIBLE_CLASS1_NEOEPITOPES(alleles.combine(ch_vcf_from_sheet, by:0)) ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.versions.first().ifEmpty(null)) ch_predicted_possible_neoepitopes = PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.csv // Resolve found neoepitopes @@ -39,7 +36,7 @@ workflow PREDICT_CLASS1 { ) ch_versions = ch_versions.mix(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.versions.first().ifEmpty(null)) // Predict class 1 neoepitopes MHCFlurry - MHCFLURRY_PREDICTNEOEPITOPESCLASS1(peptides_class_1_alleles.join(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.csv, by:0)) + MHCFLURRY_PREDICTNEOEPITOPESCLASS1(alleles.join(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.csv, by:0)) ch_versions = ch_versions.mix(MHCFLURRY_PREDICTNEOEPITOPESCLASS1.out.versions.first().ifEmpty(null)) } diff --git a/subworkflows/local/predict_class2.nf b/subworkflows/local/predict_class2.nf index 4baa282d..41f3c7cd 100644 --- a/subworkflows/local/predict_class2.nf +++ b/subworkflows/local/predict_class2.nf @@ -20,16 +20,16 @@ workflow PREDICT_CLASS2 { main: ch_versions = Channel.empty() ch_predicted_possible_neoepitopes = Channel.empty() + alleles = peptides_class_2_alleles.map{meta, alleles -> [[id:meta], alleles]} // Preprocess found peptides for MHCNuggets prediction class 2 
MHCNUGGETS_PEPTIDESCLASS2PRE(mztab) ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2PRE.out.versions.first().ifEmpty(null)) + // Predict found peptides using MHCNuggets class 2 MHCNUGGETS_PREDICTPEPTIDESCLASS2( MHCNUGGETS_PEPTIDESCLASS2PRE.out.preprocessed - .map{ it -> [it[0].sample, it[0], it[1]] } - .join(peptides_class_2_alleles, by:0) - .map( it -> [it[1], it[2], it[3]]) + .join(alleles) ) ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTPEPTIDESCLASS2.out.versions.first().ifEmpty(null)) // Postprocess predicted MHCNuggets peptides class 2 @@ -37,7 +37,7 @@ workflow PREDICT_CLASS2 { ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2POST.out.versions.first().ifEmpty(null)) if ( params.include_proteins_from_vcf ) { // Predict all possible class 2 neoepitopes from vcf - PREDICT_POSSIBLE_CLASS2_NEOEPITOPES(peptides_class_2_alleles.combine(ch_vcf_from_sheet, by:0)) + PREDICT_POSSIBLE_CLASS2_NEOEPITOPES(alleles.combine(ch_vcf_from_sheet, by:0)) ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.versions.first().ifEmpty(null)) ch_predicted_possible_neoepitopes = PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.csv // Resolve found class 2 neoepitopes @@ -51,7 +51,7 @@ workflow PREDICT_CLASS2 { MHCNUGGETS_NEOEPITOPESCLASS2PRE(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.csv) ch_versions = ch_versions.mix(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.versions.first().ifEmpty(null)) // Predict class 2 MHCNuggets - MHCNUGGETS_PREDICTNEOEPITOPESCLASS2(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.preprocessed.join(peptides_class_2_alleles, by:0)) + MHCNUGGETS_PREDICTNEOEPITOPESCLASS2(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.preprocessed.join(alleles, by:0)) ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.versions.first().ifEmpty(null)) // Class 2 MHCNuggets Postprocessing MHCNUGGETS_NEOEPITOPESCLASS2POST(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.csv.join(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.csv, by:0)) diff --git a/subworkflows/local/predict_rt.nf 
b/subworkflows/local/predict_rt.nf deleted file mode 100644 index 894eddf9..00000000 --- a/subworkflows/local/predict_rt.nf +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Perform the Retention time prediction when the parameter --predict_RT is provided - */ - -include { OPENMS_RTMODEL } from '../../modules/local/openms_rtmodel' -include { - OPENMS_RTPREDICT as OPENMS_RTPREDICT_FOUND_PEPTIDES - OPENMS_RTPREDICT as OPENMS_RTPREDICT_NEOEPITOPES} from '../../modules/local/openms_rtpredict' - - -workflow PREDICT_RT { - take: - filter_q_value - ch_predicted_possible_neoepitopes - ch_predicted_possible_neoepitopes_II - - main: - ch_versions = Channel.empty() - - // Train Retention Times Predictor - OPENMS_RTMODEL(filter_q_value) - ch_versions = ch_versions.mix(OPENMS_RTMODEL.out.versions.first().ifEmpty(null)) - // Retention Times Predictor Found Peptides - OPENMS_RTPREDICT_FOUND_PEPTIDES(filter_q_value.join(OPENMS_RTMODEL.out.complete, by:[0])) - ch_versions = ch_versions.mix(OPENMS_RTPREDICT_FOUND_PEPTIDES.out.versions.first().ifEmpty(null)) - // Retention Times Predictor possible Neoepitopes - OPENMS_RTPREDICT_NEOEPITOPES(ch_predicted_possible_neoepitopes.mix(ch_predicted_possible_neoepitopes_II).join(OPENMS_RTMODEL.out.complete, by:[0])) - ch_versions = ch_versions.mix(OPENMS_RTPREDICT_FOUND_PEPTIDES.out.versions.first().ifEmpty(null)) - - emit: - // Define the information that is returned by this workflow - versions = ch_versions -} diff --git a/subworkflows/local/process_feature.nf b/subworkflows/local/process_feature.nf index cc8e3dfd..a254285b 100644 --- a/subworkflows/local/process_feature.nf +++ b/subworkflows/local/process_feature.nf @@ -1,51 +1,38 @@ /* - * Perform the quantification of the samples when the parameter --skip_quantification is not provided + * Perform the quantification by extracting the feature intensities and group runs corresponding to the same sample and condition. 
*/ +include { OPENMS_IDMERGER } from '../../modules/local/openms_idmerger' include { OPENMS_FEATUREFINDERIDENTIFICATION } from '../../modules/local/openms_featurefinderidentification' include { OPENMS_FEATURELINKERUNLABELEDKD } from '../../modules/local/openms_featurelinkerunlabeledkd' include { OPENMS_IDCONFLICTRESOLVER } from '../../modules/local/openms_idconflictresolver' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_QUANTIFIED } from '../../modules/local/openms_textexporter' +include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_QUANT } from '../../modules/local/openms_textexporter' include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTER_QUANT } from '../../modules/local/openms_mztabexporter' workflow PROCESS_FEATURE { take: - psms_outcome - aligned_mzml - filter_q_value + ch_runs_to_be_quantified main: ch_versions = Channel.empty() - // Combining the necessary information into one channel - psms_outcome - .join( aligned_mzml, by: [0] ) - .map { it -> [it[0].sample, it[0], it[1], it[2]] } - .combine( filter_q_value , by: [0] ) - .map { it -> [it[1], it[2], it[3], it[5]] } - .set{ joined_mzmls_ids_quant } + // Quantify identifications using targeted feature extraction - OPENMS_FEATUREFINDERIDENTIFICATION(joined_mzmls_ids_quant) + OPENMS_FEATUREFINDERIDENTIFICATION(ch_runs_to_be_quantified).featurexml + .map { meta, featurexml -> [[id: meta.sample + '_' + meta.condition], featurexml] } + .groupTuple() + .set { ch_features_grouped } ch_versions = ch_versions.mix(OPENMS_FEATUREFINDERIDENTIFICATION.out.versions.first().ifEmpty(null)) + // Link extracted features - OPENMS_FEATURELINKERUNLABELEDKD( - OPENMS_FEATUREFINDERIDENTIFICATION.out.featurexml - .flatMap { - meta, raw -> - [[[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw]] - } - .groupTuple(by:[0])) + OPENMS_FEATURELINKERUNLABELEDKD(ch_features_grouped) ch_versions = ch_versions.mix(OPENMS_FEATURELINKERUNLABELEDKD.out.versions.first().ifEmpty(null)) 
+ // Resolve conflicting ids matching to the same feature OPENMS_IDCONFLICTRESOLVER(OPENMS_FEATURELINKERUNLABELEDKD.out.consensusxml) ch_versions = ch_versions.mix(OPENMS_IDCONFLICTRESOLVER.out.versions.first().ifEmpty(null)) - // Export all information as text to csv - OPENMS_TEXTEXPORTER_QUANTIFIED(OPENMS_IDCONFLICTRESOLVER.out.consensusxml) - ch_versions = ch_versions.mix(OPENMS_TEXTEXPORTER_QUANTIFIED.out.versions.first().ifEmpty(null)) - // Export all information as mzTab - OPENMS_MZTABEXPORTER_QUANT(OPENMS_IDCONFLICTRESOLVER.out.consensusxml) - ch_versions = ch_versions.mix(OPENMS_MZTABEXPORTER_QUANT.out.versions.first().ifEmpty(null)) + emit: // Define the information that is returned by this workflow versions = ch_versions - mztab = OPENMS_MZTABEXPORTER_QUANT.out.mztab + consensusxml = OPENMS_IDCONFLICTRESOLVER.out.consensusxml } diff --git a/subworkflows/local/quant.nf b/subworkflows/local/quant.nf new file mode 100644 index 00000000..baea53e8 --- /dev/null +++ b/subworkflows/local/quant.nf @@ -0,0 +1,84 @@ +/* + * Perform the quantification of the samples when the parameter --skip_quantification is not provided + * This workflow splits the merged percolator output into the individual runs and filters them based on the q-value + * It then aligns the retention times of the runs and merges the idxml files together to use them as id_ext in featurefinder + * Finally, it performs the quantification and emits the consensusXML file + */ +include { OPENMS_IDRIPPER } from '../../modules/local/openms_idripper' +include { OPENMS_IDSCORESWITCHER } from '../../modules/local/openms_idscoreswitcher' +include { PYOPENMS_IDFILTER } from '../../modules/local/pyopenms_idfilter' +include { OPENMS_IDMERGER as OPENMS_IDMERGER_QUANT } from '../../modules/local/openms_idmerger' + +include { MAP_ALIGNMENT } from './map_alignment' +include { PROCESS_FEATURE } from './process_feature' + +// Sort closure for merging and splitting files +def sortById = { a, b -> a.id <=> b.id } + 
+workflow QUANT { + take: + merge_meta_map + merged_pout + filter_q_value + mzml + + main: + ch_versions = Channel.empty() + // Rip post-percolator idXML files and manipulate such that we end up with [meta_run1, idxml_run1, pout_filtered] [meta_run2, idxml_run2, pout_filtered] ... + OPENMS_IDRIPPER( merged_pout ).ripped + .join( merge_meta_map ) + .join( filter_q_value ) + .map { group_meta, ripped, meta, fdrfiltered -> [meta, ripped, fdrfiltered] } + .transpose() + .set { ch_ripped_pout } + ch_versions = ch_versions.mix(OPENMS_IDRIPPER.out.versions.ifEmpty(null)) + + // Switch to xcorr for filtering since q-values are set to 1 with peptide-level-fdr + if (params.fdr_level == 'peptide_level_fdrs'){ + ch_runs_to_be_filtered = OPENMS_IDSCORESWITCHER( ch_ripped_pout ).switched_idxml + ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions.ifEmpty(null)) + } else { + ch_runs_to_be_filtered = ch_ripped_pout + } + + // Filter runs based on fdr filtered coprocessed percolator output. 
+ // NOTE: This is an alternative filtering method that will be replaced by IDFilter with new release of OpenMS + PYOPENMS_IDFILTER( ch_runs_to_be_filtered ).filtered + .map { meta, idxml -> [[id:meta.sample + '_' + meta.condition], [id:meta.id, file:idxml]] } + .groupTuple(sort: sortById) + .map { meta, idxml -> [meta, idxml.file] } + .set { ch_runs_to_be_aligned } + ch_versions = ch_versions.mix(PYOPENMS_IDFILTER.out.versions.ifEmpty(null)) + + // Align retention times of runs + MAP_ALIGNMENT( + ch_runs_to_be_aligned, + mzml, + merge_meta_map + ) + ch_versions = ch_versions.mix(MAP_ALIGNMENT.out.versions.ifEmpty(null)) + + // We need to merge groupwise the aligned idxml files together to use them as id_ext in featurefinder + OPENMS_IDMERGER_QUANT(MAP_ALIGNMENT.out.aligned_idxml + .map { meta, aligned_idxml -> [[id: meta.sample + '_' + meta.condition], aligned_idxml] } + .groupTuple()) + ch_versions = ch_versions.mix(OPENMS_IDMERGER_QUANT.out.versions.ifEmpty(null)) + + // Manipulate channels such that we end up with : [meta, mzml, run_idxml, merged_runs_idxml] + MAP_ALIGNMENT.out.aligned_mzml + .join(MAP_ALIGNMENT.out.aligned_idxml) + .map {meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] } + .groupTuple(sort: sortById) + .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file]} + .join(OPENMS_IDMERGER_QUANT.out.idxml) + .map { group_meta, meta, mzml, idxml, merged_idxml -> [meta, mzml, idxml, merged_idxml] } + .transpose() + .set { ch_runs_to_be_quantified } + + PROCESS_FEATURE ( ch_runs_to_be_quantified ) + ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions.ifEmpty(null)) + + emit: + consensusxml = PROCESS_FEATURE.out.consensusxml + versions = ch_versions +} diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf index f8d56891..c917941a 100644 --- a/workflows/mhcquant.nf +++ b/workflows/mhcquant.nf @@ -75,16 +75,16 @@ include { OPENMS_PEPTIDEINDEXER } 
from include { DEEPLC } from '../modules/local/deeplc' include { MS2PIP } from '../modules/local/ms2pip' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_COMET } from '../modules/local/openms_textexporter' - include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/local/openms_idfilter' include { OPENMS_IDMERGER } from '../modules/local/openms_idmerger' + include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_FDR } from '../modules/local/openms_textexporter' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_UNQUANTIFIED } from '../modules/local/openms_textexporter' +include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' +include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' + // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -111,14 +111,14 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft // Info required for completion email and summary def multiqc_report = [] +// Sort closure for merging and splitting files +def sortById = { a, b -> a.id <=> b.id } include { INCLUDE_PROTEINS } from '../subworkflows/local/include_proteins' -include { MAP_ALIGNMENT } from '../subworkflows/local/map_alignment' include { REFINE_FDR } from '../subworkflows/local/refine_fdr' -include { PROCESS_FEATURE } from '../subworkflows/local/process_feature.nf' +include { QUANT } from '../subworkflows/local/quant' include { PREDICT_CLASS1 } from '../subworkflows/local/predict_class1' include { PREDICT_CLASS2 } from '../subworkflows/local/predict_class2' -include { PREDICT_RT } from '../subworkflows/local/predict_rt' //////////////////////////////////////////////////// /* -- RUN MAIN WORKFLOW -- */ @@ -142,18 +142,18 @@ 
workflow MHCQUANT { .branch { meta, filename -> raw : meta.ext == 'raw' - return [ meta, filename ] + return [ meta.subMap('id', 'sample', 'condition'), filename ] mzml : meta.ext == 'mzml' - return [ meta, filename ] + return [ meta.subMap('id', 'sample', 'condition'), filename ] tdf : meta.ext == 'd' - return [ meta, filename ] + return [ meta.subMap('id', 'sample', 'condition'), filename ] other : true } .set { branched_ms_files } // Input fasta file Channel.fromPath(params.fasta) .combine(INPUT_CHECK.out.ms_runs) - .flatMap{ it -> [tuple(it[1],it[0])] } + .map{ fasta, meta, ms_file -> [meta.subMap('id', 'sample', 'condition'), fasta] } .ifEmpty { exit 1, "params.fasta was empty - no input file supplied" } .set { input_fasta } @@ -180,7 +180,8 @@ workflow MHCQUANT { ch_decoy_db = ch_fasta_file } - ch_ms_files = (branched_ms_files.mzml) + // If mzml files are specified, they are encapsulated in a list [meta, [mzml]]. We need to extract the path for grouping later + ch_ms_files = branched_ms_files.mzml.map{ meta, mzml -> [meta, mzml[0]]} // Raw file conversion THERMORAWFILEPARSER(branched_ms_files.raw) ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions.ifEmpty(null)) @@ -191,8 +192,8 @@ workflow MHCQUANT { ch_versions = ch_versions.mix(TDF2MZML.out.versions.ifEmpty(null)) ch_ms_files = ch_ms_files.mix(TDF2MZML.out.mzml) + // Optional: Run Peak Picking as Preprocessing if (params.run_centroidisation) { - // Optional: Run Peak Picking as Preprocessing OPENMS_PEAKPICKERHIRES(ch_ms_files) ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions.ifEmpty(null)) ch_mzml_file = OPENMS_PEAKPICKERHIRES.out.mzml @@ -200,14 +201,17 @@ workflow MHCQUANT { ch_mzml_file = ch_ms_files } - // Clean up mzML files - OPENMS_FILEFILTER(ch_mzml_file) - ch_versions = ch_versions.mix(OPENMS_FILEFILTER.out.versions.ifEmpty(null)) - ch_clean_mzml_file = OPENMS_FILEFILTER.out.cleaned_mzml + // Optionally clean up mzML files + if (params.filter_mzml){ + 
OPENMS_FILEFILTER(ch_mzml_file) + ch_versions = ch_versions.mix(OPENMS_FILEFILTER.out.versions.ifEmpty(null)) + ch_clean_mzml_file = OPENMS_FILEFILTER.out.cleaned_mzml + } else { + ch_clean_mzml_file = ch_mzml_file + } // Run comet database search - OPENMS_COMETADAPTER( - ch_clean_mzml_file.join(ch_decoy_db, remainder:true)) + OPENMS_COMETADAPTER(ch_clean_mzml_file.join(ch_decoy_db, remainder:true)) // Run DeepLC if specified if (params.use_deeplc){ @@ -227,52 +231,36 @@ workflow MHCQUANT { ch_comet_out_idxml_proceeding = ch_comet_out_idxml } - // Write this information to an tsv file - OPENMS_TEXTEXPORTER_COMET(ch_comet_out_idxml_proceeding) - ch_versions = ch_versions.mix(OPENMS_COMETADAPTER.out.versions.ifEmpty(null)) // Index decoy and target hits OPENMS_PEPTIDEINDEXER(ch_comet_out_idxml_proceeding.join(ch_decoy_db)) ch_versions = ch_versions.mix(OPENMS_PEPTIDEINDEXER.out.versions.ifEmpty(null)) - // - // SUBWORKFLOW: Pre-process step for the quantification of the data - // - if (!params.skip_quantification) { - MAP_ALIGNMENT( - OPENMS_PEPTIDEINDEXER.out.idxml, - ch_clean_mzml_file - ) - ch_proceeding_idx = MAP_ALIGNMENT.out.ch_proceeding_idx - ch_versions = ch_versions.mix(MAP_ALIGNMENT.out.versions.ifEmpty(null)) - } else { - ch_proceeding_idx = OPENMS_PEPTIDEINDEXER.out.idxml - .map { - meta, raw -> - [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] - } - .groupTuple(by: [0]) - } + // Save indexed runs for later use to keep meta-run information. 
Sort based on file id + OPENMS_PEPTIDEINDEXER.out.idxml + .map { meta, idxml -> [[id: meta.sample + '_' + meta.condition], meta] } + .groupTuple( sort: sortById ) + .set { merge_meta_map } + + OPENMS_PEPTIDEINDEXER.out.idxml + .map { meta, idxml -> [[id: meta.sample + '_' + meta.condition], idxml] } + .groupTuple() + .set { ch_runs_to_merge } // Merge aligned idXMLfiles - OPENMS_IDMERGER(ch_proceeding_idx) + OPENMS_IDMERGER(ch_runs_to_merge) ch_versions = ch_versions.mix(OPENMS_IDMERGER.out.versions.ifEmpty(null)) + // Extract PSM features for Percolator OPENMS_PSMFEATUREEXTRACTOR(OPENMS_IDMERGER.out.idxml) ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions.ifEmpty(null)) + // Run Percolator OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml) ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions.ifEmpty(null)) + // Filter by percolator q-value OPENMS_IDFILTER_Q_VALUE(OPENMS_PERCOLATORADAPTER.out.idxml.flatMap { it -> [tuple(it[0], it[1], null)] }) ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions.ifEmpty(null)) - // Prepare for check if file is empty - OPENMS_TEXTEXPORTER_FDR(OPENMS_IDFILTER_Q_VALUE.out.idxml) - // Return an error message when there is only a header present in the document - OPENMS_TEXTEXPORTER_FDR.out.tsv.map { - meta, tsv -> if (tsv.size() < 130) { - log.warn "It seems that there were no significant hits found for this sample: " + meta.sample + "\nPlease consider incrementing the '--fdr_threshold' after removing the work directory or to exclude this sample. 
" - } - } // // SUBWORKFLOW: Refine the FDR values on the predicted subset @@ -286,36 +274,45 @@ workflow MHCQUANT { ) ch_versions = ch_versions.mix(REFINE_FDR.out.versions.ifEmpty(null)) // Define the outcome of the paramer to a fixed variable - filter_q_value = REFINE_FDR.out.filter_refined_q_value.flatMap { it -> [ tuple(it[0].sample, it[0], it[1]) ] } + filter_q_value = REFINE_FDR.out.filter_refined_q_value } else { // Make sure that the columns that consists of the ID's, sample names and the idXML file names are returned - filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml.map { it -> [it[0].sample, it[0], it[1]] } + filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml } // - // SUBWORKFLOW: Perform the step to process the feature and obtain the belonging information + // SUBWORKFLOW: QUANT // - if (!params.skip_quantification) { - PROCESS_FEATURE ( - MAP_ALIGNMENT.out.aligned_idfilter, - MAP_ALIGNMENT.out.aligned_mzml, - filter_q_value - ) - ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions.ifEmpty(null)) + QUANT(merge_meta_map, OPENMS_PERCOLATORADAPTER.out.idxml, filter_q_value, ch_clean_mzml_file) + ch_versions = ch_versions.mix(QUANT.out.versions.ifEmpty(null)) + ch_output = QUANT.out.consensusxml } else { - OPENMS_TEXTEXPORTER_UNQUANTIFIED(filter_q_value.flatMap { ident, meta, idxml -> [[meta, idxml]] }) + ch_output = filter_q_value + } + + // Prepare for check if file is empty + OPENMS_TEXTEXPORTER(ch_output) + ch_versions = ch_versions.mix(OPENMS_TEXTEXPORTER.out.versions.ifEmpty(null)) + // Return an error message when there is only a header present in the document + OPENMS_TEXTEXPORTER.out.tsv.map { + meta, tsv -> if (tsv.size() < 130) { + log.warn "It seems that there were no significant hits found for this sample: " + meta.sample + "\nPlease consider incrementing the '--fdr_threshold' after removing the work directory or to exclude this sample. 
" + } } + OPENMS_MZTABEXPORTER(ch_output) + ch_versions = ch_versions.mix(OPENMS_MZTABEXPORTER.out.versions.ifEmpty(null)) + // // SUBWORKFLOW: Predict class I (neoepitopes) // if (params.predict_class_1 & !params.skip_quantification) { PREDICT_CLASS1 ( - PROCESS_FEATURE.out.mztab, + OPENMS_MZTABEXPORTER.out.mztab, peptides_class_1_alleles, ch_vcf_from_sheet - ) + ) ch_versions = ch_versions.mix(PREDICT_CLASS1.out.versions.ifEmpty(null)) ch_predicted_possible_neoepitopes = PREDICT_CLASS1.out.ch_predicted_possible_neoepitopes } else { @@ -327,7 +324,7 @@ workflow MHCQUANT { // if (params.predict_class_2 & !params.skip_quantification) { PREDICT_CLASS2 ( - PROCESS_FEATURE.out.mztab, + OPENMS_MZTABEXPORTER.out.mztab, peptides_class_2_alleles, ch_vcf_from_sheet ) @@ -337,26 +334,15 @@ workflow MHCQUANT { ch_predicted_possible_neoepitopes_II = Channel.empty() } - // - // SUBWORKFLOW: Predict retention time - // - if (params.predict_RT) { - PREDICT_RT ( - filter_q_value.map{ it -> [it[1], it[2]] }, - ch_predicted_possible_neoepitopes, - ch_predicted_possible_neoepitopes_II - ) - } - if (params.annotate_ions) { - // Alter the annotation of the filtered q value - ch_filtered_idxml = filter_q_value.map { ident, meta, idxml -> [meta.id, idxml] } // Join the ch_filtered_idxml and the ch_mzml_file - ch_raw_spectra_data = ch_clean_mzml_file.map {meta, mzml -> [meta.sample + '_' + meta.condition, mzml] } + ch_clean_mzml_file.map {meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] } .groupTuple() - .join(ch_filtered_idxml) + .join(filter_q_value) + .set{ ch_ion_annotator_input } + // Annotate spectra with ion fragmentation information - PYOPENMS_IONANNOTATOR(ch_raw_spectra_data) + PYOPENMS_IONANNOTATOR( ch_ion_annotator_input ) ch_versions = ch_versions.mix(PYOPENMS_IONANNOTATOR.out.versions.ifEmpty(null)) } @@ -404,6 +390,7 @@ workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, 
multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)