Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Feb 19, 2025
2 parents 3ecd66c + 56915b2 commit d99e098
Show file tree
Hide file tree
Showing 12 changed files with 354 additions and 266 deletions.
90 changes: 67 additions & 23 deletions alphadia/constants/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,27 @@ fasta_paths: []
quant_directory: null

general:

thread_count: 10
# maximum number of threads or processes to use per raw file
# Enables separate transfer learning step
transfer_step_enabled: False

# Enables separate MBR step
mbr_step_enabled: False


# === advanced settings ===
# whether to reuse previously calculated calibration data
reuse_calibration: false
# whether to reuse previously calculated quantification data
reuse_quant: false
# whether to use Astral MS1 feature detection
astral_ms1: false
# logging verbosity level (DEBUG, INFO, PROGRESS, WARNING, ERROR)
log_level: 'INFO'
# whether to memory map detector events for improved performance
mmap_detector_events: false
# whether to use GPU acceleration for library prediction and scoring
use_gpu: true
# whether to save the libraries to the output directory
save_library: True # input library
Expand All @@ -27,25 +41,44 @@ library_loading:
# if retention times are reported in absolute units, the rt_heuristic defines rt is interpreted as minutes or seconds

library_prediction:
# Basic parameters
# whether to use alphaPeptDeep to predict peptide properties
predict: False
# enzyme used for in-silico digest
enzyme: trypsin
# fixed modifications for in-silico digest
# Format: Modification@AminoAcid
# Example: Carbamidomethyl@C
fixed_modifications: 'Carbamidomethyl@C'
# variable modifications for in-silico digest. Semicolon separated list
# See https://github.com/MannLabs/alphabase/blob/c99c1ec7eb482745f6dae58a324658d6b6c4abf0/alphabase/constants/const_files/modification.tsv
# Format: Modification@AminoAcid
# Example: Oxidation@M;Acetyl@Protein_N-term
variable_modifications: 'Oxidation@M;Acetyl@Protein_N-term'
# maximum number of variable modifications per peptide
max_var_mod_num: 2
# number of missed cleavages allowed for in-silico digest
missed_cleavages: 1
# using tuples here as it makes interaction with the GUI easier
# minimum and maximum number of amino acids for generated precursors
precursor_len:
- 7
- 35
# minimum and maximum charge states for generated precursors
precursor_charge:
- 2
- 4
# minimum and maximum m/z values for generated precursors
precursor_mz:
- 400
- 1200
# minimum and maximum m/z values for generated fragments
fragment_mz:
- 200
- 2000

# === advanced settings ===
# normalized collision energy for fragment generation
nce: 25.0

# list of fragment types (see alphabase.peptide.fragment.FRAGMENT_TYPES for supported types)
Expand All @@ -56,6 +89,8 @@ library_prediction:

# maximum charge state for predicted fragments
max_fragment_charge: 2

# instrument types as supported by peptdeep. Leave this as Lumos if you are not 100% sure you know what you are changing
instrument: Lumos

# set path for custom peptdeep model. If set to null, the default model will be used
Expand Down Expand Up @@ -96,26 +131,26 @@ custom_modifications:
composition: C(12)

search:
channel_filter: "" # will be mapped to "0" -> load only the default unmultiplexed channel
exclude_shared_ions: True
compete_for_fragments: True

target_num_candidates: 2
# target ms1 tolerance in ppm
target_ms1_tolerance: 5
# target ms2 tolerance in ppm
target_ms2_tolerance: 10
# target ion mobility tolerance in 1/K_0
target_mobility_tolerance: 0.0 # default is to optimize automatically
# target retention time tolerance in seconds if > 1, or a proportion of the total gradient length if < 1
# target retention time tolerance in seconds if > 1, or a proportion of the total gradient length if < 1. 0.0 means to optimize automatically
target_rt_tolerance: 0.0 # default is to optimize automatically
# target ion mobility tolerance in 1/K_0
target_mobility_tolerance: 0.0 # default is to optimize automatically

quant_window: 3
# === advanced settings ===
target_num_candidates: 2
# filter to apply to the channels. If set to "", all channels will be used.
channel_filter: ""
exclude_shared_ions: True
compete_for_fragments: True
# target retention time tolerance in seconds if > 1, or a proportion of the total gradient length if < 1
quant_all: True

search_advanced:
top_k_fragments: 12


calibration:

# Number of precursors searched and scored per batch
Expand Down Expand Up @@ -235,10 +270,14 @@ multiplexing:
fdr:
fdr: 0.01
group_level: 'proteins'
inference_strategy: "heuristic"


# === advanced settings ===
competetive_scoring: true
keep_decoys: false
channel_wise_fdr: false
inference_strategy: "heuristic"

# (Experimental)
# uses a two-step classifier consisting of a logistic regression and a neural network, with a default maximum of 5 iterations per fitting call
enable_two_step_classifier: false
Expand All @@ -248,17 +287,24 @@ fdr:
enable_nn_hyperparameter_tuning: false

search_output:
# Output file format for search results. Can be either "tsv" or "parquet"
file_format: "tsv"
# Enable label-free quantification at peptide level and generate peptide matrix
peptide_level_lfq: false
# Enable label-free quantification at precursor level and generate precursor matrix
precursor_level_lfq: false
save_fragment_quant_matrix: false #advanced feature to write out quantitative matrix with fragment ion intensities e.g. for AlphaQuant

# === advanced settings ===
# Minimum number of fragments required for quantification
min_k_fragments: 12
# Minimum correlation required between fragment XICs for quantification
min_correlation: 0.9
# Number of samples used for quadratic fit in retention time alignment
num_samples_quadratic: 50
# Minimum number of non-missing values required for quantification
min_nonnan: 3
# Enable normalization of label-free quantification values
normalize_lfq: True
# can be either "parquet" or "tsv"
file_format: "tsv"
file_format_advanced: "parquet"

# Configuration for the optimization of search parameters. These parameters should not normally be adjusted and are for the use of experienced users only.
optimization:
Expand Down Expand Up @@ -336,6 +382,8 @@ transfer_library:
# if true, the library is created for transfer learning
enabled: False

# === advanced settings ===

# list of fragment types (see alphabase.peptide.fragment.FRAGMENT_TYPES for supported types)
# Supported types are: a, b, c, x, y, z, b_modloss, y_modloss, b_H2O, y_H2O, b_NH3, y_NH3, c_lossH, z_addH
fragment_types: ['b', 'y']
Expand Down Expand Up @@ -363,6 +411,8 @@ transfer_learning:
# if true, a custom peptdeep model will be created using the transfer learned library
enabled: False

# === advanced settings ===

# number of precursors per batch
batch_size: 2000

Expand Down Expand Up @@ -396,9 +446,3 @@ transfer_learning:

# instrument type encoded during training
instrument: 'Lumos'


# scope of default yaml should be one search step
multistep_search:
transfer_step_enabled: False
mbr_step_enabled: False
8 changes: 3 additions & 5 deletions alphadia/search_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,9 @@ def __init__(
self._multistep_config: dict | None = None
self._transfer_step_output_dir: Path | None = None

multistep_search_config = self._user_config.get("multistep_search", {})
self._transfer_step_enabled = multistep_search_config.get(
"transfer_step_enabled", False
)
self._mbr_step_enabled = multistep_search_config.get("mbr_step_enabled", False)
general_config = self._user_config.get("general", {})
self._transfer_step_enabled = general_config.get("transfer_step_enabled", False)
self._mbr_step_enabled = general_config.get("mbr_step_enabled", False)

if self._transfer_step_enabled or self._mbr_step_enabled:
self._update_paths()
Expand Down
4 changes: 1 addition & 3 deletions alphadia/search_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,7 @@ def _parse_modifications(mod_str: str) -> list[str]:
decoy_type="diann",
mp_process_num=thread_count,
),
libtransform.FlattenLibrary(
self.config["search_advanced"]["top_k_fragments"]
),
libtransform.FlattenLibrary(self.config["search"]["top_k_fragments"]),
libtransform.InitFlatColumns(),
libtransform.LogFlatLibraryStats(),
]
Expand Down
4 changes: 2 additions & 2 deletions alphadia/workflow/peptidecentric.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ def extract_batch(
config.update(self.config["selection_config"])
config.update(
{
"top_k_fragments": self.config["search_advanced"]["top_k_fragments"],
"top_k_fragments": self.config["search"]["top_k_fragments"],
"rt_tolerance": self.optimization_manager.rt_error,
"mobility_tolerance": self.optimization_manager.mobility_error,
"candidate_count": self.optimization_manager.num_candidates,
Expand Down Expand Up @@ -877,7 +877,7 @@ def extract_batch(
config.update(self.config["scoring_config"])
config.update(
{
"top_k_fragments": self.config["search_advanced"]["top_k_fragments"],
"top_k_fragments": self.config["search"]["top_k_fragments"],
"precursor_mz_tolerance": self.optimization_manager.ms1_error,
"fragment_mz_tolerance": self.optimization_manager.ms2_error,
"exclude_shared_ions": self.config["search"]["exclude_shared_ions"],
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 37 additions & 12 deletions docs/guides/transfer-dimethyl.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# DIA Transfer Learning for Dimethyl Labeled Samples

**This tutorial was created using AlphaDIA 1.8.1 - please be aware that there might be changes in your version.**

Note: from AlphaDIA 1.10.0, the multistep workflow is supported directly via GUI (and CLI), without the need for
multiple starts of AlphaDIA. Working through this tutorial is still valuable, as it provides some insights and
intuition about the process. See section [Integrated multistep workflow](#integrated-multistep-workflow) below for details.
multiple starts of AlphaDIA.
We recommend using this integrated multistep workflow, see [below](#integrated-multistep-workflow) for details.

Working through this tutorial might still be valuable, as it provides some insights and
intuition about the process and the relevant parameters.

## 1. Prerequisites
**This tutorial was created using AlphaDIA 1.8.1 - please be aware that there might be changes in your version.**

Make sure that you have a machine with at least 64 gigabytes of memory.
Please download the test data for this tutorial [here](https://datashare.biochem.mpg.de/s/1GiKQSwlPf6YlMm).
We will be using replicates of dimethyl-labeled tryptic HeLa digests. The samples are not multiplexed and only contain the natural light isotope.
Expand Down Expand Up @@ -138,22 +141,44 @@ Altough the different parameters do not allow for fair comparison of absolut num
The multistep workflow described by the above tutorial is supported directly via GUI (and CLI), without the need for
multiple starts of AlphaDIA.

## 1. Prerequisites
Follow the "Prerequisites" step [above](#1-prerequisites) to obtain the data.

## 2. Configure input/output
Point AlphaDIA to the raw & FASTA files and set the output folder to a folder of your choice
<img src="../_static/images/transfer-dimethyl/transfer_input.png" width="100%" height="auto">

## 3. Configure multistep search
In the GUI, locate the "Multi-step Search" section and activate "Add 'transfer learning' step"
and/or "Add 'second search' step". Set the rest of parameters as desired and start the search.
and/or "Add 'second search' step". Set the rest of the parameters as shown (cf. also the more detailed instructions above)
and start the search by clicking the "Run Workflow" button.

![multistep_settings.png](../_static/images/transfer-dimethyl/multistep_settings.png)

This will orchestrate the individual search steps by transferring the data between the steps
and by setting the configuration parameters that are specific to each step. Any other parameter set via GUI (e.g. `thread_count`)
will apply to all steps. Here, the exceptions are `MS1 Tolerance` and `MS2 Tolerance`, which will be overwritten with
optimal values determined in the first step. The intermediate results are stored in subfolders `tranfer` and `library`, respectively.
As usual, you will find the final results in the root of the project folder.
and by setting the configuration parameters that are specific to each step.

For the "transfer" step, this is `transfer_library.enabled=True` and `transfer_learning.enabled=True`
(note that you might also want to enable the "Transfer library" step, cf. [above](#33-transfer-learning)).
For the "mbr" step, this is `fdr.inference_strategy='library'` and `search.target_num_candidates=5`.

Any other parameter set via GUI (e.g. `thread_count`)
will apply to all steps. Here, the exceptions are `search.target_ms1_tolerance` and `search.target_ms2_tolerance`, which will be overwritten with
optimal values determined in the previous step.

After the three-step search ran through, you will find the final results in the root of the project folder.
The results of the intermediate steps are stored in subfolders `transfer` and `library`, respectively.


### Notes on the multistep search using CLI
If you use the CLI, add the following to your `config.yaml` to enable the multistep search:
```yaml
multistep_search:
general:
transfer_step_enabled: True
mbr_step_enabled: True
```
Details on the internals of the multistep search can be found in `multistep.yaml`.

In case the multistep search fails at some step, you can restart the failed step by
using the `full_config.yaml` that is stored in the respective subfolder. You can of course edit
this file in order to fix the issue that caused the failure.
using the `full_config.yaml` file that is stored in the respective subfolder. You can of course edit
the configuration file in order to fix the issue that caused the failure.
1 change: 1 addition & 0 deletions gui/src/renderer/App.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const AppLayout = styled('div')(({ theme }) => ({
}));

const ContentContainer = styled('div')(({ theme }) => ({
overflowX: 'hidden',
flexGrow: 1,
minWidth: 0,
paddingLeft: theme.spacing(2),
Expand Down
Loading

0 comments on commit d99e098

Please sign in to comment.