Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Feb 19, 2025
2 parents 3ecd66c + 56915b2 commit d99e098
Show file tree
Hide file tree
Showing 12 changed files with 354 additions and 266 deletions.
90 changes: 67 additions & 23 deletions alphadia/constants/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,27 @@ fasta_paths: []
quant_directory: null

general:

thread_count: 10
# maximum number of threads or processes to use per raw file
# Enables separate transfer learning step
transfer_step_enabled: False

# Enables separate MBR step
mbr_step_enabled: False


# === advanced settings ===
# whether to reuse previously calculated calibration data
reuse_calibration: false
# whether to reuse previously calculated quantification data
reuse_quant: false
# whether to use Astral MS1 feature detection
astral_ms1: false
# logging verbosity level (DEBUG, INFO, PROGRESS, WARNING, ERROR)
log_level: 'INFO'
# whether to memory map detector events for improved performance
mmap_detector_events: false
# whether to use GPU acceleration for library prediction and scoring
use_gpu: true
# whether to save the libraries to the output directory
save_library: True # input library
Expand All @@ -27,25 +41,44 @@ library_loading:
# if retention times are reported in absolute units, the rt_heuristic defines rt is interpreted as minutes or seconds

library_prediction:
# Basic parameters
# whether to use alphaPeptDeep to predict peptide properties
predict: False
# enzyme used for in-silico digest
enzyme: trypsin
# fixed modifications for in-silico digest
# Format: Modification@AminoAcid
# Example: Carbamidomethyl@C
fixed_modifications: 'Carbamidomethyl@C'
# variable modifications for in-silico digest. Semicolon separated list
# See https://github.com/MannLabs/alphabase/blob/c99c1ec7eb482745f6dae58a324658d6b6c4abf0/alphabase/constants/const_files/modification.tsv
# Format: Modification@AminoAcid
# Example: Oxidation@M;Acetyl@Protein_N-term
variable_modifications: 'Oxidation@M;Acetyl@Protein_N-term'
# maximum number of variable modifications per peptide
max_var_mod_num: 2
# number of missed cleavages allowed for in-silico digest
missed_cleavages: 1
# using tuples here as it makes interaction with the GUI easier
# minimum and maximum number of amino acids for generated precursors
precursor_len:
- 7
- 35
# minimum and maximum charge states for generated precursors
precursor_charge:
- 2
- 4
# minimum and maximum m/z values for generated precursors
precursor_mz:
- 400
- 1200
# minimum and maximum m/z values for generated fragments
fragment_mz:
- 200
- 2000

# === advanced settings ===
# normalized collision energy for fragment generation
nce: 25.0

# list of fragment types (see alphabase.peptide.fragment.FRAGMENT_TYPES for supported types)
Expand All @@ -56,6 +89,8 @@ library_prediction:

# maximum charge state for predicted fragments
max_fragment_charge: 2

# instrument types as supported by peptdeep. Leave this as Lumos if you are not 100% sure you know what you are changing
instrument: Lumos

# set path for custom peptdeep model. If set to null, the default model will be used
Expand Down Expand Up @@ -96,26 +131,26 @@ custom_modifications:
composition: C(12)

search:
channel_filter: "" # will be mapped to "0" -> load only the default unmultiplexed channel
exclude_shared_ions: True
compete_for_fragments: True

target_num_candidates: 2
# target ms1 tolerance in ppm
target_ms1_tolerance: 5
# target ms2 tolerance in ppm
target_ms2_tolerance: 10
# target ion mobility tolerance in 1/K_0
target_mobility_tolerance: 0.0 # default is to optimize automatically
# target retention time tolerance in seconds if > 1, or a proportion of the total gradient length if < 1
# target retention time tolerance in seconds if > 1, or a proportion of the total gradient length if < 1. 0.0 means to optimize automatically
target_rt_tolerance: 0.0 # default is to optimize automatically
# target ion mobility tolerance in 1/K_0
target_mobility_tolerance: 0.0 # default is to optimize automatically

quant_window: 3
# === advanced settings ===
target_num_candidates: 2
# filter to apply to the channels. If set to "", all channels will be used.
channel_filter: ""
exclude_shared_ions: True
compete_for_fragments: True
# target retention time tolerance in seconds if > 1, or a proportion of the total gradient length if < 1
quant_all: True

search_advanced:
top_k_fragments: 12


calibration:

# Number of precursors searched and scored per batch
Expand Down Expand Up @@ -235,10 +270,14 @@ multiplexing:
fdr:
fdr: 0.01
group_level: 'proteins'
inference_strategy: "heuristic"


# === advanced settings ===
competetive_scoring: true
keep_decoys: false
channel_wise_fdr: false
inference_strategy: "heuristic"

# (Experimental)
# uses a two-step classifier consisting of a logistic regression and a neural network, with a default maximum of 5 iterations per fitting call
enable_two_step_classifier: false
Expand All @@ -248,17 +287,24 @@ fdr:
enable_nn_hyperparameter_tuning: false

search_output:
# Output file format for search results. Can be either "tsv" or "parquet"
file_format: "tsv"
# Enable label-free quantification at peptide level and generate peptide matrix
peptide_level_lfq: false
# Enable label-free quantification at precursor level and generate precursor matrix
precursor_level_lfq: false
save_fragment_quant_matrix: false #advanced feature to write out quantitative matrix with fragment ion intensities e.g. for AlphaQuant

# === advanced settings ===
# Minimum number of fragments required for quantification
min_k_fragments: 12
# Minimum correlation required between fragment XICs for quantification
min_correlation: 0.9
# Number of samples used for quadratic fit in retention time alignment
num_samples_quadratic: 50
# Minimum number of non-missing values required for quantification
min_nonnan: 3
# Enable normalization of label-free quantification values
normalize_lfq: True
# can be either "parquet" or "tsv"
file_format: "tsv"
file_format_advanced: "parquet"

# Configuration for the optimization of search parameters. These parameters should not normally be adjusted and are for the use of experienced users only.
optimization:
Expand Down Expand Up @@ -336,6 +382,8 @@ transfer_library:
# if true, the library is created for transfer learning
enabled: False

# === advanced settings ===

# list of fragment types (see alphabase.peptide.fragment.FRAGMENT_TYPES for supported types)
# Supported types are: a, b, c, x, y, z, b_modloss, y_modloss, b_H2O, y_H2O, b_NH3, y_NH3, c_lossH, z_addH
fragment_types: ['b', 'y']
Expand Down Expand Up @@ -363,6 +411,8 @@ transfer_learning:
# if true, a custom peptdeep model will be created using the transfer learned library
enabled: False

# === advanced settings ===

# number of precursors per batch
batch_size: 2000

Expand Down Expand Up @@ -396,9 +446,3 @@ transfer_learning:

# instrument type encoded during training
instrument: 'Lumos'


# scope of default yaml should be one search step
multistep_search:
transfer_step_enabled: False
mbr_step_enabled: False
8 changes: 3 additions & 5 deletions alphadia/search_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,9 @@ def __init__(
self._multistep_config: dict | None = None
self._transfer_step_output_dir: Path | None = None

multistep_search_config = self._user_config.get("multistep_search", {})
self._transfer_step_enabled = multistep_search_config.get(
"transfer_step_enabled", False
)
self._mbr_step_enabled = multistep_search_config.get("mbr_step_enabled", False)
general_config = self._user_config.get("general", {})
self._transfer_step_enabled = general_config.get("transfer_step_enabled", False)
self._mbr_step_enabled = general_config.get("mbr_step_enabled", False)

if self._transfer_step_enabled or self._mbr_step_enabled:
self._update_paths()
Expand Down
4 changes: 1 addition & 3 deletions alphadia/search_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,7 @@ def _parse_modifications(mod_str: str) -> list[str]:
decoy_type="diann",
mp_process_num=thread_count,
),
libtransform.FlattenLibrary(
self.config["search_advanced"]["top_k_fragments"]
),
libtransform.FlattenLibrary(self.config["search"]["top_k_fragments"]),
libtransform.InitFlatColumns(),
libtransform.LogFlatLibraryStats(),
]
Expand Down
4 changes: 2 additions & 2 deletions alphadia/workflow/peptidecentric.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ def extract_batch(
config.update(self.config["selection_config"])
config.update(
{
"top_k_fragments": self.config["search_advanced"]["top_k_fragments"],
"top_k_fragments": self.config["search"]["top_k_fragments"],
"rt_tolerance": self.optimization_manager.rt_error,
"mobility_tolerance": self.optimization_manager.mobility_error,
"candidate_count": self.optimization_manager.num_candidates,
Expand Down Expand Up @@ -877,7 +877,7 @@ def extract_batch(
config.update(self.config["scoring_config"])
config.update(
{
"top_k_fragments": self.config["search_advanced"]["top_k_fragments"],
"top_k_fragments": self.config["search"]["top_k_fragments"],
"precursor_mz_tolerance": self.optimization_manager.ms1_error,
"fragment_mz_tolerance": self.optimization_manager.ms2_error,
"exclude_shared_ions": self.config["search"]["exclude_shared_ions"],
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 37 additions & 12 deletions docs/guides/transfer-dimethyl.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# DIA Transfer Learning for Dimethyl Labeled Samples

**This tutorial was created using AlphaDIA 1.8.1 - please be aware that there might be changes in your version.**

Note: from AlphaDIA 1.10.0, the multistep workflow is supported directly via GUI (and CLI), without the need for
multiple starts of AlphaDIA. Working through this tutorial is still valuable, as it provides some insights and
intuition about the process. See section [Integrated multistep workflow](#integrated-multistep-workflow) below for details.
multiple starts of AlphaDIA.
We recommend using this integrated multistep workflow, see [below](#integrated-multistep-workflow) for details.

Working through this tutorial might still be valuable, as it provides some insights and
intuition about the process and the relevant parameters.

## 1. Prerequisites
**This tutorial was created using AlphaDIA 1.8.1 - please be aware that there might be changes in your version.**

Make sure that you have a machine with at least 64 gigabytes of memory.
Please download the test data for this tutorial [here](https://datashare.biochem.mpg.de/s/1GiKQSwlPf6YlMm).
We will be using replicates of dimethyl-labeled tryptic HeLa digests. The samples are not multiplexed and only contain the natural light isotope.
Expand Down Expand Up @@ -138,22 +141,44 @@ Altough the different parameters do not allow for fair comparison of absolut num
The multistep workflow described by the above tutorial is supported directly via GUI (and CLI), without the need for
multiple starts of AlphaDIA.

## 1. Prerequisites
Follow the "Prerequisites" step [above](#1-prerequisites) to obtain the data.

## 2. Configure input/output
Point AlphaDIA to the raw & FASTA files and set the output folder to a folder of your choice
<img src="../_static/images/transfer-dimethyl/transfer_input.png" width="100%" height="auto">

## 3. Configure multistep search
In the GUI, locate the "Multi-step Search" section and activate "Add 'transfer learning' step"
and/or "Add 'second search' step". Set the rest of parameters as desired and start the search.
and/or "Add 'second search' step". Set the rest of the parameters as shown (cf. also the more detailed instructions above)
and start the search by clicking the "Run Workflow" button.

![multistep_settings.png](../_static/images/transfer-dimethyl/multistep_settings.png)

This will orchestrate the individual search steps by transferring the data between the steps
and by setting the configuration parameters that are specific to each step. Any other parameter set via GUI (e.g. `thread_count`)
will apply to all steps. Here, the exceptions are `MS1 Tolerance` and `MS2 Tolerance`, which will be overwritten with
optimal values determined in the first step. The intermediate results are stored in subfolders `tranfer` and `library`, respectively.
As usual, you will find the final results in the root of the project folder.
and by setting the configuration parameters that are specific to each step.

For the "transfer" step, this is `transfer_library.enabled=True` and `transfer_learning.enabled=True`
(note that you might also want to enable the "Transfer library" step, cf. [above](#33-transfer-learning)).
For the "mbr" step, this is `fdr.inference_strategy='library'` and `search.target_num_candidates=5`.

Any other parameter set via GUI (e.g. `thread_count`)
will apply to all steps. Here, the exceptions are `search.target_ms1_tolerance` and `search.target_ms2_tolerance`, which will be overwritten with
optimal values determined in the previous step.

After the three-step search ran through, you will find the final results in the root of the project folder.
The results of the intermediate steps are stored in subfolders `transfer` and `library`, respectively.


### Notes on the multistep search using CLI
If you use the CLI, add the following to your `config.yaml` to enable the multistep search:
```yaml
multistep_search:
general:
transfer_step_enabled: True
mbr_step_enabled: True
```
Details on the internals of the multistep search can be found in `multistep.yaml`.

In case the multistep search fails at some step, you can restart the failed step by
using the `full_config.yaml` that is stored in the respective subfolder. You can of course edit
this file in order to fix the issue that caused the failure.
using the `full_config.yaml` file that is stored in the respective subfolder. You can of course edit
the configuration file in order to fix the issue that caused the failure.
1 change: 1 addition & 0 deletions gui/src/renderer/App.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const AppLayout = styled('div')(({ theme }) => ({
}));

const ContentContainer = styled('div')(({ theme }) => ({
overflowX: 'hidden',
flexGrow: 1,
minWidth: 0,
paddingLeft: theme.spacing(2),
Expand Down
Loading

0 comments on commit d99e098

Please sign in to comment.