New Workflow: LDA then XGBoost #155

Open · wants to merge 8 commits into master
4 changes: 2 additions & 2 deletions pyprophet/_config.py
@@ -90,7 +90,7 @@ class RunnerConfig:
Configuration for scoring, classifier setup, learning parameters, and optional features.

Attributes:
classifier (str): Classifier type used for semi-supervised learning ('LDA', 'SVM' or 'XGBoost').
classifier (str): Classifier type used for semi-supervised learning. Can either be a single classifier ('LDA', 'SVM', 'XGBoost') or a multi-classifier ('LDA_XGBoost').
autotune (bool): Whether to autotune hyperparameters for the classifier (XGBoost / SVM)
ss_main_score (str): Starting main score for semi-supervised learning (can be 'auto').
main_score_selection_report (bool): Whether to generate a report for main score selection.
@@ -127,7 +127,7 @@ class RunnerConfig:
"""

# Scoring / classifier options
classifier: Literal["LDA", "SVM", "XGBoost"] = "LDA"
classifier: Literal["LDA", "SVM", "XGBoost", "LDA_XGBoost"] = "LDA"
autotune: bool = False
ss_main_score: str = "auto"
main_score_selection_report: bool = False
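Note (illustration, not part of the diff): with the widened Literal, any value outside the four listed strings should be rejected. A minimal sketch of such a runtime check, using only names visible in this hunk; RunnerConfig itself is not constructed here because its remaining fields are not shown in the diff.

from typing import Literal, get_args

Classifier = Literal["LDA", "SVM", "XGBoost", "LDA_XGBoost"]  # mirrors the annotation above

def validate_classifier(name: str) -> str:
    # Reject anything that is not one of the accepted classifier names.
    if name not in get_args(Classifier):
        raise ValueError(f"Unknown classifier: {name!r}")
    return name

validate_classifier("LDA_XGBoost")  # accepted with this change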
21 changes: 14 additions & 7 deletions pyprophet/cli/score.py
@@ -12,7 +12,7 @@
memray_profile,
)
from .._config import RunnerIOConfig
from ..scoring.runner import PyProphetLearner, PyProphetWeightApplier
from ..scoring.runner import PyProphetLearner, PyProphetWeightApplier, LDA_XGBoostMultiLearner


# PyProphet semi-supervised learning and scoring
@@ -43,7 +43,7 @@
"--classifier",
default="LDA",
show_default=True,
type=click.Choice(["LDA", "SVM", "XGBoost"]),
type=click.Choice(["LDA", "SVM", "XGBoost", "LDA_XGBoost"]),
help='Either a "LDA", "SVM" or "XGBoost" classifier is used for semi-supervised learning.',
)
@click.option(
@@ -360,7 +360,7 @@ def score(
config.subsample_ratio = 1.0

if not apply_weights:
if config.subsample_ratio < 1.0:
if config.subsample_ratio < 1.0: # currently LDA_XGBoostMultiLearner does not support subsampling
logger.info(
f"Conducting {level} semi-supervised learning on {config.subsample_ratio * 100}% of the data.",
)
@@ -399,11 +399,18 @@ def score(
PyProphetWeightApplier(weights_path, run_config).run()
else:
PyProphetWeightApplier(weights_path, config).run()
-else:
-    logger.info(
-        f"Conducting {level} semi-supervised learning.",
-    )
-    PyProphetLearner(config).run()
+else:  # No subsampling
+    if config.runner.classifier == "LDA_XGBoost":
+        logger.info(
+            f"Conducting {level} semi-supervised learning with LDA followed by XGBoost.",
+        )
+        LDA_XGBoostMultiLearner(config).run()
+
+    else:
+        logger.info(
+            f"Conducting {level} semi-supervised learning.",
+        )
+        PyProphetLearner(config).run()
else:
logger.info(
f"Applying {level} weights from {apply_weights} to the full data set.",
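Note (not part of the diff): a minimal sketch of exercising the new LDA_XGBoost branch end to end via click's test runner. The import path of the score command and the --in/--level options are assumptions based on this file; only --classifier LDA_XGBoost is taken directly from the change above.

from click.testing import CliRunner

from pyprophet.cli.score import score  # assumed import path for the command defined in this file

runner = CliRunner()
result = runner.invoke(
    score,
    ["--in", "test_data.osw", "--level", "ms2", "--classifier", "LDA_XGBoost"],
)
print(result.exit_code)
print(result.output)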
2 changes: 1 addition & 1 deletion pyprophet/io/_base.py
@@ -152,7 +152,7 @@ def _finalize_feature_table(self, df, ss_main_score):
f"Main score ({main_score}) not found in input columns: {df.columns}"
)

if self.classifier == "XGBoost" and self.level != "alignment":
if self.classifier in ["XGBoost", "LDA_XGBoost"] and self.level != "alignment":
logger.info(
"Enable number of transitions & precursor / product charge scores for XGBoost-based classifier"
)
54 changes: 54 additions & 0 deletions pyprophet/scoring/runner.py
@@ -255,6 +255,60 @@ def print_summary(self, result):
logger.opt(raw=True).info("\n")


class PyProphetMultiLearner(PyProphetRunner):
"""
Implements the learning and scoring workflow for PyProphet with multiple classifiers run sequentially.
"""

__metaclass__ = abc.ABCMeta

@abc.abstractmethod
def run_algo(self, part=None):
if self.glyco:
raise click.ClickException(
"Multi-classifier learning is not supported for glycopeptide workflows."
)


class LDA_XGBoostMultiLearner(PyProphetMultiLearner):
"""
Implements the learning and scoring workflow for PyProphet with LDA followed by XGBoost: the LDA discriminant score is used as the main score for the XGBoost stage.
"""

def run_algo(self, part=None):
"""
Runs the learning and scoring algorithm for multiple classifiers.

Returns:
tuple: A tuple containing the result, scorer, and weights.
"""

super(LDA_XGBoostMultiLearner, self).run_algo(part)

config_lda = self.config.copy()
config_lda.runner.classifier = "LDA"

# remove columns that are not needed for LDA
table_lda = self.table.drop(columns=["var_precursor_charge", "var_product_charge", "var_transition_count"], errors='ignore')

(result_lda, scorer_lda, weights_lda) = PyProphet(config_lda).learn_and_apply(table_lda)

# rename the column that was the main score
self.table.columns = self.table.columns.str.replace('^main', '', regex=True)

self.table['main_var_lda_score'] = result_lda.scored_tables['d_score']

logger.info("LDA scores computed! Now running XGBoost using the LDA score as the main score")

config_xgb = self.config.copy()
config_xgb.runner.ss_main_score = 'var_lda_score' # use lda score as the main score for XGBoost
config_xgb.runner.classifier = "XGBoost"
config_xgb.runner.ss_use_dynamic_main_score = False # the LDA score is fixed as the main score, so dynamic main-score selection is not needed
self.config.runner.classifier = "XGBoost" # need to change to XGBoost for saving the weights

(result_xgb, scorer_xgb, weights_xgb) = PyProphet(config_xgb).learn_and_apply(self.table)
return (result_xgb, scorer_xgb, weights_xgb)

class PyProphetLearner(PyProphetRunner):
"""
Implements the learning and scoring workflow for PyProphet.
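Note (illustration, not pyprophet internals): LDA_XGBoostMultiLearner chains the two classifiers by turning the LDA discriminant score into an additional feature that XGBoost treats as the main score. A self-contained sketch of that stacking idea on synthetic data, using scikit-learn and xgboost directly:

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from xgboost import XGBClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 10))                                         # sub-scores ("var_" columns)
y = (X[:, 0] + 0.5 * X[:, 1] + rng.normal(size=500) > 0).astype(int)   # target (1) vs decoy (0)

# Stage 1: LDA collapses the sub-scores into a single discriminant score.
lda = LinearDiscriminantAnalysis().fit(X, y)
lda_score = lda.decision_function(X).reshape(-1, 1)

# Stage 2: XGBoost is trained on the original sub-scores plus the LDA score,
# mirroring how the runner adds 'main_var_lda_score' before the XGBoost pass.
X_xgb = np.hstack([X, lda_score])
xgb = XGBClassifier(n_estimators=50, max_depth=3, eval_metric="logloss").fit(X_xgb, y)
print("training accuracy:", xgb.score(X_xgb, y))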
14 changes: 14 additions & 0 deletions tests/_regtest_outputs/test_pyprophet_score.test_osw_11.out
@@ -0,0 +1,14 @@
feature_id ms1_precursor_pep ms2_peakgroup_pep ms2_precursor_pep
0 -9078977811506172301 0.0063 0.0022 0.0025
1 -9009602369958523731 0.0063 0.0022 0.0325
2 -8990894093332793487 0.0063 0.0022 0.0025
3 -8915955323477460297 0.0063 0.0022 0.0071
4 -8858715981476206597 0.0063 0.0022 0.0025
.. ... ... ... ...
95 -2912234918591861719 0.0063 0.0022 0.0025
96 -2872329084347808160 0.0063 0.0022 0.0025
97 -2789098353857361973 1.0000 0.0022 0.0025
98 -2788620575140019858 0.0063 0.0022 0.0025
99 -2741276427609241638 0.0063 0.0022 0.0325

[100 rows x 4 columns]
15 changes: 15 additions & 0 deletions tests/test_pyprophet_score.py
@@ -190,6 +190,8 @@ def execute(self, levels=None, **kwargs):
level_cmd += " --classifier=XGBoost"
if kwargs.get("xgboost_tune"):
level_cmd += " --autotune"
if kwargs.get("lda_xgboost"):
level_cmd += " --classifier=LDA_XGBoost"
if kwargs.get("score_filter"):
level_cmd = self.config.add_score_filter(level_cmd, level)

@@ -770,6 +772,19 @@ def test_osw_9(test_runner, test_config, regtest):
def test_osw_10(test_runner, test_config, regtest):
run_metabo_test(test_runner, test_config, regtest, ms1ms2=True, score_filter=True)

# Tests LDA then XGBoost
def test_osw_11(test_runner, test_config, regtest):
run_generic_test(
test_runner,
test_config,
OSWTestStrategy,
regtest,
pfdr=True,
pi0_lambda="0 0 0",
ms1ms2=True,
lda_xgboost=True,
)


# Parquet Tests
def test_parquet_0(test_runner, test_config, regtest):