From 0c222c41915ffba5981310165557c56aa3842fef Mon Sep 17 00:00:00 2001
From: Sotiris Touliopoulos
 <109972702+SotirisTouliopoulos@users.noreply.github.com>
Date: Sat, 27 Jul 2024 02:27:18 +0300
Subject: [PATCH] PreProcess class for reactions removal (#100)

* initial commit for preprocess.py

* fix path for e.coli json file

* removed uni-directed reactions function

* simplified some functions

* new approach

* fix json model path

* created PreProcess class

* added FOO to FVA

* comparison with abs and tol

* generalized model with objective function

* renamed function

* added blocked, zero-flux and merge of removed reactions

* version fix

* possible essential reactions method

* infeasible error

* Update preprocess.py

* added self to functions

* unittest for preprocess.py

* initial commit for preprocess unittest

* minor fixes

* minor fixes

* Update preprocess unittest

* simplified functions

* Update preprocess unittest

* restore reaction bounds dict

* added documentation

* cobra dingo conversion

* added some comments

* fixed conversion to dingo model

* fixed conversion to dingo model

* corrections to the PreProcess PR

* corrections to the PreProcess PR
---
 dingo/preprocess.py | 258 ++++++++++++++++++++++++++++++++++++++++++++
 tests/preprocess.py |  69 ++++++++++++
 2 files changed, 327 insertions(+)
 create mode 100644 dingo/preprocess.py
 create mode 100644 tests/preprocess.py

diff --git a/dingo/preprocess.py b/dingo/preprocess.py
new file mode 100644
index 00000000..a148d303
--- /dev/null
+++ b/dingo/preprocess.py
@@ -0,0 +1,258 @@
+
+import cobra
+import cobra.manipulation
+from collections import Counter
+from dingo import MetabolicNetwork
+
+
+class PreProcess:
+    
+    def __init__(self, model, tol=1e-6, open_exchanges=False):
+
+        """
+        model parameter gets a cobra model as input
+        
+        tol parameter gets a cutoff value used to classify 
+        zero-flux and mle reactions and compare FBA solutions
+        before and after reactions removal
+        
+        open_exchanges parameter is used in the function that identifies blocked reactions
+        It controls whether or not to open all exchange reactions to very high flux ranges
+        """
+        
+        self._model = model
+        self._tol = tol
+        
+        if self._tol > 1e-6:
+            print("Tolerance value set to",self._tol,"while default value is 1e-6. A looser check will be performed")
+        
+        self._open_exchanges = open_exchanges
+        
+        self._objective = self._objective_function()
+        self._initial_reactions = self._initial()
+        self._reaction_bounds_dict = self._reaction_bounds_dictionary()
+        self._essential_reactions = self._essentials()
+        self._zero_flux_reactions = self._zero_flux()
+        self._blocked_reactions = self._blocked()
+        self._mle_reactions = self._metabolically_less_efficient()
+        self._removed_reactions = []
+
+
+    def _objective_function(self):
+        """
+        A function used to find the objective function of a model
+        """
+        
+        objective = str(self._model.summary()._objective)
+        self._objective = objective.split(" ")[1]
+        
+        return self._objective
+
+
+    def _initial(self):
+        """
+        A function used to find reaction ids of a model
+        """
+        
+        self._initial_reactions =  [ reaction.id for reaction in \
+        self._model.reactions ]
+                    
+        return self._initial_reactions
+    
+    
+    def _reaction_bounds_dictionary(self):
+        """
+        A function used to create a dictionary that maps
+        reactions with their corresponding bounds. It is used to
+        later restore bounds to their wild-type values
+        """
+        
+        self._reaction_bounds_dict = { }
+                
+        for reaction_id in self._initial_reactions:
+            bounds = self._model.reactions.get_by_id(reaction_id).bounds
+            self._reaction_bounds_dict[reaction_id] = bounds
+            
+        return self._reaction_bounds_dict
+
+
+    def _essentials(self):
+        """
+        A function used to find all the essential reactions
+        and append them into a list. Essential reactions are
+        the ones that are required for growth. If removed the 
+        objective function gets zeroed.
+        """
+        
+        self._essential_reactions =  [ reaction.id for reaction in \
+        cobra.flux_analysis.find_essential_reactions(self._model) ]
+                    
+        return self._essential_reactions
+    
+
+    def _zero_flux(self):
+        """
+        A function used to find zero-flux reactions.
+        “Zero-flux” reactions cannot carry a flux while maintaining 
+        at least 90% of the maximum growth rate.
+        These reactions have both a min and a max flux equal to 0,
+        when running a FVA analysis with the fraction of optimum set to 90%
+        """
+        
+        tol = self._tol
+        
+        fva = cobra.flux_analysis.flux_variability_analysis(self._model, fraction_of_optimum=0.9)
+        zero_flux = fva.loc[ (abs(fva['minimum']) < tol ) & (abs(fva['maximum']) < tol)]
+        self._zero_flux_reactions = zero_flux.index.tolist()
+        
+        return self._zero_flux_reactions
+    
+    
+    def _blocked(self):
+        """
+        A function used to find blocked reactions.
+        "Blocked" reactions that cannot carry a flux in any condition.
+        These reactions can not have any flux other than 0
+        """
+        
+        self._blocked_reactions = cobra.flux_analysis.find_blocked_reactions(self._model, open_exchanges=self._open_exchanges)
+        return self._blocked_reactions
+
+
+    def _metabolically_less_efficient(self):
+        """
+        A function used to find metabolically less efficient reactions.
+        "Metabolically less efficient" require a reduction in growth rate if used
+        These reactions are found when running an FBA and setting the  
+        optimal growth rate as the lower bound of the objective function (in 
+        this case biomass production. After running an FVA with the fraction of optimum
+        set to 0.95, the reactions that have no flux are the metabolically less efficient.
+        """
+            
+        tol = self._tol
+
+        fba_solution = self._model.optimize()
+
+        wt_lower_bound = self._model.reactions.get_by_id(self._objective).lower_bound
+        self._model.reactions.get_by_id(self._objective).lower_bound = fba_solution.objective_value
+
+        fva = cobra.flux_analysis.flux_variability_analysis(self._model, fraction_of_optimum=0.95)
+        mle = fva.loc[ (abs(fva['minimum']) < tol ) & (abs(fva['maximum']) < tol)]
+        self._mle_reactions = mle.index.tolist()
+        
+        self._model.reactions.get_by_id(self._objective).lower_bound = wt_lower_bound
+        
+        return self._mle_reactions
+    
+    
+    def _remove_model_reactions(self):
+        """
+        A function used to set the lower and upper bounds of certain reactions to 0
+        (it turns off reactions)
+        """
+                                
+        for reaction in self._removed_reactions:
+            self._model.reactions.get_by_id(reaction).lower_bound = 0
+            self._model.reactions.get_by_id(reaction).upper_bound = 0
+            
+        return self._model
+   
+            
+    def reduce(self, extend=False):
+        """
+        A function that calls the "remove_model_reactions" function
+        and removes blocked, zero-flux and metabolically less efficient 
+        reactions from the model.
+        
+        Then it finds the remaining reactions in the model after 
+        exclusion of the essential reactions.
+        
+        When the "extend" parameter is set to True, the function  performes
+        an additional check to remove further reactions. These reactions 
+        are the ones that if knocked-down, they do not affect the value 
+        of the objective function. These reactions are removed simultaneously
+        from the model. If this simultaneous removal produces an infesible
+        solution (or a solution of 0) to the objective function, 
+        these reactions are restored with their initial bounds.
+        
+        A dingo-type tuple is then created from the cobra model 
+        using the "cobra_dingo_tuple" function.
+        
+        The outputs are
+        (a) A list of the removed reactions ids
+        (b) A reduced dingo model
+        """        
+        
+        # create a list from the combined blocked, zero-flux, mle reactions
+        blocked_mle_zero = self._blocked_reactions + self._mle_reactions + self._zero_flux_reactions
+        list_removed_reactions = list(set(blocked_mle_zero))        
+        self._removed_reactions = list_removed_reactions
+   
+        # remove these reactions from the model
+        self._remove_model_reactions()
+                     
+        remained_reactions = list((Counter(self._initial_reactions)-Counter(self._removed_reactions)).elements())
+        remained_reactions = list((Counter(remained_reactions)-Counter(self._essential_reactions)).elements())
+        
+        tol = self._tol
+        
+        if extend != False and extend != True:
+            raise Exception("Wrong Input to extend parameter")
+        
+        elif extend == False:
+            
+            print(len(self._removed_reactions), "of the", len(self._initial_reactions), \
+            "reactions were removed from the model with extend set to", extend) 
+            
+            # call this functon to convert cobra to dingo model
+            self._dingo_model = MetabolicNetwork.from_cobra_model(self._model)
+            return self._removed_reactions, self._dingo_model 
+
+        elif extend == True:
+  
+            # find additional reactions with a possibility of removal
+            additional_removed_reactions_list = []
+            additional_removed_reactions_count = 0       
+            for reaction in remained_reactions:
+            
+                fba_solution_before = self._model.optimize().objective_value
+            
+                # perform a knock-out and check the output
+                self._model.reactions.get_by_id(reaction).lower_bound = 0
+                self._model.reactions.get_by_id(reaction).upper_bound = 0
+    
+                fba_solution_after = self._model.optimize().objective_value
+            
+                if fba_solution_after != None:
+                    if (abs(fba_solution_after - fba_solution_before) < tol):
+                        self._removed_reactions.append(reaction)
+                        additional_removed_reactions_list.append(reaction)
+                        additional_removed_reactions_count += 1
+              
+                self._model.reactions.get_by_id(reaction).upper_bound = self._reaction_bounds_dict[reaction][1]
+                self._model.reactions.get_by_id(reaction).lower_bound = self._reaction_bounds_dict[reaction][0]
+            
+            
+            # compare FBA solution before and after the removal of additional reactions
+            fba_solution_initial = self._model.optimize().objective_value
+            self._remove_model_reactions()        
+            fba_solution_final = self._model.optimize().objective_value
+
+                
+            # if FBA solution after removal is infeasible or altered
+            # restore the initial reactions bounds
+            if (fba_solution_final == None) | (abs(fba_solution_final - fba_solution_initial) > tol):
+                for reaction in additional_removed_reactions_list:
+                    self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction]
+                    self._removed_reactions.remove(reaction)
+                print(len(self._removed_reactions), "of the", len(self._initial_reactions), \
+                "reactions were removed from the model with extend set to", extend)
+
+            else:
+                print(len(self._removed_reactions), "of the", len(self._initial_reactions), \
+                "reactions were removed from the model with extend set to", extend)
+
+    
+            # call this functon to convert cobra to dingo model
+            self._dingo_model = MetabolicNetwork.from_cobra_model(self._model)
+            return self._removed_reactions, self._dingo_model 
diff --git a/tests/preprocess.py b/tests/preprocess.py
new file mode 100644
index 00000000..28db8bbf
--- /dev/null
+++ b/tests/preprocess.py
@@ -0,0 +1,69 @@
+
+from cobra.io import load_json_model
+from dingo import MetabolicNetwork
+from dingo.preprocess import PreProcess
+import unittest
+import numpy as np
+
+class TestPreprocess(unittest.TestCase):
+
+    def test_preprocess(self):
+
+        # load cobra model
+        cobra_model = load_json_model("ext_data/e_coli_core.json")
+        
+        # convert cobra to dingo model
+        initial_dingo_model = MetabolicNetwork.from_cobra_model(cobra_model)
+        
+        # perform an FBA to find the initial FBA solution
+        initial_fba_solution = initial_dingo_model.fba()[1]
+
+
+        # call the reduce function from the PreProcess class 
+        # with extend=False to remove reactions from the model        
+        obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False)  
+        removed_reactions, final_dingo_model = obj.reduce(extend=False)      
+        
+        # calculate the count of removed reactions with extend set to False        
+        removed_reactions_count = len(removed_reactions)
+        self.assertTrue( 46 - removed_reactions_count == 0 )
+
+        # calculate the count of reactions with bounds equal to 0 
+        # with extend set to False from the dingo model
+        dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0))
+        self.assertTrue( 46 - dingo_removed_reactions == 0 )
+        
+        # perform an FBA to check the solution after reactions removal
+        final_fba_solution = final_dingo_model.fba()[1]
+        self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03)       
+
+
+        # load models in cobra and dingo format again to restore bounds
+        cobra_model = load_json_model("ext_data/e_coli_core.json")
+        
+        # convert cobra to dingo model
+        initial_dingo_model = MetabolicNetwork.from_cobra_model(cobra_model)
+     
+        # call the reduce function from the PreProcess class 
+        # with extend=True to remove additional reactions from the model        
+        obj = PreProcess(cobra_model, tol=1e-6, open_exchanges=False)        
+        removed_reactions, final_dingo_model = obj.reduce(extend=True)        
+    
+        # calculate the count of removed reactions with extend set to True        
+        removed_reactions_count = len(removed_reactions)
+        self.assertTrue( 47 - removed_reactions_count == 0 )
+
+        # calculate the count of reactions with bounds equal to 0 
+        # with extend set to True from the dingo model
+        dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0))
+        self.assertTrue( 47 - dingo_removed_reactions == 0 )
+        
+        # perform an FBA to check the result after reactions removal
+        final_fba_solution = final_dingo_model.fba()[1]
+        self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03)   
+
+
+if __name__ == "__main__":
+    unittest.main()
+
+ 
\ No newline at end of file