From 4a961e7127e6a10c4e82c0a648621a4389f608df Mon Sep 17 00:00:00 2001 From: Sotiris Touliopoulos <109972702+SotirisTouliopoulos@users.noreply.github.com> Date: Thu, 15 Aug 2024 19:00:10 +0300 Subject: [PATCH] Functions for correlation matrix, copula indicator and plotting (#103) * initial commit for correlation * initial commit for correlation * initial commit for correlation * fixed correlation plot * added filtering based on copula indicator * Filtering of correlated reactions based on copula * fix copula indicator * added unittest and fixed heatmap * fixed plots of corr matrix * new method for additional reaction removal * documentation correction * documentation correction * corr matrix pearson fix * correction to correlation labels output * correction to correlation labels output symbol * added verbose boolean variable --- dingo/illustrations.py | 47 ++++++++++++++ dingo/preprocess.py | 133 +++++++++++++++++++++++---------------- dingo/utils.py | 140 +++++++++++++++++++++++++++++++++++++++++ tests/correlation.py | 53 ++++++++++++++++ tests/preprocess.py | 8 +-- 5 files changed, 323 insertions(+), 58 deletions(-) create mode 100644 tests/correlation.py diff --git a/dingo/illustrations.py b/dingo/illustrations.py index 00f2cba2..0e9a120f 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -9,6 +9,7 @@ import matplotlib.pyplot as plt import plotly.graph_objects as go import plotly.io as pio +import plotly.express as px from dingo.utils import compute_copula def plot_copula(data_flux1, data_flux2, n = 5, width = 900 , height = 600, export_format = "svg"): @@ -82,3 +83,49 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): plt.axis([np.amin(reaction_fluxes), np.amax(reaction_fluxes), 0, np.amax(n) * 1.2]) plt.show() + + + +def plot_corr_matrix(corr_matrix, reactions, removed_reactions=[], format="svg"): + """A Python function to plot the heatmap of a model's pearson correlation matrix. + + Keyword arguments: + corr_matrix -- A matrix produced from the "correlated_reactions" function + reactions -- A list with the model's reactions + removed_reactions -- A list with the removed reactions in case of a preprocess. + If provided removed reactions are not plotted. + """ + + sns_colormap = [[0.0, '#3f7f93'], + [0.1, '#6397a7'], + [0.2, '#88b1bd'], + [0.3, '#acc9d2'], + [0.4, '#d1e2e7'], + [0.5, '#f2f2f2'], + [0.6, '#f6cdd0'], + [0.7, '#efa8ad'], + [0.8, '#e8848b'], + [0.9, '#e15e68'], + [1.0, '#da3b46']] + + if removed_reactions != 0: + for reaction in reactions: + index = reactions.index(reaction) + if reaction in removed_reactions: + reactions[index] = None + + fig = px.imshow(corr_matrix, + color_continuous_scale = sns_colormap, + x = reactions, y = reactions, origin="upper") + + fig.update_layout( + xaxis=dict(tickfont=dict(size=5)), + yaxis=dict(tickfont=dict(size=5)), + width=900, height=900, plot_bgcolor="rgba(0,0,0,0)") + + fig.update_traces(xgap=1, ygap=1, hoverongaps=False) + + fig.show() + + fig_name = "CorrelationMatrix." + format + pio.write_image(fig, fig_name, scale=2) diff --git a/dingo/preprocess.py b/dingo/preprocess.py index a148d303..4f711ff2 100644 --- a/dingo/preprocess.py +++ b/dingo/preprocess.py @@ -2,32 +2,39 @@ import cobra import cobra.manipulation from collections import Counter -from dingo import MetabolicNetwork +from dingo import MetabolicNetwork, PolytopeSampler +from dingo.utils import correlated_reactions +import numpy as np class PreProcess: - def __init__(self, model, tol=1e-6, open_exchanges=False): + def __init__(self, model, tol = 1e-6, open_exchanges = False, verbose = False): """ - model parameter gets a cobra model as input + model -- parameter gets a cobra model as input - tol parameter gets a cutoff value used to classify - zero-flux and mle reactions and compare FBA solutions - before and after reactions removal + tol -- parameter gets a cutoff value used to classify + zero-flux and mle reactions and compare FBA solutions + before and after reactions removal - open_exchanges parameter is used in the function that identifies blocked reactions - It controls whether or not to open all exchange reactions to very high flux ranges + open_exchanges -- parameter is used in the function that identifies blocked reactions + It controls whether or not to open all exchange reactions + to very high flux ranges. + + verbose -- A boolean type variable that if True + additional information for preprocess is printed. """ self._model = model self._tol = tol - - if self._tol > 1e-6: - print("Tolerance value set to",self._tol,"while default value is 1e-6. A looser check will be performed") - + self._open_exchanges = open_exchanges + self._verbose = verbose + if self._tol > 1e-6 and verbose == True: + print("Tolerance value set to",self._tol,"while default value is 1e-6. A looser check will be performed") + self._objective = self._objective_function() self._initial_reactions = self._initial() self._reaction_bounds_dict = self._reaction_bounds_dictionary() @@ -170,10 +177,11 @@ def reduce(self, extend=False): When the "extend" parameter is set to True, the function performes an additional check to remove further reactions. These reactions are the ones that if knocked-down, they do not affect the value - of the objective function. These reactions are removed simultaneously - from the model. If this simultaneous removal produces an infesible - solution (or a solution of 0) to the objective function, - these reactions are restored with their initial bounds. + of the objective function. Reactions are removed in an ordered way. + The ones with the least overall correlation in a correlation matrix + are removed first. These reactions are removed one by one from the model. + If this removal produces an infeasible solution (or a solution of 0) + to the objective function, these reactions are restored to their initial bounds. A dingo-type tuple is then created from the cobra model using the "cobra_dingo_tuple" function. @@ -201,58 +209,75 @@ def reduce(self, extend=False): elif extend == False: - print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ - "reactions were removed from the model with extend set to", extend) + if self._verbose == True: + print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ + "reactions were removed from the model with extend set to", extend) # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) return self._removed_reactions, self._dingo_model elif extend == True: - - # find additional reactions with a possibility of removal - additional_removed_reactions_list = [] - additional_removed_reactions_count = 0 - for reaction in remained_reactions: - fba_solution_before = self._model.optimize().objective_value + reduced_dingo_model = MetabolicNetwork.from_cobra_model(self._model) + reactions = reduced_dingo_model.reactions + sampler = PolytopeSampler(reduced_dingo_model) + steady_states = sampler.generate_steady_states() + + # calculate correlation matrix with additional filtering from copula indicator + corr_matrix = correlated_reactions( + steady_states, + pearson_cutoff = 0, + indicator_cutoff = 0, + cells = 10, + cop_coeff = 0.3, + lower_triangle = False) - # perform a knock-out and check the output - self._model.reactions.get_by_id(reaction).lower_bound = 0 - self._model.reactions.get_by_id(reaction).upper_bound = 0 - - fba_solution_after = self._model.optimize().objective_value + # convert pearson values to absolute values + abs_array = abs(corr_matrix) + # sum absolute pearson values per row + sum_array = np.sum((abs_array), axis=1) + # get indices of ordered sum values + order_sum_indices = np.argsort(sum_array) - if fba_solution_after != None: - if (abs(fba_solution_after - fba_solution_before) < tol): - self._removed_reactions.append(reaction) - additional_removed_reactions_list.append(reaction) - additional_removed_reactions_count += 1 - - self._model.reactions.get_by_id(reaction).upper_bound = self._reaction_bounds_dict[reaction][1] - self._model.reactions.get_by_id(reaction).lower_bound = self._reaction_bounds_dict[reaction][0] + fba_solution_before = self._model.optimize().objective_value + # count additional reactions with a possibility of removal + additional_removed_reactions_count = 0 - # compare FBA solution before and after the removal of additional reactions - fba_solution_initial = self._model.optimize().objective_value - self._remove_model_reactions() - fba_solution_final = self._model.optimize().objective_value - - - # if FBA solution after removal is infeasible or altered - # restore the initial reactions bounds - if (fba_solution_final == None) | (abs(fba_solution_final - fba_solution_initial) > tol): - for reaction in additional_removed_reactions_list: - self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] - self._removed_reactions.remove(reaction) - print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ - "reactions were removed from the model with extend set to", extend) + # find additional reactions with a possibility of removal + for index in order_sum_indices: + if reactions[index] in remained_reactions: + reaction = reactions[index] + + # perform a knock-out and check the output + self._model.reactions.get_by_id(reaction).lower_bound = 0 + self._model.reactions.get_by_id(reaction).upper_bound = 0 + + try: + fba_solution_after = self._model.optimize().objective_value + if (abs(fba_solution_after - fba_solution_before) > tol): + # restore bounds + self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] + + # if system has no solution + except: + self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] + + finally: + if (fba_solution_after != None) and (abs(fba_solution_after - fba_solution_before) < tol): + self._removed_reactions.append(reaction) + additional_removed_reactions_count += 1 + else: + # restore bounds + self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] - else: + + if self._verbose == True: print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ "reactions were removed from the model with extend set to", extend) - - + print(additional_removed_reactions_count, "additional reaction(s) removed") + # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) return self._removed_reactions, self._dingo_model diff --git a/dingo/utils.py b/dingo/utils.py index 8074b155..3fb0888c 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -201,3 +201,143 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): print("gmscale failed to compute a good scaling.") return A, b, N, N_shift + + + +def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, indicator_cutoff = 10, + cells = 10, cop_coeff = 0.3, lower_triangle = True, verbose = False): + """A Python function to calculate the pearson correlation matrix of a model + and filter values based on the copula's indicator + + Keyword arguments: + steady_states -- A numpy array of the generated steady states fluxes + reactions -- A list with the model's reactions + pearson_cutoff -- A cutoff to filter reactions based on pearson coefficient + indicator_cutoff -- A cutoff to filter reactions based on indicator value + cells -- Number of cells to compute the copula + cop_coeff -- A value that narrows or widens the width of the copula's diagonal + lower_triangle -- A boolean variable that if True plots only the lower triangular matrix + verbose -- A boolean variable that if True additional information is printed as an output. + """ + + if cop_coeff > 0.4 or cop_coeff < 0.2: + raise Exception("Input value to cop_coeff parameter must be between 0.2 and 0.4") + + # calculate coefficients to access red and blue copula mass + cop_coeff_1 = cop_coeff + cop_coeff_2 = 1 - cop_coeff + cop_coeff_3 = 1 + cop_coeff + + # compute correlation matrix + corr_matrix = np.corrcoef(steady_states, rowvar=True) + + # replace not assigned values with 0 + corr_matrix[np.isnan(corr_matrix)] = 0 + + # create a copy of correlation matrix to replace/filter values + filtered_corr_matrix = corr_matrix.copy() + + # find indices of correlation matrix where correlation does not occur + no_corr_indices = np.argwhere((filtered_corr_matrix < pearson_cutoff) & (filtered_corr_matrix > -pearson_cutoff)) + + # replace values from the correlation matrix that do not overcome + # the pearson cutoff with 0 + for i in range(0, no_corr_indices.shape[0]): + index1 = no_corr_indices[i][0] + index2 = no_corr_indices[i][1] + + filtered_corr_matrix[index1, index2] = 0 + + # if user does not provide an indicator cutoff then do not proceed + # with the filtering of the correlation matrix + if indicator_cutoff == 0: + if lower_triangle == True: + filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix + else: + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix + else: + # a dictionary that will store for each filtered reaction combination, + # the pearson correlation value, the copula's indicator value + # and the correlation classification + indicator_dict = {} + + # keep only the lower triangle + corr_matrix = np.tril(corr_matrix) + # replace diagonal values with 0 + np.fill_diagonal(corr_matrix, 0) + + # find indices of correlation matrix where correlation occurs + corr_indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) + + # compute copula for each set of correlated reactions + for i in range(0, corr_indices.shape[0]): + + index1 = corr_indices[i][0] + index2 = corr_indices[i][1] + + reaction1 = reactions[index1] + reaction2 = reactions[index2] + + flux1 = steady_states[index1] + flux2 = steady_states[index2] + + copula = compute_copula(flux1, flux2, cells) + rows, cols = copula.shape + + red_mass = 0 + blue_mass = 0 + indicator = 0 + + for row in range(rows): + for col in range(cols): + # values in the diagonal + if ((row-col >= -cop_coeff_1*rows) & (row-col <= cop_coeff_1*rows)): + # values near the top left and bottom right corner + if ((row+col < cop_coeff_2*rows) | (row+col > cop_coeff_3*rows)): + red_mass = red_mass + copula[row][col] + else: + # values near the top right and bottom left corner + if ((row+col >= cop_coeff_2*rows-1) & (row+col <= cop_coeff_3*rows-1)): + blue_mass = blue_mass + copula[row][col] + + indicator = (red_mass+1e-9) / (blue_mass+1e-9) + + # classify specific pair of reactions as positive or negative + # correlated based on indicator cutoff + if indicator > indicator_cutoff: + pearson = filtered_corr_matrix[index1, index2] + indicator_dict[reaction1 + "~" + reaction2] = {'pearson': pearson, + 'indicator': indicator, + 'classification': "positive"} + + elif indicator < 1/indicator_cutoff: + pearson = filtered_corr_matrix[index1, index2] + indicator_dict[reaction1 + "~" + reaction2] = {'pearson': pearson, + 'indicator': indicator, + 'classification': "negative"} + + # if they do not overcome the cutoff replace their corresponding + # value in the correlation matrix with 0 + else: + filtered_corr_matrix[index1, index2] = 0 + filtered_corr_matrix[index2, index1] = 0 + pearson = filtered_corr_matrix[index1, index2] + indicator_dict[reaction1 + "~" + reaction2] = {'pearson': pearson, + 'indicator': indicator, + 'classification': "no correlation"} + + if verbose == True: + print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") + + if lower_triangle == True: + filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix, indicator_dict + + else: + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix, indicator_dict + \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py new file mode 100644 index 00000000..fdb21b75 --- /dev/null +++ b/tests/correlation.py @@ -0,0 +1,53 @@ + +from dingo.utils import correlated_reactions +from dingo import MetabolicNetwork, PolytopeSampler +import numpy as np +import unittest + +class TestCorrelation(unittest.TestCase): + + def test_correlation(self): + + dingo_model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') + reactions = dingo_model.reactions + + sampler = PolytopeSampler(dingo_model) + steady_states = sampler.generate_steady_states() + + # calculate correlation matrix with filtering from copula indicator + corr_matrix, indicator_dict = correlated_reactions(steady_states, + reactions = reactions, + indicator_cutoff = 5, + pearson_cutoff = 0.999999, + lower_triangle = False, + verbose = False) + + # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 + self.assertTrue(np.trace(corr_matrix) == len(reactions)) + # rows and columns must be equal to model reactions + self.assertTrue(corr_matrix.shape[0] == len(reactions)) + self.assertTrue(corr_matrix.shape[1] == len(reactions)) + + + dingo_model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') + reactions = dingo_model.reactions + + sampler = PolytopeSampler(dingo_model) + steady_states = sampler.generate_steady_states() + + # calculate correlation matrix without filtering from copula indicator + corr_matrix = correlated_reactions(steady_states, + indicator_cutoff = 0, + pearson_cutoff = 0, + lower_triangle = True, + verbose = False) + + # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 + self.assertTrue(np.trace(corr_matrix) == len(reactions)) + # rows and columns must be equal to model reactions + self.assertTrue(corr_matrix.shape[0] == len(reactions)) + self.assertTrue(corr_matrix.shape[1] == len(reactions)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/preprocess.py b/tests/preprocess.py index 28db8bbf..64edce9c 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -21,7 +21,7 @@ def test_preprocess(self): # call the reduce function from the PreProcess class # with extend=False to remove reactions from the model - obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False) + obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False, verbose=False) removed_reactions, final_dingo_model = obj.reduce(extend=False) # calculate the count of removed reactions with extend set to False @@ -46,17 +46,17 @@ def test_preprocess(self): # call the reduce function from the PreProcess class # with extend=True to remove additional reactions from the model - obj = PreProcess(cobra_model, tol=1e-6, open_exchanges=False) + obj = PreProcess(cobra_model, tol=1e-6, open_exchanges=False, verbose=False) removed_reactions, final_dingo_model = obj.reduce(extend=True) # calculate the count of removed reactions with extend set to True removed_reactions_count = len(removed_reactions) - self.assertTrue( 47 - removed_reactions_count == 0 ) + self.assertTrue( 46 - removed_reactions_count <= 0 ) # calculate the count of reactions with bounds equal to 0 # with extend set to True from the dingo model dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0)) - self.assertTrue( 47 - dingo_removed_reactions == 0 ) + self.assertTrue( 46 - dingo_removed_reactions <= 0 ) # perform an FBA to check the result after reactions removal final_fba_solution = final_dingo_model.fba()[1]