From f8fd6285bfa8d229e3aaa988f7917c1eec477cc9 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 22 Jul 2024 14:06:50 +0300 Subject: [PATCH 01/16] initial commit for correlation --- dingo/illustrations.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index 00f2cba2..59c225d6 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -82,3 +82,11 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): plt.axis([np.amin(reaction_fluxes), np.amax(reaction_fluxes), 0, np.amax(n) * 1.2]) plt.show() + + + +from dingo import MetabolicNetwork, PolytopeSampler + +model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') +sampler = PolytopeSampler(model) +steady_states = sampler.generate_steady_states() \ No newline at end of file From f3a6148ce5c0ee4b5823622fee38014208f04b72 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 22 Jul 2024 14:11:51 +0300 Subject: [PATCH 02/16] initial commit for correlation --- dingo/illustrations.py | 9 +++++---- tests/correlation.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 tests/correlation.py diff --git a/dingo/illustrations.py b/dingo/illustrations.py index 59c225d6..78061f48 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -85,8 +85,9 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -from dingo import MetabolicNetwork, PolytopeSampler +def corr(): + from dingo import MetabolicNetwork, PolytopeSampler -model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') -sampler = PolytopeSampler(model) -steady_states = sampler.generate_steady_states() \ No newline at end of file + model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') + sampler = PolytopeSampler(model) + steady_states = sampler.generate_steady_states() \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py new file mode 100644 index 00000000..f50368e9 --- /dev/null +++ b/tests/correlation.py @@ -0,0 +1,4 @@ + +from dingo.illustrations import corr + +corr() \ No newline at end of file From a35a65c385f3c3596624604be5a5cc61bc4baca6 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Tue, 23 Jul 2024 19:51:29 +0300 Subject: [PATCH 03/16] initial commit for correlation --- dingo/illustrations.py | 22 +++++++++++++++++----- tests/correlation.py | 4 +++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index 78061f48..4a07592d 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -85,9 +85,21 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -def corr(): - from dingo import MetabolicNetwork, PolytopeSampler - - model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') +def corr(model): + + from dingo import PolytopeSampler + sampler = PolytopeSampler(model) - steady_states = sampler.generate_steady_states() \ No newline at end of file + steady_states = sampler.generate_steady_states(ess=100) + + for steady_states_a in steady_states: + hist_counts_a, _ = np.histogram(steady_states_a, bins=60) + + for steady_states_b in steady_states: + hist_counts_b, _ = np.histogram(steady_states_b, bins=60) + + hist_counts_ab = np.stack((hist_counts_a, hist_counts_b), axis=0) + hist_counts_ab = hist_counts_ab.transpose() + + corr_matrix = np.corrcoef(hist_counts_ab, rowvar=False) + print("Correlation between reactions is: ",corr_matrix[0][1]) \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py index f50368e9..eab829cb 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -1,4 +1,6 @@ from dingo.illustrations import corr +from dingo import MetabolicNetwork -corr() \ No newline at end of file +model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') +corr(model) \ No newline at end of file From 6b01cf5fc772d852843d7eaa604300181d55850d Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Thu, 25 Jul 2024 15:37:21 +0300 Subject: [PATCH 04/16] fixed correlation plot --- dingo/illustrations.py | 32 +++++++++++++++++--------------- tests/correlation.py | 16 +++++++++++++--- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index 4a07592d..ccfa8e75 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -85,21 +85,23 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -def corr(model): +def plot_corr_matrix(steady_states, reactions, color="RdYlBu"): - from dingo import PolytopeSampler - - sampler = PolytopeSampler(model) - steady_states = sampler.generate_steady_states(ess=100) - - for steady_states_a in steady_states: - hist_counts_a, _ = np.histogram(steady_states_a, bins=60) + import plotly.express as px - for steady_states_b in steady_states: - hist_counts_b, _ = np.histogram(steady_states_b, bins=60) + corr_matrix = np.corrcoef(steady_states, rowvar=True) + corr_matrix[np.isnan(corr_matrix)] = 0 + corr_matrix = np.tril(corr_matrix) - hist_counts_ab = np.stack((hist_counts_a, hist_counts_b), axis=0) - hist_counts_ab = hist_counts_ab.transpose() - - corr_matrix = np.corrcoef(hist_counts_ab, rowvar=False) - print("Correlation between reactions is: ",corr_matrix[0][1]) \ No newline at end of file + print("You can see the full list of color scales here: ", px.colors.named_colorscales()) + + fig = px.imshow(corr_matrix, + color_continuous_scale = color, + x = reactions, + y = reactions) + + fig.update_layout( + xaxis=dict(tickfont=dict(size=5)), + yaxis=dict(tickfont=dict(size=5)) ) + + fig.show() diff --git a/tests/correlation.py b/tests/correlation.py index eab829cb..655c7963 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -1,6 +1,16 @@ -from dingo.illustrations import corr -from dingo import MetabolicNetwork +from dingo.illustrations import plot_corr_matrix +from dingo.utils import correlated_reactions +from dingo import MetabolicNetwork, PolytopeSampler + model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') -corr(model) \ No newline at end of file +reactions = model.reactions + +sampler = PolytopeSampler(model) +steady_states = sampler.generate_steady_states() + +plot_corr_matrix(steady_states, reactions, color="RdYlBu") +correlated_reactions(steady_states, reactions, pearson_cutoff = 0.7, n = 10) + + From 537ebad4996a3a5d74fdd4f627c1d03e40faeba9 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Thu, 25 Jul 2024 15:38:05 +0300 Subject: [PATCH 05/16] added filtering based on copula indicator --- dingo/utils.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/dingo/utils.py b/dingo/utils.py index 8074b155..4be598cb 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -201,3 +201,45 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): print("gmscale failed to compute a good scaling.") return A, b, N, N_shift + + + +def correlated_reactions(steady_states, reactions, pearson_cutoff = 0.5, n = 10): + + # compute correlation matrix + corr_matrix = np.corrcoef(steady_states, rowvar=True) + # replace not assigned values with 0 + corr_matrix[np.isnan(corr_matrix)] = 0 + # keep only the lower triangle + corr_matrix = np.tril(corr_matrix) + # replace diagonal values with 0 + np.fill_diagonal(corr_matrix, 0) + + # find indices of corr matrix where correlation occurs + indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) + + # compute copula for this set of correlated reactions + for i in range(0, indices.shape[0]): + index1 = indices[i][0] + index2 = indices[i][1] + flux1 = steady_states[index1] + flux2 = steady_states[index2] + + copula = compute_copula(flux1, flux2, n) + rows, cols = copula.shape + + red_mass = 0 + blue_mass = 0 + + for row in range(rows): + for col in range(cols): + if ((row-col >= -2) & (row-col <= 2)): + if ((row+col < 8) | (row+col > 12)): + red_mass = red_mass + copula[row][col] + + elif ((row+col >= 9) | (row+col <= 13)): + blue_mass = blue_mass + copula[row][col] + + indicator = red_mass / blue_mass + if indicator > 2: + print(reactions[index1], reactions[index2]) \ No newline at end of file From d8dcf865292f7a177f7883790ba96e648248726f Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Fri, 26 Jul 2024 15:23:49 +0300 Subject: [PATCH 06/16] Filtering of correlated reactions based on copula --- tests/correlation.py | 151 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 6 deletions(-) diff --git a/tests/correlation.py b/tests/correlation.py index 655c7963..6e8bcad0 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -2,15 +2,154 @@ from dingo.illustrations import plot_corr_matrix from dingo.utils import correlated_reactions from dingo import MetabolicNetwork, PolytopeSampler +import numpy as np -model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') -reactions = model.reactions +#model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') +#reactions = model.reactions -sampler = PolytopeSampler(model) -steady_states = sampler.generate_steady_states() +#sampler = PolytopeSampler(model) +#steady_states = sampler.generate_steady_states() -plot_corr_matrix(steady_states, reactions, color="RdYlBu") -correlated_reactions(steady_states, reactions, pearson_cutoff = 0.7, n = 10) +#plot_corr_matrix(steady_states, reactions, color="RdYlBu") +#correlated_reactions(steady_states, reactions, pearson_cutoff = 0.80, n = 7) +# Creation of 4 possible copulas classifying 2 reactions as +# Positive, Negative or No Correlated + + +# Positive Correlation +data = [ + [2, 2, 0, 1, 1, 1, 1, 1, 1], + [2, 2, 2, 0, 1, 1, 1, 1, 1], + [0, 2, 2, 2, 0, 1, 1, 1, 1], + [1, 0, 2, 2, 2, 0, 1, 1, 1], + [1, 1, 0, 2, 2, 2, 0, 1, 1], + [1, 1, 1, 0, 2, 2, 2, 0, 1], + [1, 1, 1, 1, 0, 2, 2, 2, 0], + [1, 1, 1, 1, 1, 0, 2, 2, 2], + [1, 1, 1, 1, 1, 1, 0, 2, 2] + +] + + +# Negative Correlation +data = [ + [1, 1, 1, 1, 1, 1, 0, 2, 2], + [1, 1, 1, 1, 1, 0, 2, 2, 2], + [1, 1, 1, 1, 0, 2, 2, 2, 0], + [1, 1, 1, 0, 2, 2, 2, 0, 1], + [1, 1, 0, 2, 2, 2, 0, 1, 1], + [1, 0, 2, 2, 2, 0, 1, 1, 1], + [0, 2, 2, 2, 0, 1, 1, 1, 1], + [2, 2, 2, 0, 1, 1, 1, 1, 1], + [2, 2, 0, 1, 1, 1, 1, 1, 1] + +] + + +# No Correlation +data = [ + [2, 2, 1, 1, 1, 1, 1, 1, 1], + [2, 2, 2, 1, 1, 1, 1, 1, 1], + [1, 2, 2, 2, 1, 1, 1, 1, 1], + [1, 1, 2, 2, 2, 1, 1, 1, 1], + [1, 1, 1, 2, 2, 2, 1, 1, 1], + [1, 1, 1, 1, 2, 2, 2, 1, 1], + [1, 1, 1, 1, 1, 2, 2, 2, 1], + [1, 1, 1, 1, 1, 1, 2, 2, 2], + [1, 1, 1, 1, 1, 1, 1, 2, 2] + +] + + +# No Correlation +data = [ + [2, 0, 0, 0, 1, 1, 1, 1, 1], + [0, 2, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 2, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 2, 0, 0, 0, 1, 1], + [1, 0, 0, 0, 2, 0, 0, 0, 1], + [1, 1, 0, 0, 0, 2, 0, 0, 0], + [1, 1, 1, 0, 0, 0, 2, 0, 0], + [1, 1, 1, 1, 0, 0, 0, 2, 0], + [1, 1, 1, 1, 1, 0, 0, 0, 2] + +] + + +# Convert nested list to numps 2D array +copula1 = np.array(data) +# Flip array upside down to get the other diagonal starting in the top left corner +copula2 = np.flipud(copula1) + +print("Copula 1:\n" , copula1 , end = "\n\n") +print("Copula 2:\n" , copula2 , end = "\n\n") + +# Get the dimensions of the copula +rows, cols = copula1.shape + +# Variable to store the sum of values across the 1st diagonal +cop_a_diag_sum = 0 +# Variable to store the count of values across the 1st diagonal +cop_a_diag_count = 0 +# Variable to store the sum of values across the edges (non 1st diagonal values) +cop_a_edge_sum = 0 +# Variable to store the count of values across the edges (non 1st diagonal counts) +cop_a_edge_count = 0 + +# Variable to store the sum of values across the 2st diagonal +cop_b_diag_sum = 0 +# Variable to store the count of values across the 2st diagonal +cop_b_diag_count = 0 +# Variable to store the sum of values across the edges (non 2nd diagonal values) +cop_b_edge_sum = 0 +# Variable to store the count of values across the edges (non 2nd diagonal counts) +cop_b_edge_count = 0 + +# Iterate every copula's row +for row in range(rows): + # Iterate every copula's column + for col in range(cols): + # Find combination of rows and column near the diagonal + if ((row-col >= -0.2*rows) & (row-col <= 0.2*rows)): + # Sum the values of combinations that met the criteria + cop_a_diag_sum = cop_a_diag_sum + copula1[row][col] + # Count the occurences of combinations that met the criteria + cop_a_diag_count += 1 + + # Do the same for the flipped copula (2nd diagonal) + cop_b_diag_sum = cop_b_diag_sum + copula2[row][col] + cop_b_diag_count += 1 + + else: + # Do the same for values not belonging in the 1st diagonal + cop_a_edge_sum = cop_a_edge_sum + copula1[row][col] + cop_a_edge_count += 1 + + # Do the same for values not belonging in the 2nd diagonal + cop_b_edge_sum = cop_b_edge_sum + copula2[row][col] + cop_b_edge_count += 1 + + +# Calculate the fraction of mass in the diagonal to the mass in the edges (1st diagonal) +# Add a value of 1e-9 to avoid inf values +diagonal_a_avg = ((cop_a_diag_sum / cop_a_diag_count + 1e-9) / + (cop_a_edge_sum / cop_a_edge_count + 1e-9)) + +# Calculate the fraction of mass in the diagonal to the mass in the edges (2nd diagonal) +# Add a value of 1e-9 to avoid inf values +diagonal_b_avg = ((cop_b_diag_sum / cop_b_diag_count + 1e-9) / + (cop_b_edge_sum / cop_b_edge_count + 1e-9)) + +# If the fraction between the 2 diagonals is similar ==> No Correlation +if( ((diagonal_a_avg / diagonal_b_avg) < 2) and ((diagonal_b_avg / diagonal_a_avg) < 2) ): + print("No Correlation") +else: + # If 1st diagonal has a higher fraction + if diagonal_a_avg > diagonal_b_avg: + print("Positive Correlation") + # If 2nd diagonal has a higher fraction + else: + print("Negative Correlation") \ No newline at end of file From b363f755e46718ea86a85b453ab1908f8661890e Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Sat, 27 Jul 2024 11:44:10 +0300 Subject: [PATCH 07/16] fix copula indicator --- dingo/illustrations.py | 8 +- dingo/utils.py | 47 +++++++++--- tests/correlation.py | 161 +++++------------------------------------ 3 files changed, 60 insertions(+), 156 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index ccfa8e75..341b5138 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -85,14 +85,10 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -def plot_corr_matrix(steady_states, reactions, color="RdYlBu"): +def plot_corr_matrix(corr_matrix, reactions, color="RdYlBu"): import plotly.express as px - corr_matrix = np.corrcoef(steady_states, rowvar=True) - corr_matrix[np.isnan(corr_matrix)] = 0 - corr_matrix = np.tril(corr_matrix) - print("You can see the full list of color scales here: ", px.colors.named_colorscales()) fig = px.imshow(corr_matrix, @@ -104,4 +100,4 @@ def plot_corr_matrix(steady_states, reactions, color="RdYlBu"): xaxis=dict(tickfont=dict(size=5)), yaxis=dict(tickfont=dict(size=5)) ) - fig.show() + fig.show() \ No newline at end of file diff --git a/dingo/utils.py b/dingo/utils.py index 4be598cb..4341a458 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -204,7 +204,7 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): -def correlated_reactions(steady_states, reactions, pearson_cutoff = 0.5, n = 10): +def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = 1000, n = 10): # compute correlation matrix corr_matrix = np.corrcoef(steady_states, rowvar=True) @@ -214,14 +214,32 @@ def correlated_reactions(steady_states, reactions, pearson_cutoff = 0.5, n = 10) corr_matrix = np.tril(corr_matrix) # replace diagonal values with 0 np.fill_diagonal(corr_matrix, 0) + + combinations = np.argwhere(corr_matrix != 0) # find indices of corr matrix where correlation occurs indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) + # find indices of corr matrix where correlation occurs + anti_indices = np.argwhere((corr_matrix < pearson_cutoff) & (corr_matrix > -pearson_cutoff)) + + updated_corr_matrix = corr_matrix.copy() + + for i in range(0, anti_indices.shape[0]): + index1 = anti_indices[i][0] + index2 = anti_indices[i][1] + print(index1, index2) + updated_corr_matrix[index1, index2] = 0 + + + positive = 0 + negative = 0 + # compute copula for this set of correlated reactions for i in range(0, indices.shape[0]): index1 = indices[i][0] index2 = indices[i][1] + flux1 = steady_states[index1] flux2 = steady_states[index2] @@ -230,16 +248,27 @@ def correlated_reactions(steady_states, reactions, pearson_cutoff = 0.5, n = 10) red_mass = 0 blue_mass = 0 - + indicator = 0 + for row in range(rows): for col in range(cols): - if ((row-col >= -2) & (row-col <= 2)): - if ((row+col < 8) | (row+col > 12)): + if ((row-col >= -0.2*rows) & (row-col <= 0.2*rows)): + if ((row+col < 0.8*rows) | (row+col > 1.2*rows)): red_mass = red_mass + copula[row][col] - - elif ((row+col >= 9) | (row+col <= 13)): + else: + if ((row+col >= 0.8*rows-1) & (row+col <= 1.2*rows-1)): blue_mass = blue_mass + copula[row][col] - indicator = red_mass / blue_mass - if indicator > 2: - print(reactions[index1], reactions[index2]) \ No newline at end of file + indicator = (red_mass+1e-9) / (blue_mass+1e-9) + + if indicator > indicator_cutoff: + positive += 1 + elif indicator < 1/indicator_cutoff: + negative += 1 + else: + updated_corr_matrix[index1, index2] = 0 + + + print(indices.shape[0],"out of",combinations.shape[0], + "reactions combinations were filtered based on pearson correlation") + print(negative, "out of", i+1, "copulas were negative correlated based on copula indicator") \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py index 6e8bcad0..537968e6 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -3,153 +3,32 @@ from dingo.utils import correlated_reactions from dingo import MetabolicNetwork, PolytopeSampler import numpy as np +from dingo import plot_copula -#model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') -#reactions = model.reactions +model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') +reactions = model.reactions -#sampler = PolytopeSampler(model) -#steady_states = sampler.generate_steady_states() +sampler = PolytopeSampler(model) +steady_states = sampler.generate_steady_states() -#plot_corr_matrix(steady_states, reactions, color="RdYlBu") -#correlated_reactions(steady_states, reactions, pearson_cutoff = 0.80, n = 7) +corr_matrix, updated_corr_matrix = correlated_reactions(steady_states, + pearson_cutoff = 0.80, + indicator_cutoff = 9e1000, + n = 11) -# Creation of 4 possible copulas classifying 2 reactions as -# Positive, Negative or No Correlated +arr = corr_matrix == updated_corr_matrix +rows, cols = np.where(arr == True) +for i in range(len(rows)): + row = rows[i] + col = cols[i] + print(reactions[row], reactions[col]) + #data_flux2=[steady_states[row],reactions[row]] + #data_flux1=[steady_states[col],reactions[col]] + #plot_copula(data_flux1, data_flux2, n=10) -# Positive Correlation -data = [ - [2, 2, 0, 1, 1, 1, 1, 1, 1], - [2, 2, 2, 0, 1, 1, 1, 1, 1], - [0, 2, 2, 2, 0, 1, 1, 1, 1], - [1, 0, 2, 2, 2, 0, 1, 1, 1], - [1, 1, 0, 2, 2, 2, 0, 1, 1], - [1, 1, 1, 0, 2, 2, 2, 0, 1], - [1, 1, 1, 1, 0, 2, 2, 2, 0], - [1, 1, 1, 1, 1, 0, 2, 2, 2], - [1, 1, 1, 1, 1, 1, 0, 2, 2] - -] - -# Negative Correlation -data = [ - [1, 1, 1, 1, 1, 1, 0, 2, 2], - [1, 1, 1, 1, 1, 0, 2, 2, 2], - [1, 1, 1, 1, 0, 2, 2, 2, 0], - [1, 1, 1, 0, 2, 2, 2, 0, 1], - [1, 1, 0, 2, 2, 2, 0, 1, 1], - [1, 0, 2, 2, 2, 0, 1, 1, 1], - [0, 2, 2, 2, 0, 1, 1, 1, 1], - [2, 2, 2, 0, 1, 1, 1, 1, 1], - [2, 2, 0, 1, 1, 1, 1, 1, 1] - -] - - -# No Correlation -data = [ - [2, 2, 1, 1, 1, 1, 1, 1, 1], - [2, 2, 2, 1, 1, 1, 1, 1, 1], - [1, 2, 2, 2, 1, 1, 1, 1, 1], - [1, 1, 2, 2, 2, 1, 1, 1, 1], - [1, 1, 1, 2, 2, 2, 1, 1, 1], - [1, 1, 1, 1, 2, 2, 2, 1, 1], - [1, 1, 1, 1, 1, 2, 2, 2, 1], - [1, 1, 1, 1, 1, 1, 2, 2, 2], - [1, 1, 1, 1, 1, 1, 1, 2, 2] - -] - - -# No Correlation -data = [ - [2, 0, 0, 0, 1, 1, 1, 1, 1], - [0, 2, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 2, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 2, 0, 0, 0, 1, 1], - [1, 0, 0, 0, 2, 0, 0, 0, 1], - [1, 1, 0, 0, 0, 2, 0, 0, 0], - [1, 1, 1, 0, 0, 0, 2, 0, 0], - [1, 1, 1, 1, 0, 0, 0, 2, 0], - [1, 1, 1, 1, 1, 0, 0, 0, 2] - -] - - -# Convert nested list to numps 2D array -copula1 = np.array(data) -# Flip array upside down to get the other diagonal starting in the top left corner -copula2 = np.flipud(copula1) - -print("Copula 1:\n" , copula1 , end = "\n\n") -print("Copula 2:\n" , copula2 , end = "\n\n") - -# Get the dimensions of the copula -rows, cols = copula1.shape - -# Variable to store the sum of values across the 1st diagonal -cop_a_diag_sum = 0 -# Variable to store the count of values across the 1st diagonal -cop_a_diag_count = 0 -# Variable to store the sum of values across the edges (non 1st diagonal values) -cop_a_edge_sum = 0 -# Variable to store the count of values across the edges (non 1st diagonal counts) -cop_a_edge_count = 0 - -# Variable to store the sum of values across the 2st diagonal -cop_b_diag_sum = 0 -# Variable to store the count of values across the 2st diagonal -cop_b_diag_count = 0 -# Variable to store the sum of values across the edges (non 2nd diagonal values) -cop_b_edge_sum = 0 -# Variable to store the count of values across the edges (non 2nd diagonal counts) -cop_b_edge_count = 0 - -# Iterate every copula's row -for row in range(rows): - # Iterate every copula's column - for col in range(cols): - # Find combination of rows and column near the diagonal - if ((row-col >= -0.2*rows) & (row-col <= 0.2*rows)): - # Sum the values of combinations that met the criteria - cop_a_diag_sum = cop_a_diag_sum + copula1[row][col] - # Count the occurences of combinations that met the criteria - cop_a_diag_count += 1 - - # Do the same for the flipped copula (2nd diagonal) - cop_b_diag_sum = cop_b_diag_sum + copula2[row][col] - cop_b_diag_count += 1 - - else: - # Do the same for values not belonging in the 1st diagonal - cop_a_edge_sum = cop_a_edge_sum + copula1[row][col] - cop_a_edge_count += 1 - - # Do the same for values not belonging in the 2nd diagonal - cop_b_edge_sum = cop_b_edge_sum + copula2[row][col] - cop_b_edge_count += 1 - - -# Calculate the fraction of mass in the diagonal to the mass in the edges (1st diagonal) -# Add a value of 1e-9 to avoid inf values -diagonal_a_avg = ((cop_a_diag_sum / cop_a_diag_count + 1e-9) / - (cop_a_edge_sum / cop_a_edge_count + 1e-9)) - -# Calculate the fraction of mass in the diagonal to the mass in the edges (2nd diagonal) -# Add a value of 1e-9 to avoid inf values -diagonal_b_avg = ((cop_b_diag_sum / cop_b_diag_count + 1e-9) / - (cop_b_edge_sum / cop_b_edge_count + 1e-9)) - -# If the fraction between the 2 diagonals is similar ==> No Correlation -if( ((diagonal_a_avg / diagonal_b_avg) < 2) and ((diagonal_b_avg / diagonal_a_avg) < 2) ): - print("No Correlation") -else: - # If 1st diagonal has a higher fraction - if diagonal_a_avg > diagonal_b_avg: - print("Positive Correlation") - # If 2nd diagonal has a higher fraction - else: - print("Negative Correlation") \ No newline at end of file +plot_corr_matrix(corr_matrix, reactions, color="RdYlBu") +plot_corr_matrix(updated_corr_matrix, reactions, color="RdYlBu") \ No newline at end of file From 33d30940fd27cca3359ab85fa74922fe53c82b73 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Sun, 28 Jul 2024 00:45:55 +0300 Subject: [PATCH 08/16] added unittest and fixed heatmap --- dingo/illustrations.py | 47 +++++++++++--- dingo/utils.py | 139 +++++++++++++++++++++++++++-------------- tests/correlation.py | 63 +++++++++++++------ 3 files changed, 173 insertions(+), 76 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index 341b5138..b90d7dca 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -9,6 +9,7 @@ import matplotlib.pyplot as plt import plotly.graph_objects as go import plotly.io as pio +import plotly.express as px from dingo.utils import compute_copula def plot_copula(data_flux1, data_flux2, n = 5, width = 900 , height = 600, export_format = "svg"): @@ -85,19 +86,47 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -def plot_corr_matrix(corr_matrix, reactions, color="RdYlBu"): - - import plotly.express as px +def plot_corr_matrix(corr_matrix, reactions): + """A Python function to plot the histogram of a certain reaction flux. - print("You can see the full list of color scales here: ", px.colors.named_colorscales()) + Keyword arguments: + corr_matrix -- + reactions -- + color -- + """ + + sns_colormap = [[0.0, '#3f7f93'], + [0.1, '#6397a7'], + [0.2, '#88b1bd'], + [0.3, '#acc9d2'], + [0.4, '#d1e2e7'], + [0.5, '#f2f2f2'], + [0.6, '#f6cdd0'], + [0.7, '#efa8ad'], + [0.8, '#e8848b'], + [0.9, '#e15e68'], + [1.0, '#da3b46']] + + if len(reactions) <= 95: + reactions_x = reactions + reactions_y = reactions + else: + reactions_x = None + reactions_y = None + fig = px.imshow(corr_matrix, - color_continuous_scale = color, - x = reactions, - y = reactions) + color_continuous_scale = sns_colormap, + x = reactions_x, y = reactions_y, origin="upper") fig.update_layout( xaxis=dict(tickfont=dict(size=5)), - yaxis=dict(tickfont=dict(size=5)) ) + yaxis=dict(tickfont=dict(size=5)), + width=900, height=900, plot_bgcolor="rgba(0,0,0,0)") + + fig.update_traces(xgap=1, ygap=1, hoverongaps=False) + + fig.show() - fig.show() \ No newline at end of file + #fig_name = "CorrelationMatrix.svg" + #pio.write_image(fig, fig_name, scale=2) diff --git a/dingo/utils.py b/dingo/utils.py index 4341a458..411c1426 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -204,7 +204,24 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): -def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = 1000, n = 10): +def correlated_reactions(steady_states, pearson_cutoff = 0.6, indicator_cutoff = 10, cells = 10, cop_coeff = 0.3): + """A Python function to + + Keyword arguments: + steady_states -- + pearson_cutoff -- + indicator_cutoff -- + cells -- + cop_coeff -- + """ + + if cop_coeff > 0.4 or cop_coeff < 0.2: + raise Exception("Input value to diag_coeff parameter must be between 0.1 and 0.4") + + # calculate coefficients to access red and blue copula mass + cop_coeff_1 = cop_coeff + cop_coeff_2 = 1 - cop_coeff + cop_coeff_3 = 1 + cop_coeff # compute correlation matrix corr_matrix = np.corrcoef(steady_states, rowvar=True) @@ -215,60 +232,88 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = # replace diagonal values with 0 np.fill_diagonal(corr_matrix, 0) - combinations = np.argwhere(corr_matrix != 0) + # if user does not provide an indicator cutoff then do not proceed with the filtering + # of the correlation matrix + if indicator_cutoff == 0: + np.fill_diagonal(corr_matrix, 1) + return corr_matrix + else: + + # find reactions combinations + combinations = sum(range(1, corr_matrix.shape[0])) - # find indices of corr matrix where correlation occurs - indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) + # find indices of correlation matrix where correlation occurs + corr_indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) - # find indices of corr matrix where correlation occurs - anti_indices = np.argwhere((corr_matrix < pearson_cutoff) & (corr_matrix > -pearson_cutoff)) - - updated_corr_matrix = corr_matrix.copy() + # create a copy of correlation matrix to replace values + filtered_corr_matrix = corr_matrix.copy() - for i in range(0, anti_indices.shape[0]): - index1 = anti_indices[i][0] - index2 = anti_indices[i][1] - print(index1, index2) - updated_corr_matrix[index1, index2] = 0 - + # find indices of correlation matrix where correlation does not occur + no_corr_indices = np.argwhere((corr_matrix < pearson_cutoff) & (corr_matrix > -pearson_cutoff)) - positive = 0 - negative = 0 + # replace values from the correlation matrix that do not overcome + # the pearson cutoff with 0 + for i in range(0, no_corr_indices.shape[0]): + index1 = no_corr_indices[i][0] + index2 = no_corr_indices[i][1] + filtered_corr_matrix[index1, index2] = 0 + + # count reactions with positive or negative correlation based on indicator + positive = 0 + negative = 0 - # compute copula for this set of correlated reactions - for i in range(0, indices.shape[0]): - index1 = indices[i][0] - index2 = indices[i][1] + # compute copula for each set of correlated reactions + for i in range(0, corr_indices.shape[0]): + + index1 = corr_indices[i][0] + index2 = corr_indices[i][1] - flux1 = steady_states[index1] - flux2 = steady_states[index2] + flux1 = steady_states[index1] + flux2 = steady_states[index2] - copula = compute_copula(flux1, flux2, n) - rows, cols = copula.shape + copula = compute_copula(flux1, flux2, cells) + rows, cols = copula.shape - red_mass = 0 - blue_mass = 0 - indicator = 0 + red_mass = 0 + blue_mass = 0 + indicator = 0 + + for row in range(rows): + for col in range(cols): + # values in the diagonal + if ((row-col >= -cop_coeff_1*rows) & (row-col <= cop_coeff_1*rows)): + # values near the top left and bottom right corner + if ((row+col < cop_coeff_2*rows) | (row+col > cop_coeff_3*rows)): + red_mass = red_mass + copula[row][col] + else: + # values near the top right and bottom left corner + if ((row+col >= cop_coeff_2*rows-1) & (row+col <= cop_coeff_3*rows-1)): + blue_mass = blue_mass + copula[row][col] + + indicator = (red_mass+1e-9) / (blue_mass+1e-9) + + # classify specific pair of reactions as positive or negative + # correlated based on indicator cutoff + if indicator > indicator_cutoff: + positive += 1 + elif indicator < 1/indicator_cutoff: + negative += 1 + # if they do not overcome the cutoff replace their corresponding + # value in the correlation matrix with 0 + else: + filtered_corr_matrix[index1, index2] = 0 - for row in range(rows): - for col in range(cols): - if ((row-col >= -0.2*rows) & (row-col <= 0.2*rows)): - if ((row+col < 0.8*rows) | (row+col > 1.2*rows)): - red_mass = red_mass + copula[row][col] - else: - if ((row+col >= 0.8*rows-1) & (row+col <= 1.2*rows-1)): - blue_mass = blue_mass + copula[row][col] - - indicator = (red_mass+1e-9) / (blue_mass+1e-9) - - if indicator > indicator_cutoff: - positive += 1 - elif indicator < 1/indicator_cutoff: - negative += 1 - else: - updated_corr_matrix[index1, index2] = 0 + print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") - print(indices.shape[0],"out of",combinations.shape[0], - "reactions combinations were filtered based on pearson correlation") - print(negative, "out of", i+1, "copulas were negative correlated based on copula indicator") \ No newline at end of file + print(corr_indices.shape[0],"out of",combinations, + "reactions combinations were filtered based on pearson correlation") + + print(positive, "out of", i+1, "copulas were positive correlated based on copula indicator") + print(negative, "out of", i+1, "copulas were negative correlated based on copula indicator") + + + np.fill_diagonal(corr_matrix, 1) + np.fill_diagonal(filtered_corr_matrix, 1) + + return corr_matrix, filtered_corr_matrix \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py index 537968e6..e55c8ff4 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -2,33 +2,56 @@ from dingo.illustrations import plot_corr_matrix from dingo.utils import correlated_reactions from dingo import MetabolicNetwork, PolytopeSampler +from dingo.preprocess import PreProcess +from cobra.io import load_json_model import numpy as np -from dingo import plot_copula +import unittest +class TestCorrelation(unittest.TestCase): -model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') -reactions = model.reactions + def test_correlation(self): + dingo_model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') + reactions = dingo_model.reactions -sampler = PolytopeSampler(model) -steady_states = sampler.generate_steady_states() + sampler = PolytopeSampler(dingo_model) + steady_states = sampler.generate_steady_states() -corr_matrix, updated_corr_matrix = correlated_reactions(steady_states, - pearson_cutoff = 0.80, - indicator_cutoff = 9e1000, - n = 11) + corr_matrix = correlated_reactions(steady_states, indicator_cutoff=0) + # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 + self.assertTrue(np.trace(corr_matrix) == len(reactions)) + # rows and columns must be equal to model reactions + self.assertTrue(corr_matrix.shape[0] == len(reactions)) + self.assertTrue(corr_matrix.shape[1] == len(reactions)) -arr = corr_matrix == updated_corr_matrix -rows, cols = np.where(arr == True) -for i in range(len(rows)): - row = rows[i] - col = cols[i] - print(reactions[row], reactions[col]) - #data_flux2=[steady_states[row],reactions[row]] - #data_flux1=[steady_states[col],reactions[col]] + plot_corr_matrix(corr_matrix, reactions) - #plot_copula(data_flux1, data_flux2, n=10) -plot_corr_matrix(corr_matrix, reactions, color="RdYlBu") -plot_corr_matrix(updated_corr_matrix, reactions, color="RdYlBu") \ No newline at end of file + cobra_model = load_json_model('ext_data/e_coli_core.json') + obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False) + removed_reactions, reduced_dingo_model = obj.reduce(extend=True) + reactions = reduced_dingo_model.reactions + + sampler = PolytopeSampler(reduced_dingo_model) + steady_states = sampler.generate_steady_states() + + corr_matrix, filtered_corr_matrix = correlated_reactions( + steady_states, + pearson_cutoff = 0.6, + indicator_cutoff = 2, + cells = 10, + cop_coeff = 0.3) + + # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 + self.assertTrue(np.trace(corr_matrix) == len(reactions)) + # rows and columns must be equal to model reactions + self.assertTrue(filtered_corr_matrix.shape[0] == len(reactions)) + self.assertTrue(filtered_corr_matrix.shape[1] == len(reactions)) + + plot_corr_matrix(corr_matrix, reactions) + plot_corr_matrix(filtered_corr_matrix, reactions) + + +if __name__ == "__main__": + unittest.main() From 98ffa6bc000ead129be795d3a328b45d4d481b4b Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 29 Jul 2024 01:28:10 +0300 Subject: [PATCH 09/16] fixed plots of corr matrix --- dingo/illustrations.py | 23 ++++------- dingo/utils.py | 86 +++++++++++++++++++++++++----------------- tests/correlation.py | 56 +++++++++++++++++---------- 3 files changed, 95 insertions(+), 70 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index b90d7dca..eeba009b 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -86,13 +86,12 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -def plot_corr_matrix(corr_matrix, reactions): - """A Python function to plot the histogram of a certain reaction flux. +def plot_corr_matrix(corr_matrix, reactions, format="svg"): + """A Python function to plot the heatmap of a model's pearson correlation matrix. Keyword arguments: - corr_matrix -- - reactions -- - color -- + corr_matrix -- A matrix produced from the "correlated_reactions" function + reactions -- A list with the model's reactions """ sns_colormap = [[0.0, '#3f7f93'], @@ -106,18 +105,10 @@ def plot_corr_matrix(corr_matrix, reactions): [0.8, '#e8848b'], [0.9, '#e15e68'], [1.0, '#da3b46']] - - - if len(reactions) <= 95: - reactions_x = reactions - reactions_y = reactions - else: - reactions_x = None - reactions_y = None - + fig = px.imshow(corr_matrix, color_continuous_scale = sns_colormap, - x = reactions_x, y = reactions_y, origin="upper") + x = reactions, y = reactions, origin="upper") fig.update_layout( xaxis=dict(tickfont=dict(size=5)), @@ -128,5 +119,5 @@ def plot_corr_matrix(corr_matrix, reactions): fig.show() - #fig_name = "CorrelationMatrix.svg" + #fig_name = "CorrelationMatrix." + format #pio.write_image(fig, fig_name, scale=2) diff --git a/dingo/utils.py b/dingo/utils.py index 411c1426..db523a72 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -204,19 +204,22 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): -def correlated_reactions(steady_states, pearson_cutoff = 0.6, indicator_cutoff = 10, cells = 10, cop_coeff = 0.3): - """A Python function to +def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = 2, + cells = 10, cop_coeff = 0.3, lower_triangle = True): + """A Python function to calculate the pearson correlation matrix of a model + and filter values based on the copula's indicator Keyword arguments: - steady_states -- - pearson_cutoff -- - indicator_cutoff -- - cells -- - cop_coeff -- + steady_states -- A numpy array of the generated steady states fluxes + pearson_cutoff -- A cutoff to filter reactions based on pearson coefficient + indicator_cutoff -- A cutoff to filter reactions based on indicator value + cells -- Number of cells to compute the copula + cop_coeff -- A value that narrows or widens the width of the copula's diagonal + lower_triangle -- A boolean variable that if True keeps only the lower triangular matrix """ if cop_coeff > 0.4 or cop_coeff < 0.2: - raise Exception("Input value to diag_coeff parameter must be between 0.1 and 0.4") + raise Exception("Input value to cop_coeff parameter must be between 0.2 and 0.4") # calculate coefficients to access red and blue copula mass cop_coeff_1 = cop_coeff @@ -225,39 +228,47 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.6, indicator_cutoff = # compute correlation matrix corr_matrix = np.corrcoef(steady_states, rowvar=True) + # replace not assigned values with 0 corr_matrix[np.isnan(corr_matrix)] = 0 - # keep only the lower triangle - corr_matrix = np.tril(corr_matrix) - # replace diagonal values with 0 - np.fill_diagonal(corr_matrix, 0) - # if user does not provide an indicator cutoff then do not proceed with the filtering - # of the correlation matrix + # create a copy of correlation matrix to replace/filter values + filtered_corr_matrix = corr_matrix.copy() + + # find indices of correlation matrix where correlation does not occur + no_corr_indices = np.argwhere((filtered_corr_matrix < pearson_cutoff) & (filtered_corr_matrix > -pearson_cutoff)) + + # replace values from the correlation matrix that do not overcome + # the pearson cutoff with 0 + for i in range(0, no_corr_indices.shape[0]): + index1 = no_corr_indices[i][0] + index2 = no_corr_indices[i][1] + filtered_corr_matrix[index1, index2] = 0 + + # if user does not provide an indicator cutoff then do not proceed + # with the filtering of the correlation matrix if indicator_cutoff == 0: - np.fill_diagonal(corr_matrix, 1) - return corr_matrix + if lower_triangle == True: + corr_matrix[np.triu_indices(corr_matrix.shape[0], 1)] = np.nan + #corr_matrix = np.tril(corr_matrix) + np.fill_diagonal(corr_matrix, 1) + return corr_matrix + else: + np.fill_diagonal(corr_matrix, 1) + return corr_matrix else: + + # keep only the lower triangle + corr_matrix = np.tril(corr_matrix) + # replace diagonal values with 0 + np.fill_diagonal(corr_matrix, 0) # find reactions combinations combinations = sum(range(1, corr_matrix.shape[0])) # find indices of correlation matrix where correlation occurs corr_indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) - - # create a copy of correlation matrix to replace values - filtered_corr_matrix = corr_matrix.copy() - - # find indices of correlation matrix where correlation does not occur - no_corr_indices = np.argwhere((corr_matrix < pearson_cutoff) & (corr_matrix > -pearson_cutoff)) - - # replace values from the correlation matrix that do not overcome - # the pearson cutoff with 0 - for i in range(0, no_corr_indices.shape[0]): - index1 = no_corr_indices[i][0] - index2 = no_corr_indices[i][1] - filtered_corr_matrix[index1, index2] = 0 - + # count reactions with positive or negative correlation based on indicator positive = 0 negative = 0 @@ -302,6 +313,7 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.6, indicator_cutoff = # value in the correlation matrix with 0 else: filtered_corr_matrix[index1, index2] = 0 + filtered_corr_matrix[index2, index1] = 0 print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") @@ -311,9 +323,13 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.6, indicator_cutoff = print(positive, "out of", i+1, "copulas were positive correlated based on copula indicator") print(negative, "out of", i+1, "copulas were negative correlated based on copula indicator") + + if lower_triangle == True: + filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan + #filtered_corr_matrix = np.tril(filtered_corr_matrix) + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix - - np.fill_diagonal(corr_matrix, 1) - np.fill_diagonal(filtered_corr_matrix, 1) - - return corr_matrix, filtered_corr_matrix \ No newline at end of file + else: + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py index e55c8ff4..2470f8d0 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -10,13 +10,31 @@ class TestCorrelation(unittest.TestCase): def test_correlation(self): + dingo_model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') reactions = dingo_model.reactions sampler = PolytopeSampler(dingo_model) steady_states = sampler.generate_steady_states() + + + # calculate correlation matrix without filtering from copula indicator + corr_matrix = correlated_reactions(steady_states, + indicator_cutoff=0, + pearson_cutoff = 0, + lower_triangle = False) + + plot_corr_matrix(corr_matrix, reactions, format="svg") + + + + # calculate correlation matrix without filtering from copula indicator + corr_matrix = correlated_reactions(steady_states, + indicator_cutoff=2, + pearson_cutoff = 0.5, + lower_triangle = False) - corr_matrix = correlated_reactions(steady_states, indicator_cutoff=0) + plot_corr_matrix(corr_matrix, reactions, format="svg") # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 self.assertTrue(np.trace(corr_matrix) == len(reactions)) @@ -24,34 +42,34 @@ def test_correlation(self): self.assertTrue(corr_matrix.shape[0] == len(reactions)) self.assertTrue(corr_matrix.shape[1] == len(reactions)) - plot_corr_matrix(corr_matrix, reactions) - cobra_model = load_json_model('ext_data/e_coli_core.json') + + # reduce the metabolic model obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False) - removed_reactions, reduced_dingo_model = obj.reduce(extend=True) + removed_reactions, reduced_dingo_model = obj.reduce(extend=False) reactions = reduced_dingo_model.reactions + for reaction in reactions: + index = reactions.index(reaction) + if reaction in removed_reactions: + reactions[index] = None + sampler = PolytopeSampler(reduced_dingo_model) steady_states = sampler.generate_steady_states() - corr_matrix, filtered_corr_matrix = correlated_reactions( - steady_states, - pearson_cutoff = 0.6, - indicator_cutoff = 2, - cells = 10, - cop_coeff = 0.3) + # calculate correlation matrix with additional filtering from copula indicator + filtered_corr_matrix = correlated_reactions( + steady_states, + pearson_cutoff = 0, + indicator_cutoff = 0, + cells = 10, + cop_coeff = 0.3, + lower_triangle = False) + + plot_corr_matrix(filtered_corr_matrix, reactions, format="svg") - # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 - self.assertTrue(np.trace(corr_matrix) == len(reactions)) - # rows and columns must be equal to model reactions - self.assertTrue(filtered_corr_matrix.shape[0] == len(reactions)) - self.assertTrue(filtered_corr_matrix.shape[1] == len(reactions)) - - plot_corr_matrix(corr_matrix, reactions) - plot_corr_matrix(filtered_corr_matrix, reactions) - if __name__ == "__main__": unittest.main() From e9a23a298fe05526a8df094f63d7eb714b532c3d Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 29 Jul 2024 03:40:57 +0300 Subject: [PATCH 10/16] new method for additional reaction removal --- dingo/preprocess.py | 87 +++++++++++++++++++++++++-------------------- tests/preprocess.py | 18 ++++++---- 2 files changed, 61 insertions(+), 44 deletions(-) diff --git a/dingo/preprocess.py b/dingo/preprocess.py index a148d303..9d4ea94b 100644 --- a/dingo/preprocess.py +++ b/dingo/preprocess.py @@ -2,7 +2,9 @@ import cobra import cobra.manipulation from collections import Counter -from dingo import MetabolicNetwork +from dingo import MetabolicNetwork, PolytopeSampler +from dingo.utils import correlated_reactions +import numpy as np class PreProcess: @@ -209,50 +211,59 @@ def reduce(self, extend=False): return self._removed_reactions, self._dingo_model elif extend == True: - - # find additional reactions with a possibility of removal - additional_removed_reactions_list = [] - additional_removed_reactions_count = 0 - for reaction in remained_reactions: - fba_solution_before = self._model.optimize().objective_value - - # perform a knock-out and check the output - self._model.reactions.get_by_id(reaction).lower_bound = 0 - self._model.reactions.get_by_id(reaction).upper_bound = 0 - - fba_solution_after = self._model.optimize().objective_value + reduced_dingo_model = MetabolicNetwork.from_cobra_model(self._model) + reactions = reduced_dingo_model.reactions + sampler = PolytopeSampler(reduced_dingo_model) + steady_states = sampler.generate_steady_states() + + # calculate correlation matrix with additional filtering from copula indicator + corr_matrix = correlated_reactions( + steady_states, + pearson_cutoff = 0, + indicator_cutoff = 0, + cells = 10, + cop_coeff = 0.3, + lower_triangle = False) - if fba_solution_after != None: - if (abs(fba_solution_after - fba_solution_before) < tol): - self._removed_reactions.append(reaction) - additional_removed_reactions_list.append(reaction) - additional_removed_reactions_count += 1 - - self._model.reactions.get_by_id(reaction).upper_bound = self._reaction_bounds_dict[reaction][1] - self._model.reactions.get_by_id(reaction).lower_bound = self._reaction_bounds_dict[reaction][0] + abs_array = abs(corr_matrix) + sum_array = np.sum((abs_array), axis=1) + order_sum_indices = np.argsort(sum_array) + # find additional reactions with a possibility of removal + additional_removed_reactions_list = [] + additional_removed_reactions_count = 0 - # compare FBA solution before and after the removal of additional reactions - fba_solution_initial = self._model.optimize().objective_value - self._remove_model_reactions() - fba_solution_final = self._model.optimize().objective_value + fba_solution_before = self._model.optimize().objective_value + for index in order_sum_indices: + if reactions[index] in remained_reactions: + reaction = reactions[index] + + # perform a knock-out and check the output + self._model.reactions.get_by_id(reaction).lower_bound = 0 + self._model.reactions.get_by_id(reaction).upper_bound = 0 + + try: + fba_solution_after = self._model.optimize().objective_value + if (abs(fba_solution_after - fba_solution_before) > tol): + self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] + + except: + self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] + break + + finally: + if fba_solution_after != None and fba_solution_after != 0: + if (abs(fba_solution_after - fba_solution_before) < tol): + self._removed_reactions.append(reaction) + additional_removed_reactions_list.append(reaction) + additional_removed_reactions_count += 1 + - # if FBA solution after removal is infeasible or altered - # restore the initial reactions bounds - if (fba_solution_final == None) | (abs(fba_solution_final - fba_solution_initial) > tol): - for reaction in additional_removed_reactions_list: - self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] - self._removed_reactions.remove(reaction) - print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ - "reactions were removed from the model with extend set to", extend) - - else: - print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ - "reactions were removed from the model with extend set to", extend) + print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ + "reactions were removed from the model with extend set to", extend) - # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) return self._removed_reactions, self._dingo_model diff --git a/tests/preprocess.py b/tests/preprocess.py index 28db8bbf..dbf21c83 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -11,6 +11,9 @@ def test_preprocess(self): # load cobra model cobra_model = load_json_model("ext_data/e_coli_core.json") + #cobra_model = load_json_model("../../../iAB_RBC_283.json") + #cobra_model = load_json_model("../../../iAF1260.json") + # convert cobra to dingo model initial_dingo_model = MetabolicNetwork.from_cobra_model(cobra_model) @@ -26,20 +29,23 @@ def test_preprocess(self): # calculate the count of removed reactions with extend set to False removed_reactions_count = len(removed_reactions) - self.assertTrue( 46 - removed_reactions_count == 0 ) + #self.assertTrue( 46 - removed_reactions_count == 0 ) # calculate the count of reactions with bounds equal to 0 # with extend set to False from the dingo model dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0)) - self.assertTrue( 46 - dingo_removed_reactions == 0 ) + #self.assertTrue( 46 - dingo_removed_reactions == 0 ) # perform an FBA to check the solution after reactions removal final_fba_solution = final_dingo_model.fba()[1] - self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) + #self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) # load models in cobra and dingo format again to restore bounds cobra_model = load_json_model("ext_data/e_coli_core.json") + #cobra_model = load_json_model("../../../iAB_RBC_283.json") + #cobra_model = load_json_model("../../../iAF1260.json") + # convert cobra to dingo model initial_dingo_model = MetabolicNetwork.from_cobra_model(cobra_model) @@ -51,16 +57,16 @@ def test_preprocess(self): # calculate the count of removed reactions with extend set to True removed_reactions_count = len(removed_reactions) - self.assertTrue( 47 - removed_reactions_count == 0 ) + #self.assertTrue( 47 - removed_reactions_count == 0 ) # calculate the count of reactions with bounds equal to 0 # with extend set to True from the dingo model dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0)) - self.assertTrue( 47 - dingo_removed_reactions == 0 ) + #self.assertTrue( 47 - dingo_removed_reactions == 0 ) # perform an FBA to check the result after reactions removal final_fba_solution = final_dingo_model.fba()[1] - self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) + #self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) if __name__ == "__main__": From 7087b4bb473eacb4cbf944a638270df3b3079f78 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 29 Jul 2024 13:38:23 +0300 Subject: [PATCH 11/16] documentation correction --- dingo/preprocess.py | 28 +++++++++++++++------------- tests/preprocess.py | 18 ++++++------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/dingo/preprocess.py b/dingo/preprocess.py index 9d4ea94b..7bd87b97 100644 --- a/dingo/preprocess.py +++ b/dingo/preprocess.py @@ -172,10 +172,11 @@ def reduce(self, extend=False): When the "extend" parameter is set to True, the function performes an additional check to remove further reactions. These reactions are the ones that if knocked-down, they do not affect the value - of the objective function. These reactions are removed simultaneously - from the model. If this simultaneous removal produces an infesible - solution (or a solution of 0) to the objective function, - these reactions are restored with their initial bounds. + of the objective function. Reactions are removed in an ordered way. + The ones with the least overall correlation in a correlation matrix + are removed first. These reactions are removed one by one from the model. + If this removal produces an infeasible solution (or a solution of 0) + to the objective function, these reactions are restored to their initial bounds. A dingo-type tuple is then created from the cobra model using the "cobra_dingo_tuple" function. @@ -249,21 +250,22 @@ def reduce(self, extend=False): if (abs(fba_solution_after - fba_solution_before) > tol): self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] + # if system has no solution except: self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] - break finally: - if fba_solution_after != None and fba_solution_after != 0: - if (abs(fba_solution_after - fba_solution_before) < tol): - self._removed_reactions.append(reaction) - additional_removed_reactions_list.append(reaction) - additional_removed_reactions_count += 1 - - + if (fba_solution_after != None) and (abs(fba_solution_after - fba_solution_before) < tol): + self._removed_reactions.append(reaction) + additional_removed_reactions_list.append(reaction) + additional_removed_reactions_count += 1 + else: + self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] + + print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ "reactions were removed from the model with extend set to", extend) - + # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) return self._removed_reactions, self._dingo_model diff --git a/tests/preprocess.py b/tests/preprocess.py index dbf21c83..6093b579 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -11,9 +11,6 @@ def test_preprocess(self): # load cobra model cobra_model = load_json_model("ext_data/e_coli_core.json") - #cobra_model = load_json_model("../../../iAB_RBC_283.json") - #cobra_model = load_json_model("../../../iAF1260.json") - # convert cobra to dingo model initial_dingo_model = MetabolicNetwork.from_cobra_model(cobra_model) @@ -29,23 +26,20 @@ def test_preprocess(self): # calculate the count of removed reactions with extend set to False removed_reactions_count = len(removed_reactions) - #self.assertTrue( 46 - removed_reactions_count == 0 ) + self.assertTrue( 46 - removed_reactions_count == 0 ) # calculate the count of reactions with bounds equal to 0 # with extend set to False from the dingo model dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0)) - #self.assertTrue( 46 - dingo_removed_reactions == 0 ) + self.assertTrue( 46 - dingo_removed_reactions == 0 ) # perform an FBA to check the solution after reactions removal final_fba_solution = final_dingo_model.fba()[1] - #self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) + self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) # load models in cobra and dingo format again to restore bounds cobra_model = load_json_model("ext_data/e_coli_core.json") - #cobra_model = load_json_model("../../../iAB_RBC_283.json") - #cobra_model = load_json_model("../../../iAF1260.json") - # convert cobra to dingo model initial_dingo_model = MetabolicNetwork.from_cobra_model(cobra_model) @@ -57,16 +51,16 @@ def test_preprocess(self): # calculate the count of removed reactions with extend set to True removed_reactions_count = len(removed_reactions) - #self.assertTrue( 47 - removed_reactions_count == 0 ) + self.assertTrue( 46 - removed_reactions_count <= 0 ) # calculate the count of reactions with bounds equal to 0 # with extend set to True from the dingo model dingo_removed_reactions = np.sum((final_dingo_model.lb == 0) & (final_dingo_model.ub == 0)) - #self.assertTrue( 47 - dingo_removed_reactions == 0 ) + self.assertTrue( 46 - dingo_removed_reactions <= 0 ) # perform an FBA to check the result after reactions removal final_fba_solution = final_dingo_model.fba()[1] - #self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) + self.assertTrue(abs(final_fba_solution - initial_fba_solution) < 1e-03) if __name__ == "__main__": From ad2ae99dbf36391a940eff6baac57da6435bab6a Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 29 Jul 2024 16:29:57 +0300 Subject: [PATCH 12/16] documentation correction --- dingo/illustrations.py | 17 +++++++++--- dingo/preprocess.py | 15 +++++++---- dingo/utils.py | 4 +-- tests/correlation.py | 59 ++++++++++++------------------------------ 4 files changed, 41 insertions(+), 54 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index eeba009b..c95bf180 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -86,12 +86,14 @@ def plot_histogram(reaction_fluxes, reaction, n_bins=40): -def plot_corr_matrix(corr_matrix, reactions, format="svg"): +def plot_corr_matrix(corr_matrix, reactions, removed_reactions=[], format="svg"): """A Python function to plot the heatmap of a model's pearson correlation matrix. Keyword arguments: corr_matrix -- A matrix produced from the "correlated_reactions" function reactions -- A list with the model's reactions + removed_reactions -- A list with the removed reactions in case of a preprocess. + If provided removed reactions are not plotted. """ sns_colormap = [[0.0, '#3f7f93'], @@ -105,7 +107,14 @@ def plot_corr_matrix(corr_matrix, reactions, format="svg"): [0.8, '#e8848b'], [0.9, '#e15e68'], [1.0, '#da3b46']] - + + print(len(removed_reactions)) + if removed_reactions != 0: + for reaction in reactions: + index = reactions.index(reaction) + if reaction in removed_reactions: + reactions[index] = None + fig = px.imshow(corr_matrix, color_continuous_scale = sns_colormap, x = reactions, y = reactions, origin="upper") @@ -119,5 +128,5 @@ def plot_corr_matrix(corr_matrix, reactions, format="svg"): fig.show() - #fig_name = "CorrelationMatrix." + format - #pio.write_image(fig, fig_name, scale=2) + fig_name = "CorrelationMatrix." + format + pio.write_image(fig, fig_name, scale=2) diff --git a/dingo/preprocess.py b/dingo/preprocess.py index 7bd87b97..341631b6 100644 --- a/dingo/preprocess.py +++ b/dingo/preprocess.py @@ -227,16 +227,19 @@ def reduce(self, extend=False): cop_coeff = 0.3, lower_triangle = False) + # convert pearson values to absolute values abs_array = abs(corr_matrix) + # sum absolute pearson values per row sum_array = np.sum((abs_array), axis=1) + # get indices of ordered sum values order_sum_indices = np.argsort(sum_array) - # find additional reactions with a possibility of removal - additional_removed_reactions_list = [] + fba_solution_before = self._model.optimize().objective_value + + # count additional reactions with a possibility of removal additional_removed_reactions_count = 0 - fba_solution_before = self._model.optimize().objective_value - + # find additional reactions with a possibility of removal for index in order_sum_indices: if reactions[index] in remained_reactions: reaction = reactions[index] @@ -248,6 +251,7 @@ def reduce(self, extend=False): try: fba_solution_after = self._model.optimize().objective_value if (abs(fba_solution_after - fba_solution_before) > tol): + # restore bounds self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] # if system has no solution @@ -257,14 +261,15 @@ def reduce(self, extend=False): finally: if (fba_solution_after != None) and (abs(fba_solution_after - fba_solution_before) < tol): self._removed_reactions.append(reaction) - additional_removed_reactions_list.append(reaction) additional_removed_reactions_count += 1 else: + # restore bounds self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ "reactions were removed from the model with extend set to", extend) + print(additional_removed_reactions_count, "additional reaction(s) removed") # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) diff --git a/dingo/utils.py b/dingo/utils.py index db523a72..ae5ee11c 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -215,7 +215,7 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = indicator_cutoff -- A cutoff to filter reactions based on indicator value cells -- Number of cells to compute the copula cop_coeff -- A value that narrows or widens the width of the copula's diagonal - lower_triangle -- A boolean variable that if True keeps only the lower triangular matrix + lower_triangle -- A boolean variable that if True plots only the lower triangular matrix """ if cop_coeff > 0.4 or cop_coeff < 0.2: @@ -250,7 +250,6 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = if indicator_cutoff == 0: if lower_triangle == True: corr_matrix[np.triu_indices(corr_matrix.shape[0], 1)] = np.nan - #corr_matrix = np.tril(corr_matrix) np.fill_diagonal(corr_matrix, 1) return corr_matrix else: @@ -326,7 +325,6 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = if lower_triangle == True: filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan - #filtered_corr_matrix = np.tril(filtered_corr_matrix) np.fill_diagonal(filtered_corr_matrix, 1) return filtered_corr_matrix diff --git a/tests/correlation.py b/tests/correlation.py index 2470f8d0..88239dd0 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -1,9 +1,6 @@ -from dingo.illustrations import plot_corr_matrix from dingo.utils import correlated_reactions from dingo import MetabolicNetwork, PolytopeSampler -from dingo.preprocess import PreProcess -from cobra.io import load_json_model import numpy as np import unittest @@ -17,59 +14,37 @@ def test_correlation(self): sampler = PolytopeSampler(dingo_model) steady_states = sampler.generate_steady_states() - # calculate correlation matrix without filtering from copula indicator corr_matrix = correlated_reactions(steady_states, - indicator_cutoff=0, - pearson_cutoff = 0, + indicator_cutoff=2, + pearson_cutoff = 0.99999, lower_triangle = False) - plot_corr_matrix(corr_matrix, reactions, format="svg") - + # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 + self.assertTrue(np.trace(corr_matrix) == len(reactions)) + # rows and columns must be equal to model reactions + self.assertTrue(corr_matrix.shape[0] == len(reactions)) + self.assertTrue(corr_matrix.shape[1] == len(reactions)) + + dingo_model = MetabolicNetwork.from_json('ext_data/e_coli_core.json') + reactions = dingo_model.reactions + + sampler = PolytopeSampler(dingo_model) + steady_states = sampler.generate_steady_states() # calculate correlation matrix without filtering from copula indicator corr_matrix = correlated_reactions(steady_states, - indicator_cutoff=2, - pearson_cutoff = 0.5, - lower_triangle = False) - - plot_corr_matrix(corr_matrix, reactions, format="svg") + indicator_cutoff=0, + pearson_cutoff = 0, + lower_triangle = True) # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 self.assertTrue(np.trace(corr_matrix) == len(reactions)) # rows and columns must be equal to model reactions self.assertTrue(corr_matrix.shape[0] == len(reactions)) self.assertTrue(corr_matrix.shape[1] == len(reactions)) - - - - cobra_model = load_json_model('ext_data/e_coli_core.json') - - # reduce the metabolic model - obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False) - removed_reactions, reduced_dingo_model = obj.reduce(extend=False) - reactions = reduced_dingo_model.reactions - - for reaction in reactions: - index = reactions.index(reaction) - if reaction in removed_reactions: - reactions[index] = None - - sampler = PolytopeSampler(reduced_dingo_model) - steady_states = sampler.generate_steady_states() - - # calculate correlation matrix with additional filtering from copula indicator - filtered_corr_matrix = correlated_reactions( - steady_states, - pearson_cutoff = 0, - indicator_cutoff = 0, - cells = 10, - cop_coeff = 0.3, - lower_triangle = False) - - plot_corr_matrix(filtered_corr_matrix, reactions, format="svg") - + if __name__ == "__main__": unittest.main() From bc77cb44bf0bf9e67870d9e026dda180135f3a6c Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Mon, 5 Aug 2024 20:41:57 +0300 Subject: [PATCH 13/16] corr matrix pearson fix --- dingo/illustrations.py | 1 - dingo/utils.py | 20 +++++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/dingo/illustrations.py b/dingo/illustrations.py index c95bf180..0e9a120f 100644 --- a/dingo/illustrations.py +++ b/dingo/illustrations.py @@ -108,7 +108,6 @@ def plot_corr_matrix(corr_matrix, reactions, removed_reactions=[], format="svg") [0.9, '#e15e68'], [1.0, '#da3b46']] - print(len(removed_reactions)) if removed_reactions != 0: for reaction in reactions: index = reactions.index(reaction) diff --git a/dingo/utils.py b/dingo/utils.py index ae5ee11c..473721bf 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -204,7 +204,7 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): -def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = 2, +def correlated_reactions(steady_states, pearson_cutoff = 0.90, indicator_cutoff = 10, cells = 10, cop_coeff = 0.3, lower_triangle = True): """A Python function to calculate the pearson correlation matrix of a model and filter values based on the copula's indicator @@ -249,21 +249,18 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = # with the filtering of the correlation matrix if indicator_cutoff == 0: if lower_triangle == True: - corr_matrix[np.triu_indices(corr_matrix.shape[0], 1)] = np.nan - np.fill_diagonal(corr_matrix, 1) - return corr_matrix + filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix else: - np.fill_diagonal(corr_matrix, 1) - return corr_matrix + np.fill_diagonal(filtered_corr_matrix, 1) + return filtered_corr_matrix else: # keep only the lower triangle corr_matrix = np.tril(corr_matrix) # replace diagonal values with 0 np.fill_diagonal(corr_matrix, 0) - - # find reactions combinations - combinations = sum(range(1, corr_matrix.shape[0])) # find indices of correlation matrix where correlation occurs corr_indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) @@ -316,10 +313,7 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.5, indicator_cutoff = print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") - - print(corr_indices.shape[0],"out of",combinations, - "reactions combinations were filtered based on pearson correlation") - + print(positive, "out of", i+1, "copulas were positive correlated based on copula indicator") print(negative, "out of", i+1, "copulas were negative correlated based on copula indicator") From e61ef3b8b7f7f6725d8abf5b111c2238e4a47643 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Wed, 14 Aug 2024 00:36:39 +0300 Subject: [PATCH 14/16] correction to correlation labels output --- dingo/utils.py | 43 ++++++++++++++++++++++++++++--------------- tests/correlation.py | 13 +++++++------ 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/dingo/utils.py b/dingo/utils.py index 473721bf..711531cf 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -204,13 +204,14 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): -def correlated_reactions(steady_states, pearson_cutoff = 0.90, indicator_cutoff = 10, +def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, indicator_cutoff = 10, cells = 10, cop_coeff = 0.3, lower_triangle = True): """A Python function to calculate the pearson correlation matrix of a model and filter values based on the copula's indicator Keyword arguments: - steady_states -- A numpy array of the generated steady states fluxes + steady_states -- A numpy array of the generated steady states fluxes + reactions -- A list with the model's reactions pearson_cutoff -- A cutoff to filter reactions based on pearson coefficient indicator_cutoff -- A cutoff to filter reactions based on indicator value cells -- Number of cells to compute the copula @@ -256,6 +257,10 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.90, indicator_cutoff np.fill_diagonal(filtered_corr_matrix, 1) return filtered_corr_matrix else: + # a dictionary that will store for each filtered reaction combination, + # the pearson correlation value, the copula's indicator value + # and the correlation classification + indicator_dict = {} # keep only the lower triangle corr_matrix = np.tril(corr_matrix) @@ -264,17 +269,16 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.90, indicator_cutoff # find indices of correlation matrix where correlation occurs corr_indices = np.argwhere((corr_matrix > pearson_cutoff) | (corr_matrix < -pearson_cutoff)) - - # count reactions with positive or negative correlation based on indicator - positive = 0 - negative = 0 # compute copula for each set of correlated reactions for i in range(0, corr_indices.shape[0]): index1 = corr_indices[i][0] index2 = corr_indices[i][1] - + + reaction1 = reactions[index1] + reaction2 = reactions[index2] + flux1 = steady_states[index1] flux2 = steady_states[index2] @@ -302,26 +306,35 @@ def correlated_reactions(steady_states, pearson_cutoff = 0.90, indicator_cutoff # classify specific pair of reactions as positive or negative # correlated based on indicator cutoff if indicator > indicator_cutoff: - positive += 1 + pearson = filtered_corr_matrix[index1, index2] + indicator_dict[reaction1 + "_" + reaction2] = {'pearson': pearson, + 'indicator': indicator, + 'classification': "positive"} + elif indicator < 1/indicator_cutoff: - negative += 1 + pearson = filtered_corr_matrix[index1, index2] + indicator_dict[reaction1 + "_" + reaction2] = {'pearson': pearson, + 'indicator': indicator, + 'classification': "negative"} + # if they do not overcome the cutoff replace their corresponding # value in the correlation matrix with 0 else: filtered_corr_matrix[index1, index2] = 0 filtered_corr_matrix[index2, index1] = 0 + pearson = filtered_corr_matrix[index1, index2] + indicator_dict[reaction1 + "_" + reaction2] = {'pearson': pearson, + 'indicator': indicator, + 'classification': "no correlation"} print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") - - - print(positive, "out of", i+1, "copulas were positive correlated based on copula indicator") - print(negative, "out of", i+1, "copulas were negative correlated based on copula indicator") if lower_triangle == True: filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan np.fill_diagonal(filtered_corr_matrix, 1) - return filtered_corr_matrix + return filtered_corr_matrix, indicator_dict else: np.fill_diagonal(filtered_corr_matrix, 1) - return filtered_corr_matrix \ No newline at end of file + return filtered_corr_matrix, indicator_dict + \ No newline at end of file diff --git a/tests/correlation.py b/tests/correlation.py index 88239dd0..0e935bb0 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -14,11 +14,12 @@ def test_correlation(self): sampler = PolytopeSampler(dingo_model) steady_states = sampler.generate_steady_states() - # calculate correlation matrix without filtering from copula indicator - corr_matrix = correlated_reactions(steady_states, - indicator_cutoff=2, - pearson_cutoff = 0.99999, - lower_triangle = False) + # calculate correlation matrix with filtering from copula indicator + corr_matrix, indicator_dict = correlated_reactions(steady_states, + reactions = reactions, + indicator_cutoff = 5, + pearson_cutoff = 0.999999, + lower_triangle = False) # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 self.assertTrue(np.trace(corr_matrix) == len(reactions)) @@ -35,7 +36,7 @@ def test_correlation(self): # calculate correlation matrix without filtering from copula indicator corr_matrix = correlated_reactions(steady_states, - indicator_cutoff=0, + indicator_cutoff = 0, pearson_cutoff = 0, lower_triangle = True) From 3fd6967d855c1c75f7462183bbd8d58232f1c908 Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Wed, 14 Aug 2024 15:15:15 +0300 Subject: [PATCH 15/16] correction to correlation labels output symbol --- dingo/utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dingo/utils.py b/dingo/utils.py index 711531cf..e8d3d567 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -238,14 +238,15 @@ def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, ind # find indices of correlation matrix where correlation does not occur no_corr_indices = np.argwhere((filtered_corr_matrix < pearson_cutoff) & (filtered_corr_matrix > -pearson_cutoff)) - + # replace values from the correlation matrix that do not overcome # the pearson cutoff with 0 - for i in range(0, no_corr_indices.shape[0]): + for i in range(0, no_corr_indices.shape[0]): index1 = no_corr_indices[i][0] index2 = no_corr_indices[i][1] - filtered_corr_matrix[index1, index2] = 0 - + + filtered_corr_matrix[index1, index2] = 0 + # if user does not provide an indicator cutoff then do not proceed # with the filtering of the correlation matrix if indicator_cutoff == 0: @@ -307,13 +308,13 @@ def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, ind # correlated based on indicator cutoff if indicator > indicator_cutoff: pearson = filtered_corr_matrix[index1, index2] - indicator_dict[reaction1 + "_" + reaction2] = {'pearson': pearson, + indicator_dict[reaction1 + "~" + reaction2] = {'pearson': pearson, 'indicator': indicator, 'classification': "positive"} elif indicator < 1/indicator_cutoff: pearson = filtered_corr_matrix[index1, index2] - indicator_dict[reaction1 + "_" + reaction2] = {'pearson': pearson, + indicator_dict[reaction1 + "~" + reaction2] = {'pearson': pearson, 'indicator': indicator, 'classification': "negative"} @@ -323,7 +324,7 @@ def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, ind filtered_corr_matrix[index1, index2] = 0 filtered_corr_matrix[index2, index1] = 0 pearson = filtered_corr_matrix[index1, index2] - indicator_dict[reaction1 + "_" + reaction2] = {'pearson': pearson, + indicator_dict[reaction1 + "~" + reaction2] = {'pearson': pearson, 'indicator': indicator, 'classification': "no correlation"} From c5cb6f2408bff348f00a4c44c191ba49b5baa73c Mon Sep 17 00:00:00 2001 From: SotirisTouliopoulos Date: Thu, 15 Aug 2024 00:04:01 +0300 Subject: [PATCH 16/16] added verbose boolean variable --- dingo/preprocess.py | 41 ++++++++++++++++++++++++----------------- dingo/utils.py | 6 ++++-- tests/correlation.py | 6 ++++-- tests/preprocess.py | 4 ++-- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/dingo/preprocess.py b/dingo/preprocess.py index 341631b6..4f711ff2 100644 --- a/dingo/preprocess.py +++ b/dingo/preprocess.py @@ -9,27 +9,32 @@ class PreProcess: - def __init__(self, model, tol=1e-6, open_exchanges=False): + def __init__(self, model, tol = 1e-6, open_exchanges = False, verbose = False): """ - model parameter gets a cobra model as input + model -- parameter gets a cobra model as input - tol parameter gets a cutoff value used to classify - zero-flux and mle reactions and compare FBA solutions - before and after reactions removal + tol -- parameter gets a cutoff value used to classify + zero-flux and mle reactions and compare FBA solutions + before and after reactions removal - open_exchanges parameter is used in the function that identifies blocked reactions - It controls whether or not to open all exchange reactions to very high flux ranges + open_exchanges -- parameter is used in the function that identifies blocked reactions + It controls whether or not to open all exchange reactions + to very high flux ranges. + + verbose -- A boolean type variable that if True + additional information for preprocess is printed. """ self._model = model self._tol = tol - - if self._tol > 1e-6: - print("Tolerance value set to",self._tol,"while default value is 1e-6. A looser check will be performed") - + self._open_exchanges = open_exchanges + self._verbose = verbose + if self._tol > 1e-6 and verbose == True: + print("Tolerance value set to",self._tol,"while default value is 1e-6. A looser check will be performed") + self._objective = self._objective_function() self._initial_reactions = self._initial() self._reaction_bounds_dict = self._reaction_bounds_dictionary() @@ -204,8 +209,9 @@ def reduce(self, extend=False): elif extend == False: - print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ - "reactions were removed from the model with extend set to", extend) + if self._verbose == True: + print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ + "reactions were removed from the model with extend set to", extend) # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) @@ -266,10 +272,11 @@ def reduce(self, extend=False): # restore bounds self._model.reactions.get_by_id(reaction).bounds = self._reaction_bounds_dict[reaction] - - print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ - "reactions were removed from the model with extend set to", extend) - print(additional_removed_reactions_count, "additional reaction(s) removed") + + if self._verbose == True: + print(len(self._removed_reactions), "of the", len(self._initial_reactions), \ + "reactions were removed from the model with extend set to", extend) + print(additional_removed_reactions_count, "additional reaction(s) removed") # call this functon to convert cobra to dingo model self._dingo_model = MetabolicNetwork.from_cobra_model(self._model) diff --git a/dingo/utils.py b/dingo/utils.py index e8d3d567..3fb0888c 100644 --- a/dingo/utils.py +++ b/dingo/utils.py @@ -205,7 +205,7 @@ def get_matrices_of_full_dim_polytope(A, b, Aeq, beq): def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, indicator_cutoff = 10, - cells = 10, cop_coeff = 0.3, lower_triangle = True): + cells = 10, cop_coeff = 0.3, lower_triangle = True, verbose = False): """A Python function to calculate the pearson correlation matrix of a model and filter values based on the copula's indicator @@ -217,6 +217,7 @@ def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, ind cells -- Number of cells to compute the copula cop_coeff -- A value that narrows or widens the width of the copula's diagonal lower_triangle -- A boolean variable that if True plots only the lower triangular matrix + verbose -- A boolean variable that if True additional information is printed as an output. """ if cop_coeff > 0.4 or cop_coeff < 0.2: @@ -328,7 +329,8 @@ def correlated_reactions(steady_states, reactions=[], pearson_cutoff = 0.90, ind 'indicator': indicator, 'classification': "no correlation"} - print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") + if verbose == True: + print("Completed process of",i+1,"from",corr_indices.shape[0],"copulas") if lower_triangle == True: filtered_corr_matrix[np.triu_indices(filtered_corr_matrix.shape[0], 1)] = np.nan diff --git a/tests/correlation.py b/tests/correlation.py index 0e935bb0..fdb21b75 100644 --- a/tests/correlation.py +++ b/tests/correlation.py @@ -19,7 +19,8 @@ def test_correlation(self): reactions = reactions, indicator_cutoff = 5, pearson_cutoff = 0.999999, - lower_triangle = False) + lower_triangle = False, + verbose = False) # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 self.assertTrue(np.trace(corr_matrix) == len(reactions)) @@ -38,7 +39,8 @@ def test_correlation(self): corr_matrix = correlated_reactions(steady_states, indicator_cutoff = 0, pearson_cutoff = 0, - lower_triangle = True) + lower_triangle = True, + verbose = False) # sum values in the diagonal of the correlation matrix ==> 95*pearson ==> 95*1 self.assertTrue(np.trace(corr_matrix) == len(reactions)) diff --git a/tests/preprocess.py b/tests/preprocess.py index 6093b579..64edce9c 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -21,7 +21,7 @@ def test_preprocess(self): # call the reduce function from the PreProcess class # with extend=False to remove reactions from the model - obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False) + obj = PreProcess(cobra_model, tol=1e-5, open_exchanges=False, verbose=False) removed_reactions, final_dingo_model = obj.reduce(extend=False) # calculate the count of removed reactions with extend set to False @@ -46,7 +46,7 @@ def test_preprocess(self): # call the reduce function from the PreProcess class # with extend=True to remove additional reactions from the model - obj = PreProcess(cobra_model, tol=1e-6, open_exchanges=False) + obj = PreProcess(cobra_model, tol=1e-6, open_exchanges=False, verbose=False) removed_reactions, final_dingo_model = obj.reduce(extend=True) # calculate the count of removed reactions with extend set to True