Exposed two HMC walks with exponential and Gaussian sampling from Vol…

…esti (#68) * Exposed two HMC exact walks with exponential and Gaussian sampling from Volesti * Modified the parameters used to call the Gaussian and exponential HMC walks in bindings. Added tests to check they work as expected * Implemented variance and bias vector as user-defined parameters * Deleted redundant bias_vector_final and added sampling tests to workflow * Excluded sampling.py from github actions * modified sampling_no_multiphase.py to investigate the error in the github actions * modified sampling_no_multiphase.py to investigate the error in the github actions associated to the mean * Changed sampling.py to check with github actions * Modified sampling_no_multiphase.py to account for the behaviour of the mean. Now it checks if the size of the matrix steady_states is correct * Changed variance of exponential distribution to 50 in sampling_no_multiphase.py * Changed n to 500 in sampling_no_multiphase.py
GeomScale · Aug 9, 2023 · 5c6a4f5 · 5c6a4f5
1 parent f5440b6
commit 5c6a4f5
Show file tree

Hide file tree

Showing 7 changed files with 145 additions and 22 deletions.
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
@@ -47,6 +47,8 @@ jobs:
         poetry run python3 tests/full_dimensional.py;
         poetry run python3 tests/max_ball.py;
         poetry run python3 tests/scaling.py;
+        poetry run python3 tests/sampling.py;
+        poetry run python3 tests/sampling_no_multiphase.py;
         # currently we do not test with gurobi
         # python3 tests/fast_implementation_test.py;
 

diff --git a/dingo/PolytopeSampler.py b/dingo/PolytopeSampler.py
@@ -187,7 +187,7 @@ def generate_steady_states(
         return steady_states
 
     def generate_steady_states_no_multiphase(
-        self, method = 'billiard_walk', n=1000, burn_in=0, thinning=1
+        self, method = 'billiard_walk', n=1000, burn_in=0, thinning=1, variance=1.0, bias_vector=None
     ):
         """A member function to sample steady states.
 
@@ -197,12 +197,17 @@ def generate_steady_states_no_multiphase(
         burn_in -- the number of points to burn before sampling
         thinning -- the walk length of the chain
         """
-
+        	
         self.get_polytope()
 
         P = HPolytope(self._A, self._b)
+
+        if bias_vector is None:
+            bias_vector = np.ones(self._A.shape[1], dtype=np.float64)
+        else:
+            bias_vector = bias_vector.astype('float64')
 
-        samples = P.generate_samples(method, n, burn_in, thinning, self._parameters["fast_computations"])
+        samples = P.generate_samples(method, n, burn_in, thinning, self._parameters["fast_computations"], variance, bias_vector)
         samples_T = samples.T
 
         steady_states = map_samples_to_steady_states(
@@ -243,7 +248,7 @@ def sample_from_polytope(
 
     @staticmethod
     def sample_from_polytope_no_multiphase(
-        A, b, method = 'billiard_walk', n=1000, burn_in=0, thinning=1
+        A, b, method = 'billiard_walk', n=1000, burn_in=0, thinning=1, variance=1.0, bias_vector=None
     ):
         """A static function to sample from a full dimensional polytope with an MCMC method.
 
@@ -255,14 +260,18 @@ def sample_from_polytope_no_multiphase(
         burn_in -- the number of points to burn before sampling
         thinning -- the walk length of the chain
         """
-
+        if bias_vector is None:
+            bias_vector = np.ones(A.shape[1], dtype=np.float64)
+        else:
+            bias_vector = bias_vector.astype('float64')
+
         P = HPolytope(A, b)
 
         try:
             import gurobipy
-            samples = P.generate_samples(method, n, burn_in, thinning, True)
+            samples = P.generate_samples(method, n, burn_in, thinning, True, variance, bias_vector)
         except ImportError as e:
-            samples = P.generate_samples(method, n, burn_in, thinning, False)
+            samples = P.generate_samples(method, n, burn_in, thinning, False, variance, bias_vector)
 
         samples_T = samples.T
         return samples_T

diff --git a/dingo/bindings/bindings.cpp b/dingo/bindings/bindings.cpp
@@ -87,7 +87,10 @@ double HPolytopeCPP::apply_sampling(int walk_len,
                                     int method,
                                     double* inner_point, 
                                     double radius,
-                                    double* samples) {
+                                    double* samples,
+                                    double variance_value,
+                                    double* bias_vector_){ 
+
    RNGType rng(HP.dimension());
    HP.normalize();
    int d = HP.dimension();
@@ -103,6 +106,8 @@ double HPolytopeCPP::apply_sampling(int walk_len,
    HP.set_InnerBall(CheBall);
    starting_point = inner_point2;
    std::list<Point> rand_points;
+
+   NT variance = variance_value;
 
    if (method == 1) { // cdhr
       uniform_sampling<CDHRWalk>(rand_points, HP, rng, walk_len, number_of_points,
@@ -117,16 +122,30 @@ double HPolytopeCPP::apply_sampling(int walk_len,
    } else if (method == 4) { // ball walk
       uniform_sampling<BallWalk>(rand_points, HP, rng, walk_len, number_of_points, 
                                  starting_point, number_of_points_to_burn);
-   } else if (method == 5) { // dikin walk {
+   } else if (method == 5) { // dikin walk
       uniform_sampling<DikinWalk>(rand_points, HP, rng, walk_len, number_of_points,
                                   starting_point, number_of_points_to_burn);
-   } else if (method == 6) { // dikin walk {
+   } else if (method == 6) { // john walk
       uniform_sampling<JohnWalk>(rand_points, HP, rng, walk_len, number_of_points,
                                  starting_point, number_of_points_to_burn);
-   } else if (method == 7) { // dikin walk {
+   } else if (method == 7) { // vaidya walk
       uniform_sampling<VaidyaWalk>(rand_points, HP, rng, walk_len, number_of_points,
                                    starting_point, number_of_points_to_burn);
-   } else {
+   } else if (method == 8) { // gaussian sampling with gaussian HMC exact walk
+      NT a = NT(1)/(NT(2)*variance);
+      gaussian_sampling<GaussianHamiltonianMonteCarloExactWalk>(rand_points, HP, rng, walk_len, number_of_points, a,
+                                   starting_point, number_of_points_to_burn);
+   } else if (method == 9) { // exponential sampling with exponential HMC exact walk
+      VT c(d);
+      for (int i = 0; i < d; i++){
+         c(i) = bias_vector_[i];
+      }
+      Point bias_vector(c);       
+      exponential_sampling<ExponentialHamiltonianMonteCarloExactWalk>(rand_points, HP, rng, walk_len, number_of_points, bias_vector, variance,
+                                   starting_point, number_of_points_to_burn);
+      } 
+
+   else {
       throw std::runtime_error("This function must not be called.");
    }
 

diff --git a/dingo/bindings/bindings.h b/dingo/bindings/bindings.h
@@ -141,7 +141,7 @@ class HPolytopeCPP{
 
       // the apply_sampling() function
       double apply_sampling(int walk_len, int number_of_points, int number_of_points_to_burn,
-                            int method, double* inner_point, double radius, double* samples);
+                            int method, double* inner_point, double radius, double* samples, double variance_value, double* bias_vector);
 
       void mmcs_initialize(int d, int ess, bool psrf_check, bool parallelism, int num_threads);
 

diff --git a/dingo/volestipy.pyx b/dingo/volestipy.pyx
@@ -59,7 +59,7 @@ cdef extern from "bindings.h":
 
       # Random sampling
       double apply_sampling(int walk_len, int number_of_points, int number_of_points_to_burn, \
-                              int method, double* inner_point, double radius, double* samples)
+                              int method, double* inner_point, double radius, double* samples, double variance_value, double* bias_vector)
 
       # Initialize the parameters for the (m)ultiphase (m)onte (c)arlo (s)ampling algorithm
       void mmcs_initialize(unsigned int d, int ess, int psrf_check, int parallelism, int num_threads);
@@ -122,7 +122,7 @@ cdef class HPolytope:
          raise Exception('"{}" is not implemented to compute volume. Available methods are: {}'.format(vol_method, volume_methods))
 
    # Likewise, the generate_samples() function
-   def generate_samples(self, method, number_of_points, number_of_points_to_burn, walk_len, fast_mode):
+   def generate_samples(self, method, number_of_points, number_of_points_to_burn, walk_len, fast_mode, variance_value, bias_vector):
 
       n_variables = self._A.shape[1]
       cdef double[:,::1] samples = np.zeros((number_of_points, n_variables), dtype = np.float64, order = "C")
@@ -135,6 +135,8 @@ cdef class HPolytope:
 
       cdef double[::1] inner_point_for_c = np.asarray(temp_center)
 
+      cdef double[::1] bias_vector_ = np.asarray(bias_vector)
+
       if method == 'cdhr':
          int_method = 1
       elif method == 'rdhr':
@@ -149,11 +151,15 @@ cdef class HPolytope:
          int_method = 6
       elif method == 'vaidya_walk':
          int_method = 7
+      elif method == 'gaussian_hmc_walk':
+         int_method = 8
+      elif method == 'exponential_hmc_walk':
+         int_method = 9
       else:
          raise RuntimeError("Uknown MCMC sampling method")
 
       self.polytope_cpp.apply_sampling(walk_len, number_of_points, number_of_points_to_burn, \
-                                       int_method, &inner_point_for_c[0], radius, &samples[0,0])
+                                       int_method, &inner_point_for_c[0], radius, &samples[0,0], variance_value, &bias_vector_[0])
       return np.asarray(samples)
 
 

diff --git a/tests/sampling.py b/tests/sampling.py
@@ -20,10 +20,10 @@ def test_sample_json(self):
         model = MetabolicNetwork.from_json( input_file_json )
         sampler = PolytopeSampler(model)
 
-        steady_states = sampler.generate_steady_states(ess = 1000, psrf = True) 
+        steady_states = sampler.generate_steady_states(ess = 20000, psrf = True)
 
         self.assertTrue( steady_states.shape[0] == 95 )
-        self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+        self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-02 )
 
 
     def test_sample_mat(self):
@@ -32,10 +32,10 @@ def test_sample_mat(self):
         model = MetabolicNetwork.from_mat(input_file_mat)
         sampler = PolytopeSampler(model)
 
-        steady_states = sampler.generate_steady_states(ess = 1000, psrf = True) 
+        steady_states = sampler.generate_steady_states(ess = 20000, psrf = True) 
 
         self.assertTrue( steady_states.shape[0] == 95 )
-        self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+        self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-02 )
 
 
     def test_sample_sbml(self):
@@ -44,10 +44,10 @@ def test_sample_sbml(self):
         model = MetabolicNetwork.from_sbml( input_file_sbml )
         sampler = PolytopeSampler(model)
 
-        steady_states = sampler.generate_steady_states(ess = 1000, psrf = True) 
+        steady_states = sampler.generate_steady_states(ess = 20000, psrf = True)
 
         self.assertTrue( steady_states.shape[0] == 95 )
-        self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+        self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-02 )
 
 
 

diff --git a/tests/sampling_no_multiphase.py b/tests/sampling_no_multiphase.py
@@ -0,0 +1,87 @@
+# dingo : a python library for metabolic networks sampling and analysis
+# dingo is part of GeomScale project
+
+# Copyright (c) 2022 Apostolos Chalkis
+# Copyright (c) 2022 Vissarion Fisikopoulos
+# Copyright (c) 2022 Haris Zafeiropoulos
+
+# Licensed under GNU LGPL.3, see LICENCE file
+
+import unittest
+import os
+from dingo import MetabolicNetwork, PolytopeSampler
+
+
+class TestSampling(unittest.TestCase):
+
+    def test_sample_json(self):
+
+        input_file_json = os.getcwd() + "/ext_data/e_coli_core.json"
+        model = MetabolicNetwork.from_json( input_file_json )
+        sampler = PolytopeSampler(model)
+
+        #gaussian hmc sampling
+        steady_states = sampler.generate_steady_states_no_multiphase(method = 'gaussian_hmc_walk', n=500)
+
+        self.assertTrue( steady_states.shape[0] == 95 )
+        self.assertTrue( steady_states.shape[1] == 500 )
+        #steady_states[12].mean() seems to have a lot of discrepancy between experiments, so we won't check the mean for now
+        #self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+
+        #exponential hmc sampling
+        steady_states = sampler.generate_steady_states_no_multiphase(method = 'exponential_hmc_walk', n=500, variance=50)
+
+        self.assertTrue( steady_states.shape[0] == 95 )
+        self.assertTrue( steady_states.shape[1] == 500 )
+        #steady_states[12].mean() seems to have a lot of discrepancy between experiments, so we won't check the mean for now
+        #self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+
+    def test_sample_mat(self):
+
+        input_file_mat = os.getcwd() + "/ext_data/e_coli_core.mat"
+        model = MetabolicNetwork.from_mat(input_file_mat)
+        sampler = PolytopeSampler(model)
+
+        #gaussian hmc sampling
+        steady_states = sampler.generate_steady_states_no_multiphase(method = 'gaussian_hmc_walk', n=500)
+
+        self.assertTrue( steady_states.shape[0] == 95 )
+        self.assertTrue( steady_states.shape[1] == 500 )
+        #steady_states[12].mean() seems to have a lot of discrepancy between experiments, so we won't check the mean for now
+        #self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+
+        #exponential hmc sampling
+        steady_states = sampler.generate_steady_states_no_multiphase(method = 'exponential_hmc_walk', n=500, variance=50)
+
+        self.assertTrue( steady_states.shape[0] == 95 )
+        self.assertTrue( steady_states.shape[1] == 500 )
+        #steady_states[12].mean() seems to have a lot of discrepancy between experiments, so we won't check the mean for now
+        #self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+
+
+    def test_sample_sbml(self):
+
+        input_file_sbml = os.getcwd() + "/ext_data/e_coli_core.xml"
+        model = MetabolicNetwork.from_sbml( input_file_sbml )
+        sampler = PolytopeSampler(model)
+
+        #gaussian hmc sampling
+        steady_states = sampler.generate_steady_states_no_multiphase(method = 'gaussian_hmc_walk', n=500)
+
+        self.assertTrue( steady_states.shape[0] == 95 )
+        self.assertTrue( steady_states.shape[1] == 500 )
+        #steady_states[12].mean() seems to have a lot of discrepancy between experiments, so we won't check the mean for now
+        #self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+
+        #exponential hmc sampling
+        steady_states = sampler.generate_steady_states_no_multiphase(method = 'exponential_hmc_walk', n=500, variance=50)
+
+        self.assertTrue( steady_states.shape[0] == 95 )
+        self.assertTrue( steady_states.shape[1] == 500 )
+        #steady_states[12].mean() seems to have a lot of discrepancy between experiments, so we won't check the mean for now
+        #self.assertTrue( abs( steady_states[12].mean()  - 2.504 ) < 1e-03 )
+
+
+
+if __name__ == "__main__":
+    unittest.main()