From a834fbc696ca6b720eff9891e731e75f1c58494b Mon Sep 17 00:00:00 2001 From: Pierre Kuenzli Date: Thu, 25 Oct 2018 18:08:28 +0100 Subject: [PATCH 01/41] Added map functionality for multi process slaves using communicators, needs refactoring and management of bds and pds --- Makefile | 1 + abcpy/backends/mpi.py | 117 ++++++++++++++++----- tests/backend_tests_mpi_model_mpi.py | 147 +++++++++++++++++++++++++++ 3 files changed, 241 insertions(+), 24 deletions(-) create mode 100644 tests/backend_tests_mpi_model_mpi.py diff --git a/Makefile b/Makefile index e973beb0..710ddf5a 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,7 @@ unittest: unittest_mpi: echo "Running MPI backend unit tests.." mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_tests_mpi.py" || (echo "Error in MPI unit tests."; exit 1) + mpirun -np 3 python3 -m unittest discover -s tests -v -p "backend_tests_mpi_model_mpi.py" || (echo "Error in MPI unit tests."; exit 1) exampletest: $(MAKEDIRS) echo "Testing standard examples.." diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index c792979c..8002b901 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -94,7 +94,8 @@ def __command_slaves(self, command, data): elif command == self.OP_FINISH: data_packet = (command,) - _ = self.comm.bcast(data_packet, root=0) + #_ = self.comm.bcast(data_packet, root=0) + _ = self.master_communicator.bcast(data_packet, root=0) @@ -169,7 +170,8 @@ def orchestrate_map(self,pds_id): responding to them with the data and then sending them a Sentinel signalling that they can exit. """ - is_map_done = [True if i in self.master_node_ranks else False for i in range(self.size)] + #is_map_done = [True if i in self.master_node_ranks else False for i in range(self.size)] + is_map_done = [True if i in self.master_node_ranks else False for i in range(self.master_size)] status = MPI.Status() #Copy it to the pending. 
This is so when master accesses @@ -177,9 +179,11 @@ def orchestrate_map(self,pds_id): self.pds_pending_store[pds_id] = list(self.pds_store[pds_id]) #While we have some ranks that haven't finished - while sum(is_map_done) 1): + return func(self.model_communicator, data_item) + else: + return func(data_item) + + def __slave_model_run(self): + while True: + data = self.model_communicator.bcast(None, root=0) + # print("Received some instruction from model master") + op = data[0] + if op == self.OP_MAP: + # print("Map") + #Receive data from master of the model + function_packed = self.model_communicator.bcast(None, root=0)[0] + data_item = self.model_communicator.bcast(None, root=0)[0] + self.__runfunc(function_packed, data_item) + elif op == self.OP_FINISH: + # print("Finish") + quit() + else: + raise Exception("Slave model received unknown command code") + + def __get_received_pds_id(self): """ Function to retrieve the pds_id(s) we received from the master to associate @@ -455,12 +499,18 @@ def __get_received_pds_id(self): return self.__rec_pds_id, self.__rec_pds_id_result + def __master_model_function_run(self, function_packed, data_item): + #Send function and data to other processes + self.model_communicator.bcast([self.OP_MAP], root=0) + self.model_communicator.bcast([function_packed], root=0) + self.model_communicator.bcast([data_item], root=0) + return self.__runfunc(function_packed, data_item) def parallelize(self): pass - def map(self, func): + def map(self, function_packed): """ A distributed implementation of map that works on parallel data sets (PDS). 
@@ -485,7 +535,8 @@ def map(self, func): rdd = [] while True: #Ask for a chunk of data since it's free - data_chunks = self.comm.sendrecv(pds_id, 0, pds_id) + #data_chunks = self.comm.sendrecv(pds_id, 0, pds_id) + data_chunks = self.master_communicator.sendrecv(pds_id, 0, pds_id) #If it receives a sentinel, it's done and it can exit if data_chunks is None: @@ -494,11 +545,8 @@ def map(self, func): #Accumulate the indicess and *processed* chunks for chunk in data_chunks: data_index,data_item = chunk - try: - result = func(data_item) - except Exception as e: - result = e - rdd.append((data_index, result)) + res = self.__master_model_function_run(function_packed, data_item) + rdd+=[(data_index,res)] pds_res = PDSMPI(rdd, pds_id_new, self) @@ -522,14 +570,16 @@ def collect(self, pds): """ #Send the data we have back to the master - _ = self.comm.gather(pds.python_list, root=0) + #_ = self.comm.gather(pds.python_list, root=0) + _ = self.master_communicator.gather(pds.python_list, root=0) def broadcast(self, value): """ Value is ignored for the slaves. We get data from master """ - value = self.comm.bcast(None, root=0) + #value = self.comm.bcast(None, root=0) + value = self.master_communicator.bcast(None, root=0) self.bds_store[self.__bds_id] = value @@ -543,7 +593,7 @@ class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else Backend and the slaves. 
""" - def __init__(self, master_node_ranks=[0]): + def __init__(self, master_node_ranks=[0], process_per_model=1): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() @@ -551,6 +601,25 @@ def __init__(self, master_node_ranks=[0]): if self.size < 2: raise ValueError('A minimum of 2 ranks are required for the MPI backend') + #Construct the appropriate communicators for resource allocation to models + #There is one communicator for master nodes + #And one communicator per model + self.process_per_model = process_per_model + self.model_color = int(((self.rank - sum(i < self.rank for i in master_node_ranks)) / process_per_model) + 1) + if(self.rank in master_node_ranks): + self.model_color = 0 + # print("Global rank : ", self.rank, ", color : ", self.model_color) + self.model_communicator = MPI.COMM_WORLD.Split(self.model_color, self.rank) + self.model_size = self.model_communicator.Get_size() + self.model_rank = self.model_communicator.Get_rank() + + # create a communicator to broadcast instructions to slaves + self.master_color = 1 + if(self.model_color == 0 or self.model_rank == 0): + self.master_color = 0 + self.master_communicator = MPI.COMM_WORLD.Split(self.master_color, self.rank) + self.master_size = self.master_communicator.Get_size() + self.master_rank = self.master_communicator.Get_rank() #Set the global backend globals()['backend'] = self diff --git a/tests/backend_tests_mpi_model_mpi.py b/tests/backend_tests_mpi_model_mpi.py new file mode 100644 index 00000000..3046269d --- /dev/null +++ b/tests/backend_tests_mpi_model_mpi.py @@ -0,0 +1,147 @@ +import unittest +from mpi4py import MPI +from abcpy.backends import BackendMPI,BackendMPITestHelper +import numpy + +def setUpModule(): + ''' + If an exception is raised in a setUpModule then none of + the tests in the module will be run. 
+ + This is useful because the slaves run in a while loop on initialization + only responding to the master's commands and will never execute anything else. + + On termination of master, the slaves call quit() that raises a SystemExit(). + Because of the behaviour of setUpModule, it will not run any unit tests + for the slave and we now only need to write unit-tests from the master's + point of view. + ''' + global rank,backend_mpi + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + backend_mpi = BackendMPI(process_per_model=2) + +class MPIBackendTests(unittest.TestCase): + + def test_parallelize(self): + data = [0]*backend_mpi.size + pds = backend_mpi.parallelize(data) + pds_map = backend_mpi.map(lambda model_comm, x: x + MPI.COMM_WORLD.Get_rank(), pds) + res = backend_mpi.collect(pds_map) + + for master_index in backend_mpi.master_node_ranks: + self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") + + def test_map(self): + def square_mpi(model_comm, x): + local_res = numpy.array([x**2], 'i') + global_res = numpy.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1,2,3,4,5] + pds = backend_mpi.parallelize(data) + pds_map = backend_mpi.map(square_mpi, pds) + res = backend_mpi.collect(pds_map) + assert res==list(map(lambda x:2*(x**2),data)) + + + # def test_broadcast(self): + # data = [1,2,3,4,5] + # pds = backend_mpi.parallelize(data) + + # bds = backend_mpi.broadcast(100) + + # #Pollute the BDS values of the master to confirm slaves + # # use their broadcasted value + # for k,v in backend_mpi.bds_store.items(): + # backend_mpi.bds_store[k] = 99999 + + # def test_map(model_comm, x): + # return x + bds.value() + + # pds_m = backend_mpi.map(test_map, pds) + # self.assertTrue(backend_mpi.collect(pds_m)==[101,102,103,104,105]) + + # def test_pds_delete(self): + + # def check_if_exists(model_comm, x): + # obj = BackendMPITestHelper() + # return 
obj.check_pds(x) + + # data = [1,2,3,4,5] + # pds = backend_mpi.parallelize(data) + + # #Check if the pds we just created exists in all the slaves(+master) + + # id_check_pds = backend_mpi.parallelize([pds.pds_id]*5) + # pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) + # self.assertTrue(False not in backend_mpi.collect(pds_check_result),"PDS was not created") + + # #Delete the PDS on master and try again + # del pds + # pds_check_result = backend_mpi.map(check_if_exists,id_check_pds) + + # self.assertTrue(True not in backend_mpi.collect(pds_check_result),"PDS was not deleted") + + + def test_bds_delete(self): + + def check_if_exists(model_comm, x): + obj = BackendMPITestHelper() + return obj.check_bds(x) + + data = [1,2,3,4,5] + bds = backend_mpi.broadcast(data) + + #Check if the pds we just created exists in all the slaves(+master) + id_check_bds = backend_mpi.parallelize([bds.bds_id]*5) + bds_check_result = backend_mpi.map(check_if_exists, id_check_bds) + self.assertTrue(False not in backend_mpi.collect(bds_check_result),"BDS was not created") + + #Delete the PDS on master and try again + del bds + bds_check_result = backend_mpi.map(check_if_exists,id_check_bds) + self.assertTrue(True not in backend_mpi.collect(bds_check_result),"BDS was not deleted") + + + def test_function_pickle(self): + + def square_mpi(model_comm, x): + local_res = numpy.array([x**2], 'i') + global_res = numpy.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + class staticfunctest_mpi: + @staticmethod + def square_mpi(model_comm, x): + local_res = numpy.array([x**2], 'i') + global_res = numpy.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + class nonstaticfunctest_mpi: + def square_mpi(self, model_comm, x): + local_res = numpy.array([x**2], 'i') + global_res = numpy.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], 
[global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1,2,3,4,5] + expected_result = [2,8,18,32,50] + + pds = backend_mpi.parallelize(data) + pds_map1 = backend_mpi.map(square_mpi,pds) + pds_res1 = backend_mpi.collect(pds_map1) + + self.assertTrue(pds_res1==expected_result,"Failed pickle test for general function") + + pds_map3 = backend_mpi.map(staticfunctest_mpi.square_mpi,pds) + pds_res3 = backend_mpi.collect(pds_map3) + self.assertTrue(pds_res3==expected_result,"Failed pickle test for static function") + + obj = nonstaticfunctest_mpi() + pds_map4 = backend_mpi.map(obj.square_mpi ,pds) + pds_res4 = backend_mpi.collect(pds_map4) + self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") From f8a93ced9e3ee442f376cb1ab133d51e747ec06a Mon Sep 17 00:00:00 2001 From: Pierre Kuenzli Date: Mon, 29 Oct 2018 16:15:36 +0000 Subject: [PATCH 02/41] modified the class structure of BackendMPI, there are now leaders dans workers which are parts of a team and master is now scheduler. created a class mpimanager that manage the different communicators. 
interface of BackendMPI is modified as follow : rank and master_node_ranks are now functions rank() and master_node_rank() --- abcpy/backends/__init__.py | 13 ++ abcpy/backends/mpi.py | 261 +++++++++++++-------------- abcpy/backends/mpimanager.py | 99 ++++++++++ tests/backend_tests_mpi.py | 4 +- tests/backend_tests_mpi_model_mpi.py | 58 +++--- 5 files changed, 271 insertions(+), 164 deletions(-) create mode 100644 abcpy/backends/mpimanager.py diff --git a/abcpy/backends/__init__.py b/abcpy/backends/__init__.py index 93a9b88b..f95e76b5 100644 --- a/abcpy/backends/__init__.py +++ b/abcpy/backends/__init__.py @@ -2,9 +2,22 @@ def BackendMPI(*args,**kwargs): + + # import and setup module mpimanager + import abcpy.backends.mpimanager + master_node_ranks = [0] + process_per_model = 1 + if 'master_node_ranks' in kwargs: + master_node_ranks = kwargs['master_node_ranks'] + if 'process_per_model' in kwargs: + process_per_model = kwargs['process_per_model'] + abcpy.backends.mpimanager.create_mpi_manager(master_node_ranks, process_per_model) + + # import BackendMPI and return and instance from abcpy.backends.mpi import BackendMPI return BackendMPI(*args,**kwargs) + def BackendMPITestHelper(*args,**kwargs): from abcpy.backends.mpi import BackendMPITestHelper return BackendMPITestHelper(*args,**kwargs) diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index 8002b901..139a26e1 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -8,7 +8,10 @@ from abcpy.backends import BDS, PDS, Backend -class BackendMPIMaster(Backend): +import abcpy.backends.mpimanager +from mpi4py import MPI + +class BackendMPIScheduler(Backend): """Defines the behavior of the master process This class defines the behavior of the master process (The one @@ -20,7 +23,7 @@ class BackendMPIMaster(Backend): OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7] finalized = False - def __init__(self, master_node_ranks=[0],chunk_size=1): + 
def __init__(self, chunk_size=1): """ Parameters ---------- @@ -33,11 +36,7 @@ def __init__(self, master_node_ranks=[0],chunk_size=1): size of one block of data to be sent to free executors """ - self.comm = MPI.COMM_WORLD - self.size = self.comm.Get_size() - self.rank = self.comm.Get_rank() - - self.master_node_ranks = master_node_ranks + #self.master_node_ranks = master_node_ranks #Initialize the current_pds_id and bds_id self.__current_pds_id = 0 @@ -76,7 +75,6 @@ def __command_slaves(self, command, data): elif command == self.OP_MAP: #In map we receive data as (pds_id,pds_id_new,func) #Use cloudpickle to dump the function into a string. - # function_packed = self.__sanitize_and_pack_func() function_packed = cloudpickle.dumps(data[2],pickle.HIGHEST_PROTOCOL) data_packet = (command, data[0], data[1], function_packed) @@ -94,8 +92,7 @@ def __command_slaves(self, command, data): elif command == self.OP_FINISH: data_packet = (command,) - #_ = self.comm.bcast(data_packet, root=0) - _ = self.master_communicator.bcast(data_packet, root=0) + _ = self.mpimanager.get_master_communicator().bcast(data_packet, root=0) @@ -157,7 +154,6 @@ def parallelize(self, python_list): #Don't send any data. Just keep it as a queue we're going to pop. self.pds_store[pds_id] = list(python_list) - pds = PDSMPI([], pds_id, self) return pds @@ -170,8 +166,7 @@ def orchestrate_map(self,pds_id): responding to them with the data and then sending them a Sentinel signalling that they can exit. """ - #is_map_done = [True if i in self.master_node_ranks else False for i in range(self.size)] - is_map_done = [True if i in self.master_node_ranks else False for i in range(self.master_size)] + is_map_done = [True if i in self.mpimanager.get_master_node_ranks() else False for i in range(self.mpimanager.get_master_size())] status = MPI.Status() #Copy it to the pending. 
This is so when master accesses @@ -179,11 +174,9 @@ def orchestrate_map(self,pds_id): self.pds_pending_store[pds_id] = list(self.pds_store[pds_id]) #While we have some ranks that haven't finished - #while sum(is_map_done) 1): + return func(self.mpimanager.get_model_communicator(), data_item) + else: + return func(data_item) + + + def __worker_run(self): + while True: + data = self.mpimanager.get_model_communicator().bcast(None, root=0) + op = data[0] + if op == self.OP_MAP: + #Receive data from master of the model + function_packed = self.mpimanager.get_model_communicator().bcast(None, root=0)[0] + data_item = self.mpimanager.get_model_communicator().bcast(None, root=0)[0] + self.run_function(function_packed, data_item) + elif op == self.OP_BROADCAST: + self._bds_id = data[1] + self.broadcast(None) + elif op == self.OP_FINISH: + quit() + else: + raise Exception("Slave model received unknown command code") + + def collect(self): + pass + + def map(self): + pass + + def parallelize(): + pass + + def broadcast(self, value): + """ + Value is ignored for the slaves. We get data from master + """ + value = self.mpimanager.get_world_communicator().bcast(None, root=0) + self.bds_store[self._bds_id] = value + + +class BackendMPILeader(BackendMPIWorker): """Defines the behavior of the slaves/worker processes This class defines how the slaves should behave during operation. @@ -364,26 +402,20 @@ class BackendMPISlave(Backend): def __init__(self): - self.comm = MPI.COMM_WORLD - self.size = self.comm.Get_size() - self.rank = self.comm.Get_rank() - #Define the vars that will hold the pds ids received from master to operate on - self.__rec_pds_id = None - self.__rec_pds_id_result = None + self.mpimanager =abcpy.backends.mpimanager.get_mpi_manager() - #Initialize a BDS store for both master & slave. - self.bds_store = {} + self.__leader_run() #Go into an infinite loop waiting for commands from the user. 
#Process 0 of the model is the "master" of the model and deal with the central master - if self.model_rank == 0: - self.__slave_run() - else : - self.__slave_model_run() + # if self.model_rank == 0: + # self.__slave_run() + # else : + # self.__slave_model_run() - def __slave_run(self): + def __leader_run(self): """ This method is the infinite loop a slave enters directly from init. It makes the slave wait for a command to perform from the master and @@ -408,36 +440,31 @@ def __slave_run(self): self.pds_store = {} while True: - #data = self.comm.bcast(None, root=0) - data = self.master_communicator.bcast(None, root=0) - # print("Received some instruction from master") + data = self.mpimanager.get_master_communicator().bcast(None, root=0) op = data[0] if op == self.OP_PARALLELIZE: pds_id = data[1] - self.__rec_pds_id = pds_id + self._rec_pds_id = pds_id pds_id, pds_id_new = self.__get_received_pds_id() self.pds_store[pds_id] = None elif op == self.OP_MAP: - # print("Map") pds_id, pds_id_result, function_packed = data[1:] - self.__rec_pds_id, self.__rec_pds_id_result = pds_id, pds_id_result - - #Use cloudpickle to convert back function string to a function - #func = cloudpickle.loads(function_packed) + self._rec_pds_id, self._rec_pds_id_result = pds_id, pds_id_result #Enter the map so we can grab data and perform the func. 
#Func sent before and not during for performance reasons - #pds_res = self.map(func) pds_res = self.map(function_packed) # Store the result in a newly gnerated PDS pds_id self.pds_store[pds_res.pds_id] = pds_res elif op == self.OP_BROADCAST: - self.__bds_id = data[1] + self._bds_id = data[1] + #relay command and data into model communicator + self.mpimanager.get_model_communicator().bcast(data, root=0) self.broadcast(None) elif op == self.OP_COLLECT: @@ -457,38 +484,11 @@ def __slave_run(self): del self.bds_store[bds_id] elif op == self.OP_FINISH: - # print("Finish") # tells other processes of the worker to finish - self.model_communicator.bcast([self.OP_FINISH], root=0) + self.mpimanager.get_model_communicator().bcast([self.OP_FINISH], root=0) quit() else: - raise Exception("Slave recieved unknown command code") - - - def __runfunc(self, function_packed, data_item): - #Unpack function and run it - func = cloudpickle.loads(function_packed) - if(self.model_size > 1): - return func(self.model_communicator, data_item) - else: - return func(data_item) - - def __slave_model_run(self): - while True: - data = self.model_communicator.bcast(None, root=0) - # print("Received some instruction from model master") - op = data[0] - if op == self.OP_MAP: - # print("Map") - #Receive data from master of the model - function_packed = self.model_communicator.bcast(None, root=0)[0] - data_item = self.model_communicator.bcast(None, root=0)[0] - self.__runfunc(function_packed, data_item) - elif op == self.OP_FINISH: - # print("Finish") - quit() - else: - raise Exception("Slave model received unknown command code") + raise Exception("Slave received unknown command code") def __get_received_pds_id(self): @@ -497,14 +497,14 @@ def __get_received_pds_id(self): our slave's created PDS with the master's. 
""" - return self.__rec_pds_id, self.__rec_pds_id_result + return self._rec_pds_id, self._rec_pds_id_result - def __master_model_function_run(self, function_packed, data_item): - #Send function and data to other processes - self.model_communicator.bcast([self.OP_MAP], root=0) - self.model_communicator.bcast([function_packed], root=0) - self.model_communicator.bcast([data_item], root=0) - return self.__runfunc(function_packed, data_item) + def __leader_run_function(self, function_packed, data_item): + #Send function and data to other workers + self.mpimanager.get_model_communicator().bcast([self.OP_MAP], root=0) + self.mpimanager.get_model_communicator().bcast([function_packed], root=0) + self.mpimanager.get_model_communicator().bcast([data_item], root=0) + return self.run_function(function_packed, data_item) def parallelize(self): @@ -535,9 +535,8 @@ def map(self, function_packed): rdd = [] while True: #Ask for a chunk of data since it's free - #data_chunks = self.comm.sendrecv(pds_id, 0, pds_id) - data_chunks = self.master_communicator.sendrecv(pds_id, 0, pds_id) - + data_chunks = self.mpimanager.get_master_communicator().sendrecv(pds_id, 0, pds_id) + #If it receives a sentinel, it's done and it can exit if data_chunks is None: break @@ -545,7 +544,7 @@ def map(self, function_packed): #Accumulate the indicess and *processed* chunks for chunk in data_chunks: data_index,data_item = chunk - res = self.__master_model_function_run(function_packed, data_item) + res = self.__leader_run_function(function_packed, data_item) rdd+=[(data_index,res)] pds_res = PDSMPI(rdd, pds_id_new, self) @@ -570,23 +569,32 @@ def collect(self, pds): """ #Send the data we have back to the master - #_ = self.comm.gather(pds.python_list, root=0) - _ = self.master_communicator.gather(pds.python_list, root=0) + _ = self.mpimanager.get_master_communicator().gather(pds.python_list, root=0) - def broadcast(self, value): - """ - Value is ignored for the slaves. 
We get data from master - """ - #value = self.comm.bcast(None, root=0) - value = self.master_communicator.bcast(None, root=0) - self.bds_store[self.__bds_id] = value + +class BackendMPITeam(BackendMPILeader if abcpy.backends.mpimanager.get_mpi_manager().is_leader() else BackendMPIWorker): + + OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7] + + def __init__(self): + #Define the vars that will hold the pds ids received from master to operate on + self._rec_pds_id = None + self._rec_pds_id_result = None + + #Initialize a BDS store for both master & slave. + self.bds_store = {} + + #print("In BackendMPITeam, rank : ", self.rank, ", model_rank_global : ", globals()['model_rank_global']) + + super().__init__() + -class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else BackendMPISlave): +class BackendMPI(BackendMPIScheduler if abcpy.backends.mpimanager.get_mpi_manager().is_scheduler() else BackendMPITeam): """A backend parallelized by using MPI - The backend conditionally inherits either the BackendMPIMaster class + The backend conditionally inherits either the BackendMPIScheduler class or the BackendMPISlave class depending on it's rank. 
This lets BackendMPI have a uniform interface for the user but allows for a logical split between functions performed by the master @@ -594,43 +602,29 @@ class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else Backend """ def __init__(self, master_node_ranks=[0], process_per_model=1): - self.comm = MPI.COMM_WORLD - self.size = self.comm.Get_size() - self.rank = self.comm.Get_rank() - if self.size < 2: + self.mpimanager = abcpy.backends.mpimanager.get_mpi_manager() + + if self.mpimanager.get_world_size() < 2: raise ValueError('A minimum of 2 ranks are required for the MPI backend') - #Construct the appropriate communicators for resource allocation to models - #There is one communicator for master nodes - #And one communicator per model - self.process_per_model = process_per_model - self.model_color = int(((self.rank - sum(i < self.rank for i in master_node_ranks)) / process_per_model) + 1) - if(self.rank in master_node_ranks): - self.model_color = 0 - # print("Global rank : ", self.rank, ", color : ", self.model_color) - self.model_communicator = MPI.COMM_WORLD.Split(self.model_color, self.rank) - self.model_size = self.model_communicator.Get_size() - self.model_rank = self.model_communicator.Get_rank() - - # create a communicator to broadcast instructions to slaves - self.master_color = 1 - if(self.model_color == 0 or self.model_rank == 0): - self.master_color = 0 - self.master_communicator = MPI.COMM_WORLD.Split(self.master_color, self.rank) - self.master_size = self.master_communicator.Get_size() - self.master_rank = self.master_communicator.Get_rank() + #print("abcpy.backends.mpimanager.is_scheduler : ", abcpy.backends.mpimanager.is_scheduler) #Set the global backend globals()['backend'] = self - #Call the appropriate constructors and pass the required data - if self.rank == 0: - super().__init__(master_node_ranks) - else: - super().__init__() - raise Exception("Slaves exitted main loop.") + #if self.rank == 0: + super().__init__() + #else: + # 
super().__init__(master_node_ranks, process_per_model) + # raise Exception("Teams exited main loop.") + + def size(self): + return self.mpimanager.get_world_size() + + def master_node_ranks(self): + return self.mpimanager.get_master_node_ranks() @@ -666,7 +660,6 @@ def __init__(self, object, bds_id, backend_obj): #It will access & store the data only from the current backend self.bds_id = bds_id backend.bds_store[self.bds_id] = object - # self.backend_obj = backend_obj def value(self): """ diff --git a/abcpy/backends/mpimanager.py b/abcpy/backends/mpimanager.py new file mode 100644 index 00000000..d7f689a8 --- /dev/null +++ b/abcpy/backends/mpimanager.py @@ -0,0 +1,99 @@ +from mpi4py import MPI +import sys + +mpimanager = None + +class MPIManager(object): + + def __init__(self, master_node_ranks=[0], process_per_model=1): + self.world_communicator = MPI.COMM_WORLD + self.size = self.world_communicator.Get_size() + self.rank = self.world_communicator.Get_rank() + + #Construct the appropriate communicators for resource allocation to models + #There is one communicator for master nodes + #And one communicator per model + self.master_node_ranks = master_node_ranks + self.process_per_model = process_per_model + self.model_color = int(((self.rank - sum(i < self.rank for i in master_node_ranks)) / process_per_model) + 1) + if(self.rank in master_node_ranks): + self.model_color = 0 + self.model_communicator = MPI.COMM_WORLD.Split(self.model_color, self.rank) + self.model_size = self.model_communicator.Get_size() + self.model_rank = self.model_communicator.Get_rank() + + # create a communicator to broadcast instructions to slaves + self.master_color = 1 + if(self.model_color == 0 or self.model_rank == 0): + self.master_color = 0 + self.master_communicator = MPI.COMM_WORLD.Split(self.master_color, self.rank) + self.master_size = self.master_communicator.Get_size() + self.master_rank = self.master_communicator.Get_rank() + + self.leader = False + self.scheduler = False + 
self.team = False + self.worker = False + + if self.rank == 0: + self.scheduler = True + elif self.model_rank == 0: + self.team = True + self.leader = True + else: + self.team = True + self.worker = True + + + def is_scheduler(self): + return self.scheduler + + def is_team(self): + return self.team + + def is_leader(self): + return self.leader + + def is_worker(self): + return self.worker + + def get_master_node_ranks(self): + return self.master_node_ranks + + def get_world_rank(self): + return self.rank + + def get_world_size(self): + return self.size + + def get_world_communicator(self): + return self.world_communicator + + def get_model_rank(self): + return self.model_rank + + def get_model_size(self): + return self.model_size + + def get_model_communicator(self): + return self.model_communicator + + def get_master_rank(self): + return self.master_rank + + def get_master_size(self): + return self.master_size + + def get_master_communicator(self): + return self.master_communicator + +def get_mpi_manager(): + global mpimanager + # Error prone ? 
+ if mpimanager == None : + create_mpi_manager([0], 1) + return mpimanager + +def create_mpi_manager(master_node_ranks, process_per_model): + global mpimanager + mpimanager = MPIManager(master_node_ranks, process_per_model) \ No newline at end of file diff --git a/tests/backend_tests_mpi.py b/tests/backend_tests_mpi.py index 2c11f116..07c88ae8 100644 --- a/tests/backend_tests_mpi.py +++ b/tests/backend_tests_mpi.py @@ -26,12 +26,12 @@ def setUpModule(): class MPIBackendTests(unittest.TestCase): def test_parallelize(self): - data = [0]*backend_mpi.size + data = [0]*backend_mpi.size() pds = backend_mpi.parallelize(data) pds_map = backend_mpi.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend_mpi.collect(pds_map) - for master_index in backend_mpi.master_node_ranks: + for master_index in backend_mpi.master_node_ranks(): self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") def test_map(self): diff --git a/tests/backend_tests_mpi_model_mpi.py b/tests/backend_tests_mpi_model_mpi.py index 3046269d..ed01a2e6 100644 --- a/tests/backend_tests_mpi_model_mpi.py +++ b/tests/backend_tests_mpi_model_mpi.py @@ -24,12 +24,12 @@ def setUpModule(): class MPIBackendTests(unittest.TestCase): def test_parallelize(self): - data = [0]*backend_mpi.size + data = [0]*backend_mpi.size() pds = backend_mpi.parallelize(data) pds_map = backend_mpi.map(lambda model_comm, x: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend_mpi.collect(pds_map) - for master_index in backend_mpi.master_node_ranks: + for master_index in backend_mpi.master_node_ranks(): self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") def test_map(self): @@ -46,43 +46,45 @@ def square_mpi(model_comm, x): assert res==list(map(lambda x:2*(x**2),data)) - # def test_broadcast(self): - # data = [1,2,3,4,5] - # pds = backend_mpi.parallelize(data) + def test_broadcast(self): + data = [1,2,3,4,5] + pds = backend_mpi.parallelize(data) - # bds = 
backend_mpi.broadcast(100) + bds = backend_mpi.broadcast(100) - # #Pollute the BDS values of the master to confirm slaves - # # use their broadcasted value - # for k,v in backend_mpi.bds_store.items(): - # backend_mpi.bds_store[k] = 99999 + #Pollute the BDS values of the master to confirm slaves + # use their broadcasted value + for k,v in backend_mpi.bds_store.items(): + backend_mpi.bds_store[k] = 99999 - # def test_map(model_comm, x): - # return x + bds.value() + def test_map(model_comm, x): + return x + bds.value() - # pds_m = backend_mpi.map(test_map, pds) - # self.assertTrue(backend_mpi.collect(pds_m)==[101,102,103,104,105]) + pds_m = backend_mpi.map(test_map, pds) + self.assertTrue(backend_mpi.collect(pds_m)==[101,102,103,104,105]) - # def test_pds_delete(self): + def test_pds_delete(self): - # def check_if_exists(model_comm, x): - # obj = BackendMPITestHelper() - # return obj.check_pds(x) + def check_if_exists(model_comm, x): + obj = BackendMPITestHelper() + if model_comm.Get_rank() == 0: + return obj.check_pds(x) + return None - # data = [1,2,3,4,5] - # pds = backend_mpi.parallelize(data) + data = [1,2,3,4,5] + pds = backend_mpi.parallelize(data) - # #Check if the pds we just created exists in all the slaves(+master) + #Check if the pds we just created exists in all the slaves(+master) - # id_check_pds = backend_mpi.parallelize([pds.pds_id]*5) - # pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) - # self.assertTrue(False not in backend_mpi.collect(pds_check_result),"PDS was not created") + id_check_pds = backend_mpi.parallelize([pds.pds_id]*5) + pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) + self.assertTrue(False not in backend_mpi.collect(pds_check_result),"PDS was not created") - # #Delete the PDS on master and try again - # del pds - # pds_check_result = backend_mpi.map(check_if_exists,id_check_pds) + #Delete the PDS on master and try again + del pds + pds_check_result = backend_mpi.map(check_if_exists,id_check_pds) 
- # self.assertTrue(True not in backend_mpi.collect(pds_check_result),"PDS was not deleted") + self.assertTrue(True not in backend_mpi.collect(pds_check_result),"PDS was not deleted") def test_bds_delete(self): From c3cc0aa5744fd45bd5f8196678d80245660b5c1f Mon Sep 17 00:00:00 2001 From: Pierre Kuenzli Date: Thu, 1 Nov 2018 16:43:00 +0000 Subject: [PATCH 03/41] some refactoring and documentation on mpi backend --- abcpy/backends/__init__.py | 1 - abcpy/backends/mpi.py | 238 ++++++++++++++------------- abcpy/backends/mpimanager.py | 144 +++++++++------- tests/backend_tests_mpi.py | 22 +-- tests/backend_tests_mpi_model_mpi.py | 22 +-- 5 files changed, 237 insertions(+), 190 deletions(-) diff --git a/abcpy/backends/__init__.py b/abcpy/backends/__init__.py index f95e76b5..a6c41518 100644 --- a/abcpy/backends/__init__.py +++ b/abcpy/backends/__init__.py @@ -2,7 +2,6 @@ def BackendMPI(*args,**kwargs): - # import and setup module mpimanager import abcpy.backends.mpimanager master_node_ranks = [0] diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index 139a26e1..ab212409 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -12,9 +12,9 @@ from mpi4py import MPI class BackendMPIScheduler(Backend): - """Defines the behavior of the master process + """Defines the behavior of the scheduler process - This class defines the behavior of the master process (The one + This class defines the behavior of the scheduler process (The one with rank==0) in MPI. """ @@ -27,42 +27,36 @@ def __init__(self, chunk_size=1): """ Parameters ---------- - master_node_ranks: Python list - list of ranks computation should not happen on. - Should include the master so it doesn't get - overwhelmed with work. 
- chunk_size: Integer size of one block of data to be sent to free - executors + execution teams """ - #self.master_node_ranks = master_node_ranks #Initialize the current_pds_id and bds_id self.__current_pds_id = 0 self.__current_bds_id = 0 - #Initialize a BDS store for both master & slave. + #Initialize a BDS store for both scheduler & team. self.bds_store = {} self.pds_store = {} #Initialize a store for the pds data that - #.. hasn't been sent to the workers yet + #.. hasn't been sent to the teams yet self.pds_pending_store = {} self.chunk_size = chunk_size - def __command_slaves(self, command, data): - """Tell slaves to enter relevant execution block - This method handles the sending of the command to the slaves + def __command_teams(self, command, data): + """Tell teams to enter relevant execution block + This method handles the sending of the command to the teams telling them what operation to perform next. Parameters ---------- command: operation code of OP_xxx One of the operation codes defined in the class definition as OP_xxx - which tell the slaves what operation they're performing. + which tell the teams what operation they're performing. data: tuple Any of the data required for the operation which needs to be bundled in the data packet sent. 
@@ -92,14 +86,14 @@ def __command_slaves(self, command, data): elif command == self.OP_FINISH: data_packet = (command,) - _ = self.mpimanager.get_master_communicator().bcast(data_packet, root=0) + _ = self.mpimanager.get_scheduler_communicator().bcast(data_packet, root=0) def __generate_new_pds_id(self): """ This method generates a new pds_id to associate a PDS with it's remote counterpart - that slaves use to store & index data based on the pds_id they receive + that teams use to store & index data based on the pds_id they receive Returns ------- @@ -114,7 +108,7 @@ def __generate_new_pds_id(self): def __generate_new_bds_id(self): """ This method generates a new bds_id to associate a BDS with it's remote counterpart - that slaves use to store & index data based on the bds_id they receive + that teams use to store & index data based on the bds_id they receive Returns ------- @@ -128,18 +122,18 @@ def __generate_new_bds_id(self): def parallelize(self, python_list): """ - This method distributes the list on the available workers and returns a + This method distributes the list on the available teams and returns a reference object. - The list is split into number of workers many parts as a numpy array. - Each part is sent to a separate worker node using the MPI scatter. + The list is split into number of teams many parts as a numpy array. + Each part is sent to a separate team node using the MPI scatter. 
- MASTER: python_list is the real data that is to be split up + scheduler: python_list is the real data that is to be split up Parameters ---------- list: Python list - the list that should get distributed on the worker nodes + the list that should get distributed on the leader nodes of the teams Returns ------- @@ -147,9 +141,9 @@ def parallelize(self, python_list): A reference object that represents the parallelized list """ - # Tell the slaves to enter parallelize() + # Tell the teams to enter parallelize() pds_id = self.__generate_new_pds_id() - self.__command_slaves(self.OP_PARALLELIZE, (pds_id,)) + self.__command_teams(self.OP_PARALLELIZE, (pds_id,)) #Don't send any data. Just keep it as a queue we're going to pop. self.pds_store[pds_id] = list(python_list) @@ -159,24 +153,24 @@ def parallelize(self, python_list): return pds def orchestrate_map(self,pds_id): - """Orchestrates the slaves/workers to perform a map function + """Orchestrates the teams to perform a map function - This works by keeping track of the workers who haven't finished executing, + This works by keeping track of the teams who haven't finished executing, waiting for them to request the next chunk of data when they are free, responding to them with the data and then sending them a Sentinel signalling that they can exit. """ - is_map_done = [True if i in self.mpimanager.get_master_node_ranks() else False for i in range(self.mpimanager.get_master_size())] + is_map_done = [True if i in self.mpimanager.get_scheduler_node_ranks() else False for i in range(self.mpimanager.get_scheduler_size())] status = MPI.Status() - #Copy it to the pending. This is so when master accesses + #Copy it to the pending. This is so when scheduler accesses #the PDS data it's not empty. 
self.pds_pending_store[pds_id] = list(self.pds_store[pds_id]) #While we have some ranks that haven't finished - while sum(is_map_done) 1): return func(self.mpimanager.get_model_communicator(), data_item) @@ -357,11 +365,14 @@ def run_function(self, function_packed, data_item): def __worker_run(self): + """ + Workers enter an infinite loop and waits for instructions from their leader + """ while True: data = self.mpimanager.get_model_communicator().bcast(None, root=0) op = data[0] if op == self.OP_MAP: - #Receive data from master of the model + #Receive data from scheduler of the model function_packed = self.mpimanager.get_model_communicator().bcast(None, root=0)[0] data_item = self.mpimanager.get_model_communicator().bcast(None, root=0)[0] self.run_function(function_packed, data_item) @@ -371,7 +382,7 @@ def __worker_run(self): elif op == self.OP_FINISH: quit() else: - raise Exception("Slave model received unknown command code") + raise Exception("worker model received unknown command code") def collect(self): pass @@ -384,63 +395,55 @@ def parallelize(): def broadcast(self, value): """ - Value is ignored for the slaves. We get data from master + Receives data from scheduler """ value = self.mpimanager.get_world_communicator().bcast(None, root=0) self.bds_store[self._bds_id] = value class BackendMPILeader(BackendMPIWorker): - """Defines the behavior of the slaves/worker processes + """Defines the behavior of the leader processes - This class defines how the slaves should behave during operation. - Slaves are those processes(not nodes like Spark) that have rank!=0 - and whose ids are not present in the list of non workers. + This class defines how the leaders should behave during operation. 
+ leaders are those processes(not nodes like Spark) that have rank==0 in the model communicator """ OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7] def __init__(self): - - self.mpimanager =abcpy.backends.mpimanager.get_mpi_manager() + """ No parameter, just call leader_run """ self.__leader_run() - #Go into an infinite loop waiting for commands from the user. - #Process 0 of the model is the "master" of the model and deal with the central master - # if self.model_rank == 0: - # self.__slave_run() - # else : - # self.__slave_model_run() def __leader_run(self): """ - This method is the infinite loop a slave enters directly from init. - It makes the slave wait for a command to perform from the master and + This method is the infinite loop a leader enters directly from init. + It makes the leader wait for a command to perform from the scheduler and then calls the appropriate function. This method also takes care of the synchronization of data between the - master and the slaves by matching PDSs based on the pds_ids sent by the master + scheduler and the leaders by matching PDSs based on the pds_ids sent by the scheduler with the command. - Commands received from the master are of the form of a tuple. + Commands received from the scheduler are of the form of a tuple. The first component of the tuple is always the operation to be performed and the rest are conditional on the operation. (op,pds_id) where op == OP_PARALLELIZE for parallelize (op,pds_id, pds_id_result,func) where op == OP_MAP for map. 
(op,pds_id) where op == OP_COLLECT for a collect operation - (op,pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on slaves - (op,) where op==OP_FINISH for the slave to break out of the loop and terminate + (op,pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on leaders + (op,) where op==OP_FINISH for the leader to break out of the loop and terminate """ - # Initialize PDS data store here because only slaves need to do it. + # Initialize PDS data store here because only teams need to do it. self.pds_store = {} while True: - data = self.mpimanager.get_master_communicator().bcast(None, root=0) + data = self.mpimanager.get_scheduler_communicator().bcast(None, root=0) op = data[0] if op == self.OP_PARALLELIZE: @@ -488,19 +491,21 @@ def __leader_run(self): self.mpimanager.get_model_communicator().bcast([self.OP_FINISH], root=0) quit() else: - raise Exception("Slave received unknown command code") + raise Exception("team received unknown command code") def __get_received_pds_id(self): """ - Function to retrieve the pds_id(s) we received from the master to associate - our slave's created PDS with the master's. + Function to retrieve the pds_id(s) we received from the scheduler to associate + our team's created PDS with the scheduler's. """ return self._rec_pds_id, self._rec_pds_id_result def __leader_run_function(self, function_packed, data_item): - #Send function and data to other workers + """ + This function sends data and serialized function to workers and executes it + """ self.mpimanager.get_model_communicator().bcast([self.OP_MAP], root=0) self.mpimanager.get_model_communicator().bcast([function_packed], root=0) self.mpimanager.get_model_communicator().bcast([data_item], root=0) @@ -513,13 +518,13 @@ def parallelize(self): def map(self, function_packed): """ A distributed implementation of map that works on parallel data sets (PDS). - On every element of pds the function func is called. 
+        We consider that process 0 of each MPI model should return the final result.

        Parameters
        ----------
-        func: Python func
-            A function that can be applied to every element of the pds
+        function_packed: Python function
+            A serialized function that can be applied to every element of the pds

        Returns
        -------
@@ -535,7 +540,7 @@ def map(self, function_packed):
        rdd = []
        while True:
            #Ask for a chunk of data since it's free
-            data_chunks = self.mpimanager.get_master_communicator().sendrecv(pds_id, 0, pds_id)
+            data_chunks = self.mpimanager.get_scheduler_communicator().sendrecv(pds_id, 0, pds_id)

            #If it receives a sentinel, it's done and it can exit
            if data_chunks is None:
@@ -554,8 +559,8 @@ def map(self, function_packed):

    def collect(self, pds):
        """
-        Gather the pds from all the workers,
-        send it to the master and return it as a standard Python list.
+        Gather the pds from all the leaders,
+        send it to the scheduler and return it as a standard Python list.

        Parameters
        ----------
@@ -568,21 +573,24 @@ def collect(self, pds):
        all elements of pds as a list

        """
-        #Send the data we have back to the master
-        _ = self.mpimanager.get_master_communicator().gather(pds.python_list, root=0)
+        #Send the data we have back to the scheduler
+        _ = self.mpimanager.get_scheduler_communicator().gather(pds.python_list, root=0)


 class BackendMPITeam(BackendMPILeader if abcpy.backends.mpimanager.get_mpi_manager().is_leader() else BackendMPIWorker):
+    """
+    A team is composed of workers and a leader. One process per team is the leader; the others are workers
+    """

    OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7]

    def __init__(self):
-        #Define the vars that will hold the pds ids received from master to operate on
+        #Define the vars that will hold the pds ids received from scheduler to operate on
        self._rec_pds_id = None
        self._rec_pds_id_result = None

-        #Initialize a BDS store for both master & slave.
+ #Initialize a BDS store for both scheduler & team. self.bds_store = {} #print("In BackendMPITeam, rank : ", self.rank, ", model_rank_global : ", globals()['model_rank_global']) @@ -595,36 +603,42 @@ class BackendMPI(BackendMPIScheduler if abcpy.backends.mpimanager.get_mpi_manage """A backend parallelized by using MPI The backend conditionally inherits either the BackendMPIScheduler class - or the BackendMPISlave class depending on it's rank. This lets + or the BackendMPIteam class depending on it's rank. This lets BackendMPI have a uniform interface for the user but allows for a - logical split between functions performed by the master - and the slaves. + logical split between functions performed by the scheduler + and the teams. """ - def __init__(self, master_node_ranks=[0], process_per_model=1): - + def __init__(self, scheduler_node_ranks=[0], process_per_model=1): + """ + Parameters + ---------- + scheduler_node_ranks: Python list + list of scheduler nodes + + process_per_model: Integer + number of MPI processes to allocate to each model + """ + # get mpimanager instance from the mpimanager module (which has to be setup before calling the constructor) self.mpimanager = abcpy.backends.mpimanager.get_mpi_manager() if self.mpimanager.get_world_size() < 2: raise ValueError('A minimum of 2 ranks are required for the MPI backend') - #print("abcpy.backends.mpimanager.is_scheduler : ", abcpy.backends.mpimanager.is_scheduler) - #Set the global backend globals()['backend'] = self #Call the appropriate constructors and pass the required data - #if self.rank == 0: super().__init__() - #else: - # super().__init__(master_node_ranks, process_per_model) - # raise Exception("Teams exited main loop.") + def size(self): + """ Returns world size """ return self.mpimanager.get_world_size() - def master_node_ranks(self): - return self.mpimanager.get_master_node_ranks() + def scheduler_node_ranks(self): + """ Returns scheduler node ranks """ + return 
self.mpimanager.get_scheduler_node_ranks() @@ -641,12 +655,12 @@ def __init__(self, python_list, pds_id, backend_obj): def __del__(self): """ Destructor to be called when a PDS falls out of scope and/or is being deleted. - Uses the backend to send a message to destroy the slaves' copy of the pds. + Uses the backend to send a message to destroy the teams' copy of the pds. """ try: self.backend_obj.delete_remote_pds(self.pds_id) except AttributeError: - #Catch "delete_remote_pds not defined" for slaves and ignore. + #Catch "delete_remote_pds not defined" for teams and ignore. pass @@ -670,13 +684,13 @@ def value(self): def __del__(self): """ Destructor to be called when a BDS falls out of scope and/or is being deleted. - Uses the backend to send a message to destroy the slaves' copy of the bds. + Uses the backend to send a message to destroy the teams' copy of the bds. """ try: backend.delete_remote_bds(self.bds_id) except AttributeError: - #Catch "delete_remote_pds not defined" for slaves and ignore. + #Catch "delete_remote_pds not defined" for teams and ignore. 
pass class BackendMPITestHelper: diff --git a/abcpy/backends/mpimanager.py b/abcpy/backends/mpimanager.py index d7f689a8..17a7b33c 100644 --- a/abcpy/backends/mpimanager.py +++ b/abcpy/backends/mpimanager.py @@ -1,99 +1,133 @@ from mpi4py import MPI import sys -mpimanager = None +__mpimanager = None class MPIManager(object): - - def __init__(self, master_node_ranks=[0], process_per_model=1): - self.world_communicator = MPI.COMM_WORLD - self.size = self.world_communicator.Get_size() - self.rank = self.world_communicator.Get_rank() + """Defines the behavior of the slaves/worker processes + + This class construct the MPI communicators structure needed + if the rank of the process is in scheduler_node_ranks, the process is a scheduler + then there is process_per_model process per communicator + """ + + def __init__(self, scheduler_node_ranks=[0], process_per_model=1): + """ + Parameters + ---------- + scheduler_node_ranks: Python list + list of ranks computation should not happen on. + Should include the scheduler so it doesn't get + overwhelmed with work. 
+ + process_per_model: Integer + the number of process to allow to each model + """ + + self._world_communicator = MPI.COMM_WORLD + self._size = self._world_communicator.Get_size() + self._rank = self._world_communicator.Get_rank() #Construct the appropriate communicators for resource allocation to models - #There is one communicator for master nodes + #There is one communicator for scheduler nodes #And one communicator per model - self.master_node_ranks = master_node_ranks - self.process_per_model = process_per_model - self.model_color = int(((self.rank - sum(i < self.rank for i in master_node_ranks)) / process_per_model) + 1) - if(self.rank in master_node_ranks): - self.model_color = 0 - self.model_communicator = MPI.COMM_WORLD.Split(self.model_color, self.rank) - self.model_size = self.model_communicator.Get_size() - self.model_rank = self.model_communicator.Get_rank() + self._scheduler_node_ranks = scheduler_node_ranks + self._process_per_model = process_per_model + self._model_color = int(((self._rank - sum(i < self._rank for i in scheduler_node_ranks)) / process_per_model) + 1) + if(self._rank in scheduler_node_ranks): + self._model_color = 0 + self._model_communicator = MPI.COMM_WORLD.Split(self._model_color, self._rank) + self._model_size = self._model_communicator.Get_size() + self._model_rank = self._model_communicator.Get_rank() # create a communicator to broadcast instructions to slaves - self.master_color = 1 - if(self.model_color == 0 or self.model_rank == 0): - self.master_color = 0 - self.master_communicator = MPI.COMM_WORLD.Split(self.master_color, self.rank) - self.master_size = self.master_communicator.Get_size() - self.master_rank = self.master_communicator.Get_rank() - - self.leader = False - self.scheduler = False - self.team = False - self.worker = False - - if self.rank == 0: - self.scheduler = True - elif self.model_rank == 0: - self.team = True - self.leader = True + self._scheduler_color = 1 + if(self._model_color == 0 or self._model_rank 
== 0):
+            self._scheduler_color = 0
+        self._scheduler_communicator = MPI.COMM_WORLD.Split(self._scheduler_color, self._rank)
+        self._scheduler_size = self._scheduler_communicator.Get_size()
+        self._scheduler_rank = self._scheduler_communicator.Get_rank()
+
+        self._leader = False
+        self._scheduler = False
+        self._team = False
+        self._worker = False
+
+        if self._rank == 0:
+            self._scheduler = True
+        elif self._model_rank == 0:
+            self._team = True
+            self._leader = True
         else:
-            self.team = True
-            self.worker = True
+            self._team = True
+            self._worker = True

    def is_scheduler(self):
-        return self.scheduler
+        ''' Tells if the process is a scheduler '''
+        return self._scheduler

    def is_team(self):
-        return self.team
+        ''' Tells if the process is a team '''
+        return self._team

    def is_leader(self):
-        return self.leader
+        ''' Tells if the process is a leader '''
+        return self._leader

    def is_worker(self):
-        return self.worker
+        ''' Tells if the process is a worker '''
+        return self._worker

-    def get_master_node_ranks(self):
-        return self.master_node_ranks
+    def get_scheduler_node_ranks(self):
+        ''' Returns the list of scheduler node ranks '''
+        return self._scheduler_node_ranks

    def get_world_rank(self):
-        return self.rank
+        ''' Returns the current rank '''
+        return self._rank

    def get_world_size(self):
-        return self.size
+        ''' Returns the size of the world communicator '''
+        return self._size

    def get_world_communicator(self):
-        return self.world_communicator
+        ''' Returns the world communicator '''
+        return self._world_communicator

    def get_model_rank(self):
-        return self.model_rank
+        ''' Returns the rank in the model communicator '''
+        return self._model_rank

    def get_model_size(self):
-        return self.model_size
+        ''' Returns the size of the model communicator '''
+        return self._model_size

    def get_model_communicator(self):
-        return self.model_communicator
+        ''' Returns the model communicator '''
+        return self._model_communicator

-    def get_master_rank(self):
-        return self.master_rank
+    def get_scheduler_rank(self):
+        ''' Returns the rank in the scheduler communicator '''
+        return self._scheduler_rank

-    def get_master_size(self):
-        return self.master_size
+    def get_scheduler_size(self):
+        ''' Returns the size of the scheduler communicator '''
+        return self._scheduler_size

-    def get_master_communicator(self):
-        return self.master_communicator
+    def get_scheduler_communicator(self):
+        ''' Returns the scheduler communicator '''
+        return self._scheduler_communicator


 def get_mpi_manager():
+    ''' Return the instance of mpimanager
+    Creates one with default parameters if one does not already exist '''
    global mpimanager
-    # Error prone ?
    if mpimanager == None :
        create_mpi_manager([0], 1)
    return mpimanager


-def create_mpi_manager(master_node_ranks, process_per_model):
+def create_mpi_manager(scheduler_node_ranks, process_per_model):
+    ''' Creates the instance of mpimanager with given parameters '''
    global mpimanager
-    mpimanager = MPIManager(master_node_ranks, process_per_model)
\ No newline at end of file
+    mpimanager = MPIManager(scheduler_node_ranks, process_per_model)
\ No newline at end of file
diff --git a/tests/backend_tests_mpi.py b/tests/backend_tests_mpi.py
index 07c88ae8..d0b81e9d 100644
--- a/tests/backend_tests_mpi.py
+++ b/tests/backend_tests_mpi.py
@@ -10,12 +10,12 @@ def setUpModule():
    If an exception is raised in a setUpModule then none of the tests in the
    module will be run.

-    This is useful because the slaves run in a while loop on initialization
-    only responding to the master's commands and will never execute anything else.
+    This is useful because the teams run in a while loop on initialization
+    only responding to the scheduler's commands and will never execute anything else.

-    On termination of master, the slaves call quit() that raises a SystemExit().
+    On termination of scheduler, the teams call quit() that raises a SystemExit().
Because of the behaviour of setUpModule, it will not run any unit tests - for the slave and we now only need to write unit-tests from the master's + for the team and we now only need to write unit-tests from the scheduler's point of view. ''' global rank,backend_mpi @@ -31,8 +31,8 @@ def test_parallelize(self): pds_map = backend_mpi.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend_mpi.collect(pds_map) - for master_index in backend_mpi.master_node_ranks(): - self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") + for scheduler_index in backend_mpi.scheduler_node_ranks(): + self.assertTrue(scheduler_index not in res,"Node in scheduler_node_ranks performed map.") def test_map(self): data = [1,2,3,4,5] @@ -48,7 +48,7 @@ def test_broadcast(self): bds = backend_mpi.broadcast(100) - #Pollute the BDS values of the master to confirm slaves + #Pollute the BDS values of the scheduler to confirm teams # use their broadcasted value for k,v in backend_mpi.bds_store.items(): backend_mpi.bds_store[k] = 99999 @@ -68,13 +68,13 @@ def check_if_exists(x): data = [1,2,3,4,5] pds = backend_mpi.parallelize(data) - #Check if the pds we just created exists in all the slaves(+master) + #Check if the pds we just created exists in all the teams(+scheduler) id_check_pds = backend_mpi.parallelize([pds.pds_id]*5) pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) self.assertTrue(False not in backend_mpi.collect(pds_check_result),"PDS was not created") - #Delete the PDS on master and try again + #Delete the PDS on scheduler and try again del pds pds_check_result = backend_mpi.map(check_if_exists,id_check_pds) @@ -90,12 +90,12 @@ def check_if_exists(x): data = [1,2,3,4,5] bds = backend_mpi.broadcast(data) - #Check if the pds we just created exists in all the slaves(+master) + #Check if the pds we just created exists in all the teams(+scheduler) id_check_bds = backend_mpi.parallelize([bds.bds_id]*5) bds_check_result = 
backend_mpi.map(check_if_exists, id_check_bds) self.assertTrue(False not in backend_mpi.collect(bds_check_result),"BDS was not created") - #Delete the PDS on master and try again + #Delete the PDS on scheduler and try again del bds bds_check_result = backend_mpi.map(check_if_exists,id_check_bds) self.assertTrue(True not in backend_mpi.collect(bds_check_result),"BDS was not deleted") diff --git a/tests/backend_tests_mpi_model_mpi.py b/tests/backend_tests_mpi_model_mpi.py index ed01a2e6..2f196bec 100644 --- a/tests/backend_tests_mpi_model_mpi.py +++ b/tests/backend_tests_mpi_model_mpi.py @@ -8,12 +8,12 @@ def setUpModule(): If an exception is raised in a setUpModule then none of the tests in the module will be run. - This is useful because the slaves run in a while loop on initialization - only responding to the master's commands and will never execute anything else. + This is useful because the teams run in a while loop on initialization + only responding to the scheduler's commands and will never execute anything else. - On termination of master, the slaves call quit() that raises a SystemExit(). + On termination of scheduler, the teams call quit() that raises a SystemExit(). Because of the behaviour of setUpModule, it will not run any unit tests - for the slave and we now only need to write unit-tests from the master's + for the team and we now only need to write unit-tests from the scheduler's point of view. 
''' global rank,backend_mpi @@ -29,8 +29,8 @@ def test_parallelize(self): pds_map = backend_mpi.map(lambda model_comm, x: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend_mpi.collect(pds_map) - for master_index in backend_mpi.master_node_ranks(): - self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") + for scheduler_index in backend_mpi.scheduler_node_ranks(): + self.assertTrue(scheduler_index not in res,"Node in scheduler_node_ranks performed map.") def test_map(self): def square_mpi(model_comm, x): @@ -52,7 +52,7 @@ def test_broadcast(self): bds = backend_mpi.broadcast(100) - #Pollute the BDS values of the master to confirm slaves + #Pollute the BDS values of the scheduler to confirm teams # use their broadcasted value for k,v in backend_mpi.bds_store.items(): backend_mpi.bds_store[k] = 99999 @@ -74,13 +74,13 @@ def check_if_exists(model_comm, x): data = [1,2,3,4,5] pds = backend_mpi.parallelize(data) - #Check if the pds we just created exists in all the slaves(+master) + #Check if the pds we just created exists in all the teams(+scheduler) id_check_pds = backend_mpi.parallelize([pds.pds_id]*5) pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) self.assertTrue(False not in backend_mpi.collect(pds_check_result),"PDS was not created") - #Delete the PDS on master and try again + #Delete the PDS on scheduler and try again del pds pds_check_result = backend_mpi.map(check_if_exists,id_check_pds) @@ -96,12 +96,12 @@ def check_if_exists(model_comm, x): data = [1,2,3,4,5] bds = backend_mpi.broadcast(data) - #Check if the pds we just created exists in all the slaves(+master) + #Check if the pds we just created exists in all the teams(+scheduler) id_check_bds = backend_mpi.parallelize([bds.bds_id]*5) bds_check_result = backend_mpi.map(check_if_exists, id_check_bds) self.assertTrue(False not in backend_mpi.collect(bds_check_result),"BDS was not created") - #Delete the PDS on master and try again + #Delete the PDS on scheduler 
and try again del bds bds_check_result = backend_mpi.map(check_if_exists,id_check_bds) self.assertTrue(True not in backend_mpi.collect(bds_check_result),"BDS was not deleted") From cdde1b177bc2ae7ff3c1703447d9dc098df6e1c8 Mon Sep 17 00:00:00 2001 From: Pierre Kuenzli Date: Fri, 2 Nov 2018 12:44:15 +0000 Subject: [PATCH 04/41] added exception handling in run_function which was missing --- abcpy/backends/mpi.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index ab212409..2dba862b 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -358,10 +358,14 @@ def run_function(self, function_packed, data_item): Passes the model communicator if ther is more than one process per model """ func = cloudpickle.loads(function_packed) - if(self.mpimanager.get_model_size() > 1): - return func(self.mpimanager.get_model_communicator(), data_item) - else: - return func(data_item) + try: + if(self.mpimanager.get_model_size() > 1): + res = func(self.mpimanager.get_model_communicator(), data_item) + else: + res = func(data_item) + except Exception as e: + res = e + return res def __worker_run(self): From df67aba255ed40ced381356bc1ad3baf3756c4df Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Thu, 22 Nov 2018 15:50:52 +0100 Subject: [PATCH 05/41] added a paragraph in the doc an a small example on the usage of MPI nested parallelization --- doc/source/parallelization.rst | 28 +++++++++++++++++++++- examples/backends/mpi/mpi_model.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 examples/backends/mpi/mpi_model.py diff --git a/doc/source/parallelization.rst b/doc/source/parallelization.rst index 9df905cd..33dbc7b3 100644 --- a/doc/source/parallelization.rst +++ b/doc/source/parallelization.rst @@ -69,7 +69,7 @@ run. 
A minimum of two ranks is required, since rank 0 (master) is used to orchestrate the calculation and all other ranks (workers) actually perform the calculation. -The standard way to run the script using Open MPI is directly via mpirun like below +The standard way to run the script using MPI is directly via mpirun like below or on a cluster through a job scheduler like Slurm: :: @@ -80,6 +80,32 @@ or on a cluster through a job scheduler like Slurm: The adapted Python code can be found in `examples/backend/mpi/pmcabc_gaussian.py`. +MPI nested parallelization +-------------------------- + +Sometimes, the model on which we want to perform parameter inference is itself parallelized. +When this parallelization is achieved using threads, there is no problem for each MPI process +to spawn multiple threads on a node. But even if the standard allows it, there can be situation +where the spawning of MPI process at runtime is limited. In order to have a fully portable execution model, +it is then preferable to spawn as many MPI processes as necessary and then split the MPI processes +into multiple communicators. For example, if we want to run n instances of a MPI model and allows +m processes to each instance, we will have to spawn (n*m)+1 processes, because of the master process. + +The MPI parallelized model has then to be able to take the communicator +created by abcpy as a parameter. + +In the case of MPI nested parallelization, we have to specify the number of process that will be +allocated to each MPI model. For example + +.. literalinclude:: ../../examples/backends/mpi/mpi_model.py + :language: python + :lines: 6-7 + :dedent: 4 + +An example using the nested MPI parallelization can be found in +`examples/backend/mpi/mpi_model.py`. + + Note that in order to run jobs in parallel you need to have MPI installed on the system(s) in question with the requisite Python bindings for MPI (mpi4py). 
The dependencies of the MPI backend can be install with diff --git a/examples/backends/mpi/mpi_model.py b/examples/backends/mpi/mpi_model.py new file mode 100644 index 00000000..76bd3c05 --- /dev/null +++ b/examples/backends/mpi/mpi_model.py @@ -0,0 +1,37 @@ +import numpy as np + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x**2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1,2,3,4,5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + print(run_model()) From a70eb734ee056490e285992ce80f8d7b0dd9e1ef Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Thu, 22 Nov 2018 15:59:25 +0000 Subject: [PATCH 06/41] Correcting references of the published papers in README.md --- README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ec75eca6..457be273 100644 --- a/README.md +++ b/README.md @@ -62,23 +62,22 @@ BibTex reference. Publications in which ABCpy was applied: -* R. Dutta, M. Schoengens, A. Ummadisingu, J. P. Onnela, A. Mira, "ABCpy: A - High-Performance Computing Perspective to Approximate Bayesian Computation", - 2017, arXiv:1711.04694 - * R. Dutta, J. P. Onnela, A. Mira, "Bayesian Inference of Spreading Processes - on Networks", 2017, arXiv:1709.08862 + on Networks", 2018, Proc. R. Soc. A, 474(2215), 20180129. + +* R. Dutta, Z. 
Faidon Brotzakis and A. Mira, "Bayesian Calibration of + Force-fields from Experimental Data: TIP4P Water", 2018, Journal of Chemical Physics 149, 154110. * R. Dutta, B. Chopard, J. Lätt, F. Dubois, K. Zouaoui Boudjeltia and A. Mira, "Parameter Estimation of Platelets Deposition: Approximate Bayesian - Computation with High Performance Computing", 2017, arXiv:1710.01054 + Computation with High Performance Computing", 2018, Frontiers in physiology, 9. * A. Ebert, R. Dutta, P. Wu, K. Mengersen and A. Mira, "Likelihood-Free Parameter Estimation for Dynamic Queueing Networks", 2018, arXiv:1804.02526 -* R. Dutta, Z. Faidon Brotzakis and A. Mira, "Bayesian Calibration of - Force-fields from Experimental Data: TIP4P Water", 2018, arXiv:1804.02742 - +* R. Dutta, M. Schoengens, A. Ummadisingu, N. Widerman, J. P. Onnela, A. Mira, "ABCpy: A + High-Performance Computing Perspective to Approximate Bayesian Computation", + 2017, arXiv:1711.04694 ## License ABCpy is published under the BSD 3-clause license, see [here](LICENSE). From 8cd8ef7c3696a998ce259aa0c0625c275a1314dc Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Thu, 29 Nov 2018 16:16:55 +0100 Subject: [PATCH 07/41] Adapt documentation and start implementing mpi parallelized showcase model --- doc/source/parallelization.rst | 36 +++++---- examples/backends/mpi/mpi_model.py | 114 ++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 16 deletions(-) diff --git a/doc/source/parallelization.rst b/doc/source/parallelization.rst index 33dbc7b3..ffc973e5 100644 --- a/doc/source/parallelization.rst +++ b/doc/source/parallelization.rst @@ -80,32 +80,38 @@ or on a cluster through a job scheduler like Slurm: The adapted Python code can be found in `examples/backend/mpi/pmcabc_gaussian.py`. -MPI nested parallelization --------------------------- +Nested parallelization +---------------------- -Sometimes, the model on which we want to perform parameter inference is itself parallelized. 
-When this parallelization is achieved using threads, there is no problem for each MPI process -to spawn multiple threads on a node. But even if the standard allows it, there can be situation -where the spawning of MPI process at runtime is limited. In order to have a fully portable execution model, -it is then preferable to spawn as many MPI processes as necessary and then split the MPI processes -into multiple communicators. For example, if we want to run n instances of a MPI model and allows -m processes to each instance, we will have to spawn (n*m)+1 processes, because of the master process. +Sometimes, the model on which we want to perform parameter inference has itself +large compute requirements and needs parallelization. When this parallelization +is achieved using threads, there MPI has just to be configured that each MPI +rank can spawn multiple threads on a node. However, there might be situations +where node-local parallelization using threads is not sufficient and +parallelization across nodes is required. -The MPI parallelized model has then to be able to take the communicator -created by abcpy as a parameter. - -In the case of MPI nested parallelization, we have to specify the number of process that will be -allocated to each MPI model. For example +Parallelization of the forward model across nodes is possible *but limited* to +the MPI backend. Technically, this is implemented using individual MPI +communicators for each forward model. The amount of ranks per communicator +can be passed at the initialization of the backend as follows: .. literalinclude:: ../../examples/backends/mpi/mpi_model.py :language: python :lines: 6-7 :dedent: 4 +Here each model is assigned a MPI communicator with 2 ranks. Clearly, the MPI +job has to be configured manually that the total amount of MPI ranks is ideally +a multiple of the ranks per communicator plus one additional rank for the +master. 
For example, if we want to run n instances of a MPI model and allows m +processes to each instance, we will have to spawn (n*m)+1 ranks. + +For nested parallelization the model has to be able to take an MPI communicator +as a parameter. + An example using the nested MPI parallelization can be found in `examples/backend/mpi/mpi_model.py`. - Note that in order to run jobs in parallel you need to have MPI installed on the system(s) in question with the requisite Python bindings for MPI (mpi4py). The dependencies of the MPI backend can be install with diff --git a/examples/backends/mpi/mpi_model.py b/examples/backends/mpi/mpi_model.py index 76bd3c05..8c80b7dc 100644 --- a/examples/backends/mpi/mpi_model.py +++ b/examples/backends/mpi/mpi_model.py @@ -1,4 +1,6 @@ import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector def setup_backend(): global backend @@ -19,6 +21,114 @@ def square_mpi(model_comm, x): res = backend.collect(pds_map) return res + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. 
+ """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if value.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + + def get_output_dimension(self): + return 2 + + + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + # def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): + rank = mpi_comm.Get_rank() + + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + data = mpi_comm.gather(vector_of_k_samples) + + # Format the output to obey API but only on rank 0 + if rank == 0: + result = [None]*k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + return result + else: + return + + + def pdf(self, input_values, x): + mu = input_values[0] + sigma = input_values[1] + pdf = np.norm(mu,sigma).pdf(x) + return pdf + + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100) + + # define 
prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + from abcpy.continuousmodels import Normal + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import LogReg + distance_calculator = LogReg(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import PMCABC + sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) + + # sample from scheme + T, n_sample, n_samples_per_param = 3, 250, 10 + eps_arr = np.array([.75]) + epsilon_percentile = 10 + journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) + + return journal + import unittest from mpi4py import MPI @@ -34,4 +144,6 @@ def test_example(self): if __name__ == "__main__": setup_backend() - print(run_model()) + #print(run_mod#print(run_model()) + model = NestedBivariateGaussian([100,200]) + print(infer_parameters()) From 33dc4d2ef3187e4690e9d8c2529a8de025b15788 Mon Sep 17 00:00:00 2001 From: pierre Date: Wed, 5 Dec 2018 17:27:48 +0100 Subject: [PATCH 08/41] work in progress mpi backend integration --- abcpy/backends/mpi.py | 2 +- abcpy/continuousmodels.py | 10 +- abcpy/discretemodels.py | 6 +- abcpy/distances.py | 6 + abcpy/graphtools.py | 6 +- abcpy/inferences.py | 22 ++- abcpy/perturbationkernel.py | 7 + examples/backends/mpi/mpi_model_pmc.py | 157 +++++++++++++++++ .../mpi/{mpi_model.py => mpi_model_pmcabc.py} | 16 +- examples/backends/mpi/mpi_model_simple.py | 161 ++++++++++++++++++ 10 files changed, 373 insertions(+), 20 deletions(-) create mode 100644 examples/backends/mpi/mpi_model_pmc.py rename examples/backends/mpi/{mpi_model.py => mpi_model_pmcabc.py} (87%) create mode 100644 examples/backends/mpi/mpi_model_simple.py diff --git a/abcpy/backends/mpi.py 
b/abcpy/backends/mpi.py index 2dba862b..4390e7f7 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -360,7 +360,7 @@ def run_function(self, function_packed, data_item): func = cloudpickle.loads(function_packed) try: if(self.mpimanager.get_model_size() > 1): - res = func(self.mpimanager.get_model_communicator(), data_item) + res = func(data_item, self.mpimanager.get_model_communicator()) else: res = func(data_item) except Exception as e: diff --git a/abcpy/continuousmodels.py b/abcpy/continuousmodels.py index a06be373..bf8ada5d 100644 --- a/abcpy/continuousmodels.py +++ b/abcpy/continuousmodels.py @@ -65,7 +65,7 @@ def _check_output(self, parameters): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from a uniform distribution using the current values for each probabilistic model from which the model derives. @@ -167,7 +167,7 @@ def _check_output(self, parameters): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from a normal distribution using the current values for each probabilistic model from which the model derives. @@ -248,7 +248,7 @@ def __init__(self, parameters, name='StudentT'): super(StudentT, self).__init__(input_parameters, name) self.visited = False - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from a Student's T-distribution using the current values for each probabilistic model from which the model derives. 
@@ -398,7 +398,7 @@ def _check_output(self, parameters): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from a multivariate normal distribution using the current values for each probabilistic model from which the model derives. @@ -532,7 +532,7 @@ def _check_output(self, parameters): """ return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from a multivariate Student's T-distribution using the current values for each probabilistic model from which the model derives. diff --git a/abcpy/discretemodels.py b/abcpy/discretemodels.py index 81c12cf4..6520f71c 100644 --- a/abcpy/discretemodels.py +++ b/abcpy/discretemodels.py @@ -52,7 +52,7 @@ def _check_output(self, parameters): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from the bernoulli distribution associtated with the probabilistic model. @@ -157,7 +157,7 @@ def _check_output(self, parameters): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples from a binomial distribution using the current values for each probabilistic model from which the model derives. @@ -256,7 +256,7 @@ def _check_output(self, parameters): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """ Samples k values from the defined possion distribution. 
diff --git a/abcpy/distances.py b/abcpy/distances.py index 8c2df7ed..267d4d90 100644 --- a/abcpy/distances.py +++ b/abcpy/distances.py @@ -239,6 +239,12 @@ def distance(self, d1, d2): d1, d2: list A list, containing a list describing the data set """ + + print("d1 : ", d1) + print("type(d1) : ", type(d1)) + print("d2 : ", d2) + print("type(d2) : ", type(d2)) + if not isinstance(d1, list): raise TypeError('Data is not of allowed types') if not isinstance(d2, list): diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index d0f706ef..2c73d29e 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -388,7 +388,7 @@ def get_correct_ordering(self, parameters_and_models, models=None, is_root = Tru return ordered_parameters - def simulate(self, n_samples_per_param, rng=np.random.RandomState()): + def simulate(self, n_samples_per_param, rng=np.random.RandomState(), mpi_comm=None): """Simulates data of each model using the currently sampled or perturbed parameters. Parameters @@ -405,8 +405,10 @@ def simulate(self, n_samples_per_param, rng=np.random.RandomState()): for model in self.model: parameters_compatible = model._check_input(model.get_input_values()) if parameters_compatible: - simulation_result = model.forward_simulate(model.get_input_values(), n_samples_per_param, rng=rng) + simulation_result = model.forward_simulate(model.get_input_values(), n_samples_per_param, rng=rng, mpi_comm=mpi_comm) result.append(simulation_result) + if mpi_comm.Get_rank() != 0: + return None else: return None return result diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 0f54a3fc..9b3e080a 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2,6 +2,8 @@ import logging import numpy as np +import sys + from abc import ABCMeta, abstractmethod, abstractproperty from scipy import optimize @@ -515,7 +517,9 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples return journal # define helper functions for map step - def 
_resample_parameter(self, rng): + #def _resample_parameter(self, rng): + #def _resample_parameter(self, mpi_comm, rng): + def _resample_parameter(self, rng, mpi_comm=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -548,7 +552,7 @@ def _resample_parameter(self, rng): if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) theta = self.get_parameters() - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 else: @@ -560,13 +564,19 @@ def _resample_parameter(self, rng): if(perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!=0): theta = perturbation_output[1] break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(mpi_comm, self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 if(y_sim is not None): + print("Will compute distance") + print("self.accepted_parameters_manager.observations_bds.value() : ", self.accepted_parameters_manager.observations_bds.value()) + print("type(self.accepted_parameters_manager.observations_bds.value()) : ", type(self.accepted_parameters_manager.observations_bds.value())) + print("y_sim : ", y_sim) + print("type(y_sim) : ", type(y_sim)) distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(),y_sim) self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) + print("Distance computed") else: distance = self.distance.dist_max() @@ -875,7 +885,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 return journal # define helper functions for map step - def _approx_lik_calc(self, theta): + def _approx_lik_calc(self, theta, mpi_comm=None): """ Compute likelihood for new parameters using approximate likelihood function @@ -892,7 +902,7 @@ def 
_approx_lik_calc(self, theta): # Simulate the fake data from the model given the parameter value theta # print("DEBUG: Simulate model for parameter " + str(theta)) - y_sim = self.simulate(self.n_samples_per_param, self.rng) + y_sim = self.simulate(self.n_samples_per_param, self.rng, mpi_comm=mpi_comm) # print("DEBUG: Extracting observation.") obs = self.accepted_parameters_manager.observations_bds.value() # print("DEBUG: Computing likelihood...") @@ -910,7 +920,7 @@ def _approx_lik_calc(self, theta): # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta)) return (total_pdf_at_theta, 1) - def _calculate_weight(self, theta): + def _calculate_weight(self, theta, mpi_comm=None): """ Calculates the weight for the given parameter using accepted_parameters, accepted_cov_mat diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index ec0d5d23..cbbaae01 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -269,8 +269,12 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): The covariance matrix corresponding to this kernel. 
""" + print("in calculate_cov line 255") + if(accepted_parameters_manager.accepted_weights_bds is not None): weights = accepted_parameters_manager.accepted_weights_bds.value() + print("np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float) : ", np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float).shape) + print("weights.reshape(-1).astype(float) : ", weights.reshape(-1).astype(float).shape) cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), aweights=weights.reshape(-1).astype(float), rowvar=False) else: @@ -278,6 +282,9 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): cov = np.var(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float)) else: cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), rowvar=False) + + print("calculate_cov done") + return cov diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py new file mode 100644 index 00000000..04744824 --- /dev/null +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -0,0 +1,157 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x**2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1,2,3,4,5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where 
we assume + the standard deviation to be unit. + """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if value.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + + def get_output_dimension(self): + return 2 + + + #def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + + rank = mpi_comm.Get_rank() + + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) + data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) + + # Format the output to obey API but only on rank 0 + if rank == 0: + result = [None]*k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + print("Process 0 will return : ", result) + return result + else: + return + 
+ + def pdf(self, input_values, x): + mu = input_values[0] + sigma = input_values[1] + pdf = np.norm(mu,sigma).pdf(x) + return pdf + + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100) + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + from abcpy.continuousmodels import Normal + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import LogReg + distance_calculator = LogReg(statistics_calculator) + + from abcpy.approx_lhd import SynLiklihood + approx_lhd = SynLiklihood(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import PMC + sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) + + # sample from scheme + #T, n_sample, n_samples_per_param = 3, 250, 10 + T, n_sample, n_samples_per_param = 1, 1, 1 + + journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + #print(run_mod#print(run_model()) + model = NestedBivariateGaussian([100,200]) + print(infer_parameters()) diff --git a/examples/backends/mpi/mpi_model.py b/examples/backends/mpi/mpi_model_pmcabc.py similarity index 87% rename from examples/backends/mpi/mpi_model.py rename to examples/backends/mpi/mpi_model_pmcabc.py index 8c80b7dc..659ea7a1 100644 --- a/examples/backends/mpi/mpi_model.py +++ 
b/examples/backends/mpi/mpi_model_pmcabc.py @@ -61,8 +61,10 @@ def get_output_dimension(self): return 2 + #def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - # def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): + rank = mpi_comm.Get_rank() # Extract the input parameters @@ -73,7 +75,9 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) # Format the output to obey API but only on rank 0 if rank == 0: @@ -83,6 +87,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com element1 = data[1][i] point = np.array([element0, element1]) result[i] = point + print("Process 0 will return : ", result) return result else: return @@ -117,14 +122,19 @@ def infer_parameters(): from abcpy.distances import LogReg distance_calculator = LogReg(statistics_calculator) + from abcpy.approx_lhd import SynLiklihood + approx_lhd = SynLiklihood(statistics_calculator) + # define sampling scheme from abcpy.inferences import PMCABC sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) - + # sample from scheme - T, n_sample, n_samples_per_param = 3, 250, 10 + #T, n_sample, n_samples_per_param = 3, 250, 10 + T, n_sample, n_samples_per_param = 1, 1, 1 eps_arr = np.array([.75]) epsilon_percentile = 10 + journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) return journal diff --git 
a/examples/backends/mpi/mpi_model_simple.py b/examples/backends/mpi/mpi_model_simple.py new file mode 100644 index 00000000..6b1d0e21 --- /dev/null +++ b/examples/backends/mpi/mpi_model_simple.py @@ -0,0 +1,161 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend() + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x**2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1,2,3,4,5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. + """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if value.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + + def get_output_dimension(self): + return 2 + + + #def forward_simulate(self, input_values, k, 
rng=np.random.RandomState(), mpi_comm=None): + #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): + + #rank = mpi_comm.Get_rank() + + # Extract the input parameters + #mu = input_values[rank] + mu = input_values[0] + sigma = 1 + + #print("salut") + + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + #data = mpi_comm.gather(vector_of_k_samples) + + data = vector_of_k_samples + + # Format the output to obey API but only on rank 0 + #if rank == 0: + result = [None]*k + # for i in range(k): + #element0 = data[0][i] + #element1 = data[1][i] + #element0 = data[0][0] + #element1 = data[1][0] + element0 = data[0] + element1 = data[1] + point = np.array([element0, element1]) + result[0] = point + return result + #else: + # return + + + def pdf(self, input_values, x): + mu = input_values[0] + sigma = input_values[1] + pdf = np.norm(mu,sigma).pdf(x) + return pdf + + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100) + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + from abcpy.continuousmodels import Normal + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import LogReg + distance_calculator = LogReg(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import PMCABC + sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) + + # sample from scheme + T, n_sample, n_samples_per_param = 3, 250, 10 + eps_arr = 
np.array([.75]) + epsilon_percentile = 10 + journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + #print(run_mod#print(run_model()) + #print(run_model()) + model = NestedBivariateGaussian([100,200]) + print(infer_parameters()) From 0105854eea4f81f863df9c2492a56de80bd71d2f Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Thu, 6 Dec 2018 11:24:53 +0100 Subject: [PATCH 09/41] added a pmc mpi example --- .../mpi/mpi_pmc_hierarchical_models.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 examples/backends/mpi/mpi_pmc_hierarchical_models.py diff --git a/examples/backends/mpi/mpi_pmc_hierarchical_models.py b/examples/backends/mpi/mpi_pmc_hierarchical_models.py new file mode 100644 index 00000000..46031115 --- /dev/null +++ b/examples/backends/mpi/mpi_pmc_hierarchical_models.py @@ -0,0 +1,93 @@ +import numpy as np + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=1) + +"""An example showing how to implement a bayesian network in ABCpy""" +def infer_parameters(): + # The data corresponding to model_1 defined below + grades_obs = [3.872486707973337, 4.6735380808674405, 3.9703538990858376, 4.11021272048805, 4.211048655421368, 4.154817956586653, 4.0046893064392695, 4.01891381384729, 4.123804757702919, 4.014941267301294, 3.888174595940634, 4.185275142948246, 4.55148774469135, 3.8954427675259016, 4.229264035335705, 3.839949451328312, 4.039402553532825, 4.128077814241238, 4.361488645531874, 4.086279074446419, 4.370801602256129, 3.7431697332475466, 4.459454162392378, 
3.8873973643008255, 4.302566721487124, 4.05556051626865, 4.128817316703757, 3.8673704442215984, 4.2174459453805015, 4.202280254493361, 4.072851400451234, 3.795173229398952, 4.310702877332585, 4.376886328810306, 4.183704734748868, 4.332192463368128, 3.9071312388426587, 4.311681374107893, 3.55187913252144, 3.318878360783221, 4.187850500877817, 4.207923106081567, 4.190462065625179, 4.2341474252986036, 4.110228694304768, 4.1589891480847765, 4.0345604687633045, 4.090635481715123, 3.1384654393449294, 4.20375641386518, 4.150452690356067, 4.015304457401275, 3.9635442007388195, 4.075915739179875, 3.5702080541929284, 4.722333310410388, 3.9087618197155227, 4.3990088006390735, 3.968501165774181, 4.047603645360087, 4.109184340976979, 4.132424805281853, 4.444358334346812, 4.097211737683927, 4.288553086265748, 3.8668863066511303, 3.8837108501541007] + + # The prior information changing the class size and social background, depending on school location + from abcpy.continuousmodels import Uniform, Normal + school_location = Uniform([[0.2], [0.3]], ) + + # The average class size of a certain school + class_size = Normal([[school_location], [0.1]], ) + + # The social background of a student + background = Normal([[school_location], [0.1]], ) + + # The grade a student would receive without any bias + grade_without_additional_effects = Normal([[4.5], [0.25]], ) + + # The grade a student of a certain school receives + final_grade = grade_without_additional_effects-class_size-background + + # The data corresponding to model_2 defined below + scholarship_obs = [2.7179657436207805, 2.124647285937229, 3.07193407853297, 2.335024761813643, 2.871893855192, 3.4332002458233837, 3.649996835818173, 3.50292335102711, 2.815638168018455, 2.3581613289315992, 2.2794821846395568, 2.8725835459926503, 3.5588573782815685, 2.26053126526137, 1.8998143530749971, 2.101110815311782, 2.3482974964831573, 2.2707679029919206, 2.4624550491079225, 2.867017757972507, 3.204249152084959, 2.4489542437714213, 
1.875415915801106, 2.5604889644872433, 3.891985093269989, 2.7233633223405205, 2.2861070389383533, 2.9758813233490082, 3.1183403287267755, 2.911814060853062, 2.60896794303205, 3.5717098647480316, 3.3355752461779824, 1.99172284546858, 2.339937680892163, 2.9835630207301636, 2.1684912355975774, 3.014847335983034, 2.7844122961916202, 2.752119871525148, 2.1567428931391635, 2.5803629307680644, 2.7326646074552103, 2.559237193255186, 3.13478196958166, 2.388760269933492, 3.2822443541491815, 2.0114405441787437, 3.0380056368041073, 2.4889680313769724, 2.821660164621084, 3.343985964873723, 3.1866861970287808, 4.4535037154856045, 3.0026333138006027, 2.0675706089352612, 2.3835301730913185, 2.584208398359566, 3.288077633446465, 2.6955853384148183, 2.918315169739928, 3.2464814419322985, 2.1601516779909433, 3.231003347780546, 1.0893224045062178, 0.8032302688764734, 2.868438615047827] + + # A quantity that determines whether a student will receive a scholarship + scholarship_without_additional_effects = Normal([[2], [0.5]], ) + + # A quantity determining whether a student receives a scholarship, including his social background + final_scholarship = scholarship_without_additional_effects + 3*background + + # Define a summary statistics for final grade and final scholarship + from abcpy.statistics import Identity + statistics_calculator_final_grade = Identity(degree = 2, cross = False) + statistics_calculator_final_scholarship = Identity(degree = 3, cross = False) + + # Define a distance measure for final grade and final scholarship + from abcpy.approx_lhd import SynLiklihood + approx_lhd_final_grade = SynLiklihood(statistics_calculator_final_grade) + approx_lhd_final_scholarship = SynLiklihood(statistics_calculator_final_scholarship) + + # Define a backend + # from abcpy.backends import BackendDummy as Backend + # backend = Backend() + + setup_backend() + + # Define a perturbation kernel + from abcpy.perturbationkernel import DefaultKernel + kernel = DefaultKernel([school_location, 
class_size, grade_without_additional_effects, \ + background, scholarship_without_additional_effects]) + + # Define sampling parameters + T, n_sample, n_samples_per_param = 3, 250, 10 + + # Define sampler + from abcpy.inferences import PMC + sampler = PMC([final_grade, final_scholarship], \ + [approx_lhd_final_grade, approx_lhd_final_scholarship], backend, kernel) + + # Sample + journal = sampler.sample([grades_obs, scholarship_obs], T, n_sample, n_samples_per_param) + + +def analyse_journal(journal): + # output parameters and weights + print(journal.get_stored_output_values()) + print(journal.weights) + + # do post analysis + print(journal.posterior_mean()) + print(journal.posterior_cov()) + print(journal.posterior_histogram()) + + # print configuration + print(journal.configuration) + + # save and load journal + journal.save("experiments.jnl") + + from abcpy.output import Journal + new_journal = Journal.fromFile('experiments.jnl') + +if __name__ == "__main__": + journal = infer_parameters() + analyse_journal(journal) From 78eaa40b68481f943a2f82fc953d3bdbdb65ec9e Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Thu, 6 Dec 2018 11:44:28 +0100 Subject: [PATCH 10/41] mpi_pmc_hierarchical_models.py is working with more than one process per model and mpi backend, but this is still a dummy computation while there is no mpi operation in the model --- abcpy/graphtools.py | 2 +- abcpy/probabilisticmodels.py | 18 +++++++++--------- .../mpi/mpi_pmc_hierarchical_models.py | 2 +- examples/backends/mpi/pmcabc_gaussian.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index 2c73d29e..ab35900d 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -407,7 +407,7 @@ def simulate(self, n_samples_per_param, rng=np.random.RandomState(), mpi_comm=No if parameters_compatible: simulation_result = model.forward_simulate(model.get_input_values(), n_samples_per_param, rng=rng, mpi_comm=mpi_comm) 
result.append(simulation_result) - if mpi_comm.Get_rank() != 0: + if mpi_comm != None and mpi_comm.Get_rank() != 0: return None else: return None diff --git a/abcpy/probabilisticmodels.py b/abcpy/probabilisticmodels.py index b783f7bf..07bd956d 100644 --- a/abcpy/probabilisticmodels.py +++ b/abcpy/probabilisticmodels.py @@ -681,7 +681,7 @@ def _check_output(self, values): @abstractmethod - def forward_simulate(self, input_values, k, rng): + def forward_simulate(self, input_values, k, rng, mpi_comm): """ Provides the output (pseudo data) from a forward simulation of the current model. @@ -843,7 +843,7 @@ def get_input_values(self): return [] - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): return [np.array(self._fixed_values) for _ in range(k)] @@ -889,7 +889,7 @@ def __init__(self, parameters, name=''): super(ModelResultingFromOperation, self).__init__(input_parameters, name) - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): raise NotImplementedError @@ -971,7 +971,7 @@ def sample_from_input_models(self, k, rng=np.random.RandomState()): class SummationModel(ModelResultingFromOperation): """This class represents all probabilistic models resulting from an addition of two probabilistic models""" - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """Adds the sampled values of both parent distributions. 
Parameters @@ -1015,7 +1015,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState()): class SubtractionModel(ModelResultingFromOperation): """This class represents all probabilistic models resulting from an subtraction of two probabilistic models""" - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """Adds the sampled values of both parent distributions. Parameters @@ -1057,7 +1057,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState()): class MultiplicationModel(ModelResultingFromOperation): """This class represents all probabilistic models resulting from a multiplication of two probabilistic models""" - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """Multiplies the sampled values of both parent distributions element wise. Parameters @@ -1099,7 +1099,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState()): class DivisionModel(ModelResultingFromOperation): """This class represents all probabilistic models resulting from a division of two probabilistic models""" - def forward_simulate(self, input_valus, k, rng=np.random.RandomState()): + def forward_simulate(self, input_valus, k, rng=np.random.RandomState(), mpi_comm=None): """Divides the sampled values of both parent distributions. Parameters @@ -1161,7 +1161,7 @@ def _check_input(self, input_values): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """Raises the sampled values of the base by the exponent. 
Parameters @@ -1223,7 +1223,7 @@ def _check_input(self, input_values): return True - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): """Raises the base by the sampled value of the exponent. Parameters diff --git a/examples/backends/mpi/mpi_pmc_hierarchical_models.py b/examples/backends/mpi/mpi_pmc_hierarchical_models.py index 46031115..37e07fa6 100644 --- a/examples/backends/mpi/mpi_pmc_hierarchical_models.py +++ b/examples/backends/mpi/mpi_pmc_hierarchical_models.py @@ -4,7 +4,7 @@ def setup_backend(): global backend from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=1) + backend = Backend() """An example showing how to implement a bayesian network in ABCpy""" def infer_parameters(): diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index e1b2250d..f6b9126f 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -4,7 +4,7 @@ def setup_backend(): global backend from abcpy.backends import BackendMPI as Backend - backend = Backend() + backend = Backend(process_per_model=2) def infer_parameters(): From ae366cfbc64e836dd9eb7faddc80bc8b4d555f72 Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Thu, 6 Dec 2018 11:49:19 +0100 Subject: [PATCH 11/41] setting process_per_model=2 in mpi_pmc_hierarchical_models.py --- examples/backends/mpi/mpi_pmc_hierarchical_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/backends/mpi/mpi_pmc_hierarchical_models.py b/examples/backends/mpi/mpi_pmc_hierarchical_models.py index 37e07fa6..d56edf5d 100644 --- a/examples/backends/mpi/mpi_pmc_hierarchical_models.py +++ b/examples/backends/mpi/mpi_pmc_hierarchical_models.py @@ -4,7 +4,7 @@ def setup_backend(): global backend from abcpy.backends import BackendMPI as Backend - backend = Backend() + backend = 
Backend(process_per_model=2) """An example showing how to implement a bayesian network in ABCpy""" def infer_parameters(): From 54119146e3578364af6194c889eaef3552c55ab5 Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Fri, 7 Dec 2018 16:12:39 +0100 Subject: [PATCH 12/41] solved covariance bug --- examples/backends/mpi/mpi_model_pmc.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index 04744824..c143fdee 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -7,6 +7,7 @@ def setup_backend(): from abcpy.backends import BackendMPI as Backend backend = Backend(process_per_model=2) + #backend = Backend() def run_model(): def square_mpi(model_comm, x): @@ -103,7 +104,11 @@ def pdf(self, input_values, x): def infer_parameters(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() - y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100) + y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100).reshape(200) + + + + print("type : ", type(y_obs), " shape : ", y_obs.shape) # define prior from abcpy.continuousmodels import Uniform @@ -119,8 +124,8 @@ def infer_parameters(): statistics_calculator = Identity(degree = 2, cross = False) # define distance - from abcpy.distances import LogReg - distance_calculator = LogReg(statistics_calculator) + #from abcpy.distances import LogReg + #distance_calculator = LogReg(statistics_calculator) from abcpy.approx_lhd import SynLiklihood approx_lhd = SynLiklihood(statistics_calculator) @@ -131,7 +136,7 @@ def infer_parameters(): # sample from scheme #T, n_sample, n_samples_per_param = 3, 250, 10 - T, n_sample, n_samples_per_param = 1, 1, 1 + T, n_sample, n_samples_per_param = 2, 10, 10 journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) From aba999e81335eeec9dc3e7d209d081378b3a61bb Mon Sep 17 00:00:00 2001 From: 
"Pierre.Kuenzli" Date: Sat, 8 Dec 2018 22:22:55 +0100 Subject: [PATCH 13/41] debugging mpi model and backend --- abcpy/approx_lhd.py | 14 ++++++---- abcpy/inferences.py | 36 +++++++++++++++++++------- examples/backends/mpi/mpi_model_pmc.py | 2 +- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/abcpy/approx_lhd.py b/abcpy/approx_lhd.py index 46a000fa..6097064a 100644 --- a/abcpy/approx_lhd.py +++ b/abcpy/approx_lhd.py @@ -69,6 +69,8 @@ def __init__(self, statistics_calc): def likelihood(self, y_obs, y_sim): # print("DEBUG: SynLiklihood.likelihood().") if not isinstance(y_obs, list): + # print("type(y_obs) : ", type(y_obs), " , type(y_sim) : ", type(y_sim)) + # print("y_obs : ", y_obs) raise TypeError('Observed data is not of allowed types') if not isinstance(y_sim, list): @@ -81,20 +83,22 @@ def likelihood(self, y_obs, y_sim): # Extract summary statistics from the simulated data stat_sim = self.statistics_calc.statistics(y_sim) - + # Compute the mean, robust precision matrix and determinant of precision matrix - # print("DEBUG: meansim computation.") + print("DEBUG: meansim computation.") mean_sim = np.mean(stat_sim,0) - # print("DEBUG: robust_precision_sim computation.") + print("DEBUG: robust_precision_sim computation.") lw_cov_, _ = ledoit_wolf(stat_sim) robust_precision_sim = np.linalg.inv(lw_cov_) - # print("DEBUG: robust_precision_sim_det computation..") + print("DEBUG: robust_precision_sim_det computation..") robust_precision_sim_det = np.linalg.det(robust_precision_sim) - # print("DEBUG: combining.") + + print("DEBUG: combining.") result = pow(np.sqrt((1/(2*np.pi))*robust_precision_sim_det),self.stat_obs.shape[0])\ *np.exp(np.sum(-0.5*np.sum(np.array(self.stat_obs-mean_sim)* \ np.array(np.matrix(robust_precision_sim)*np.matrix(self.stat_obs-mean_sim).T).T, axis = 1))) + print("DEBUG: done") return result diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 9b3e080a..c1432a54 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py 
@@ -821,11 +821,15 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 break # 2: calculate approximate lieklihood for new parameters self.logger.info("Calculate approximate likelihood") + print("Calculate approximate likelihood") new_parameters_pds = self.backend.parallelize(new_parameters) approx_likelihood_new_parameters_and_counter_pds = self.backend.map(self._approx_lik_calc, new_parameters_pds) self.logger.debug("collect approximate likelihood from pds") + print("collect approximate likelihood from pds") approx_likelihood_new_parameters_and_counter = self.backend.collect(approx_likelihood_new_parameters_and_counter_pds) + print("collect done") approx_likelihood_new_parameters, counter = [list(t) for t in zip(*approx_likelihood_new_parameters_and_counter)] + print("done") approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1,1) @@ -902,23 +906,35 @@ def _approx_lik_calc(self, theta, mpi_comm=None): # Simulate the fake data from the model given the parameter value theta # print("DEBUG: Simulate model for parameter " + str(theta)) + + # Every process of the communicator executes simulate, only process 0 returns relevant data y_sim = self.simulate(self.n_samples_per_param, self.rng, mpi_comm=mpi_comm) - # print("DEBUG: Extracting observation.") - obs = self.accepted_parameters_manager.observations_bds.value() - # print("DEBUG: Computing likelihood...") + # if the mpi_comm is none or our rank is 0, we have relevant data + if(mpi_comm==None or mpi_comm.Get_rank()==0): + + # print("DEBUG: Extracting observation.") + obs = self.accepted_parameters_manager.observations_bds.value() + # print("DEBUG: Computing likelihood...") + + + total_pdf_at_theta = 1. - total_pdf_at_theta = 1. + # trick to avoid data not of allowed type... 
+ obs[0] = list(obs[0]) - lhd = self.likfun.likelihood(obs, y_sim) + # will crash inside likelihood function, approx_lhd.py line 97 + lhd = self.likfun.likelihood(obs, y_sim) - # print("DEBUG: Likelihood is :" + str(lhd)) - pdf_at_theta = self.pdf_of_prior(self.model, theta) + # print("DEBUG: Likelihood is :" + str(lhd)) + pdf_at_theta = self.pdf_of_prior(self.model, theta) - total_pdf_at_theta*=(pdf_at_theta*lhd) + total_pdf_at_theta*=(pdf_at_theta*lhd) - # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta)) - return (total_pdf_at_theta, 1) + # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta)) + return (total_pdf_at_theta, 1) + + return None def _calculate_weight(self, theta, mpi_comm=None): """ diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index c143fdee..241298b7 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -88,7 +88,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com element1 = data[1][i] point = np.array([element0, element1]) result[i] = point - print("Process 0 will return : ", result) + # print("Process 0 will return : ", result) return result else: return From fda3ce5645ac98799b14811be06d9e4c1b3f0a62 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Sun, 9 Dec 2018 00:26:52 +0000 Subject: [PATCH 14/41] Rito's changes --- examples/backends/mpi/mpi_model_pmc.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index 241298b7..c1cdf3c2 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -68,6 +68,8 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com rank = mpi_comm.Get_rank() + print('Hello world') + # Extract the input parameters mu = input_values[rank] sigma = 1 @@ -104,11 +106,7 @@ def pdf(self, 
input_values, x): def infer_parameters(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() - y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100).reshape(200) - - - - print("type : ", type(y_obs), " shape : ", y_obs.shape) + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] # define prior from abcpy.continuousmodels import Uniform @@ -123,10 +121,6 @@ def infer_parameters(): from abcpy.statistics import Identity statistics_calculator = Identity(degree = 2, cross = False) - # define distance - #from abcpy.distances import LogReg - #distance_calculator = LogReg(statistics_calculator) - from abcpy.approx_lhd import SynLiklihood approx_lhd = SynLiklihood(statistics_calculator) From 18c7bbe4f399fd7f6067e5ced60d505aa91f3fc0 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Sun, 9 Dec 2018 00:34:08 +0000 Subject: [PATCH 15/41] Rito's changes --- examples/backends/mpi/mpi_model_pmc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index c1cdf3c2..d768658a 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -68,8 +68,6 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com rank = mpi_comm.Get_rank() - print('Hello world') - # Extract the input parameters mu = input_values[rank] sigma = 1 @@ -91,7 +89,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com point = np.array([element0, element1]) result[i] = point # print("Process 0 will return : ", result) - return result + return [np.array([result[i]]).reshape(-1,) for i in range(k)] else: return From 474237348e15188dd76adc5318690e5f0f8dcafc Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Mon, 10 Dec 2018 10:47:52 +0100 Subject: [PATCH 16/41] debugging inference with mpi backend --- abcpy/inferences.py | 4 +++- 1 file changed, 3 insertions(+), 
1 deletion(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index c1432a54..8676fd21 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -845,6 +845,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for i in range(0, self.n_samples): new_weights[i] = new_weights[i] * approx_likelihood_new_parameters[i] sum_of_weights += new_weights[i] + + print("new_weights : ", new_weights, ", sum_of_weights : ", sum_of_weights) new_weights = new_weights / sum_of_weights accepted_parameters = new_parameters @@ -921,7 +923,7 @@ def _approx_lik_calc(self, theta, mpi_comm=None): total_pdf_at_theta = 1. # trick to avoid data not of allowed type... - obs[0] = list(obs[0]) + # obs[0] = list(obs[0]) # will crash inside likelihood function, approx_lhd.py line 97 lhd = self.likfun.likelihood(obs, y_sim) From c010472aad509964e014cb0e804b54ae26aed02b Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 10 Dec 2018 09:59:22 +0000 Subject: [PATCH 17/41] Rito's changes --- examples/backends/mpi/mpi_model_pmc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index d768658a..ef6522eb 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -66,6 +66,7 @@ def get_output_dimension(self): #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + print('Type of mpi_comm: ' + str(mpi_comm)) rank = mpi_comm.Get_rank() # Extract the input parameters From 8ce47b0bb2f7e8c7f8312a303b0ab24473a4ccab Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 10 Dec 2018 20:10:12 +0000 Subject: [PATCH 18/41] Rito's amendments to make mpi_model_pmc.py work --- abcpy/approx_lhd.py | 7 +------ abcpy/inferences.py | 10 ++++------ abcpy/perturbationkernel.py | 6 ------ 
examples/backends/mpi/mpi_model_pmc.py | 27 +++++--------------------- 4 files changed, 10 insertions(+), 40 deletions(-) diff --git a/abcpy/approx_lhd.py b/abcpy/approx_lhd.py index 6097064a..93d5274f 100644 --- a/abcpy/approx_lhd.py +++ b/abcpy/approx_lhd.py @@ -85,20 +85,15 @@ def likelihood(self, y_obs, y_sim): stat_sim = self.statistics_calc.statistics(y_sim) # Compute the mean, robust precision matrix and determinant of precision matrix - print("DEBUG: meansim computation.") mean_sim = np.mean(stat_sim,0) - print("DEBUG: robust_precision_sim computation.") lw_cov_, _ = ledoit_wolf(stat_sim) robust_precision_sim = np.linalg.inv(lw_cov_) - print("DEBUG: robust_precision_sim_det computation..") robust_precision_sim_det = np.linalg.det(robust_precision_sim) - - print("DEBUG: combining.") + result = pow(np.sqrt((1/(2*np.pi))*robust_precision_sim_det),self.stat_obs.shape[0])\ *np.exp(np.sum(-0.5*np.sum(np.array(self.stat_obs-mean_sim)* \ np.array(np.matrix(robust_precision_sim)*np.matrix(self.stat_obs-mean_sim).T).T, axis = 1))) - print("DEBUG: done") return result diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 8676fd21..b7d268fc 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -757,7 +757,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) - # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once, instead of collecting them at each kernel in each step + # The parameters relevant to each kernel have to be used to calculate n_sample times. 
It is therefore more efficient + # to broadcast these parameters once, instead of collecting them at each kernel in each step kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( @@ -821,15 +822,11 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 break # 2: calculate approximate lieklihood for new parameters self.logger.info("Calculate approximate likelihood") - print("Calculate approximate likelihood") new_parameters_pds = self.backend.parallelize(new_parameters) approx_likelihood_new_parameters_and_counter_pds = self.backend.map(self._approx_lik_calc, new_parameters_pds) self.logger.debug("collect approximate likelihood from pds") - print("collect approximate likelihood from pds") approx_likelihood_new_parameters_and_counter = self.backend.collect(approx_likelihood_new_parameters_and_counter_pds) - print("collect done") approx_likelihood_new_parameters, counter = [list(t) for t in zip(*approx_likelihood_new_parameters_and_counter)] - print("done") approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1,1) @@ -846,7 +843,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 new_weights[i] = new_weights[i] * approx_likelihood_new_parameters[i] sum_of_weights += new_weights[i] - print("new_weights : ", new_weights, ", sum_of_weights : ", sum_of_weights) + #print("new_weights : ", new_weights, ", sum_of_weights : ", sum_of_weights) new_weights = new_weights / sum_of_weights accepted_parameters = new_parameters @@ -910,6 +907,7 @@ def _approx_lik_calc(self, theta, mpi_comm=None): # print("DEBUG: Simulate model for parameter " + str(theta)) # Every process of the communicator executes simulate, only process 0 returns relevant data + self.set_parameters(theta) y_sim = self.simulate(self.n_samples_per_param, self.rng, mpi_comm=mpi_comm) # if the mpi_comm is none or our rank is 0, we have relevant data diff --git a/abcpy/perturbationkernel.py 
b/abcpy/perturbationkernel.py index cbbaae01..4d920747 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -269,12 +269,8 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): The covariance matrix corresponding to this kernel. """ - print("in calculate_cov line 255") - if(accepted_parameters_manager.accepted_weights_bds is not None): weights = accepted_parameters_manager.accepted_weights_bds.value() - print("np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float) : ", np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float).shape) - print("weights.reshape(-1).astype(float) : ", weights.reshape(-1).astype(float).shape) cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), aweights=weights.reshape(-1).astype(float), rowvar=False) else: @@ -283,8 +279,6 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): else: cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), rowvar=False) - print("calculate_cov done") - return cov diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index ef6522eb..e3be3e64 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -35,7 +35,7 @@ def __init__(self, parameters, name='Gaussian'): raise TypeError('Input of Normal model is of type list') if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') input_connector = InputConnector.from_list(parameters) super().__init__(input_connector, name) @@ -52,27 +52,21 @@ def _check_output(self, values): if not isinstance(values, np.ndarray): raise ValueError('Output of the normal distribution is always a numpy array.') - if value.shape[0] != 2: 
+ if values.shape[0] != 2: raise ValueError('Output shape should be of dimension 2.') return True - def get_output_dimension(self): return 2 - - #def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - print('Type of mpi_comm: ' + str(mpi_comm)) rank = mpi_comm.Get_rank() - # Extract the input parameters mu = input_values[rank] sigma = 1 - + #print(mu) # Do the actual forward simulation vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) @@ -95,13 +89,6 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com return - def pdf(self, input_values, x): - mu = input_values[0] - sigma = input_values[1] - pdf = np.norm(mu,sigma).pdf(x) - return pdf - - def infer_parameters(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() @@ -113,7 +100,6 @@ def infer_parameters(): mu1 = Uniform([[25], [100]], ) # define the model - from abcpy.continuousmodels import Normal height_weight_model = NestedBivariateGaussian([mu0, mu1]) # define statistics @@ -128,8 +114,7 @@ def infer_parameters(): sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) # sample from scheme - #T, n_sample, n_samples_per_param = 3, 250, 10 - T, n_sample, n_samples_per_param = 2, 10, 10 + T, n_sample, n_samples_per_param = 2, 100, 100 journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) @@ -150,6 +135,4 @@ def test_example(self): if __name__ == "__main__": setup_backend() - #print(run_mod#print(run_model()) - model = NestedBivariateGaussian([100,200]) - print(infer_parameters()) + print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) From b34a0f1213d757cc0058675999268557ec5e2e72 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 10 Dec 2018 20:11:28 +0000 Subject: [PATCH 
19/41] Rito's amendments to make mpi_model_pmc.py work --- examples/backends/mpi/mpi_model_pmc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index e3be3e64..7edf469a 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -136,3 +136,4 @@ def test_example(self): if __name__ == "__main__": setup_backend() print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) + print('True Value was: ' + str([170, 65])) From 551472b946e431faad7bc9ddc8ed3f023f461967 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 11 Dec 2018 18:10:13 +0000 Subject: [PATCH 20/41] PMC made to work --- abcpy/inferences.py | 83 +++++++++++------------ examples/backends/mpi/mpi_model_pmc.py | 2 +- examples/backends/mpi/mpi_model_pmcabc.py | 64 +++++++---------- 3 files changed, 64 insertions(+), 85 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 0389d78b..b37b6bea 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -412,6 +412,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples raise ValueError("The length of epsilon_init can only be equal to 1 or steps.") # main PMCABC algorithm + print("Starting PMC iterations") self.logger.info("Starting PMC iterations") for aStep in range(steps): self.logger.debug("iteration {} of PMC algorithm".format(aStep)) @@ -438,21 +439,21 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples rng_pds = self.backend.parallelize(rng_arr) # 0: update remotely required variables - # print("INFO: Broadcasting parameters.") + print("INFO: Broadcasting parameters.") self.logger.info("Broadcasting parameters") self.epsilon = epsilon_arr[aStep] self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters, accepted_weights, accepted_cov_mats) # 1: calculate resample parameters - # print("INFO: Resampling parameters") + 
print("INFO: Resampling parameters") self.logger.info("Resamping parameters") - params_and_dists_and_ysim_and_counter_pds = self.backend.map(self._resample_parameter, rng_pds) - params_and_dists_and_ysim_and_counter = self.backend.collect(params_and_dists_and_ysim_and_counter_pds) - new_parameters, distances, counter = [list(t) for t in zip(*params_and_dists_and_ysim_and_counter)] + params_and_dists_and_counter_pds = self.backend.map(self._resample_parameter, rng_pds) + params_and_dists_and_counter = self.backend.collect(params_and_dists_and_counter_pds) + new_parameters, distances, counter = [list(t) for t in zip(*params_and_dists_and_counter)] new_parameters = np.array(new_parameters) - #print(new_parameters) + print(new_parameters) for count in counter: self.simulation_counter+=count @@ -516,9 +517,6 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples return journal - # define helper functions for map step - #def _resample_parameter(self, rng): - #def _resample_parameter(self, mpi_comm, rng): def _resample_parameter(self, rng, mpi_comm=None): """ Samples a single model parameter and simulate from it until @@ -564,30 +562,27 @@ def _resample_parameter(self, rng, mpi_comm=None): if(perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!=0): theta = perturbation_output[1] break - y_sim = self.simulate(mpi_comm, self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 if(y_sim is not None): - print("Will compute distance") - print("self.accepted_parameters_manager.observations_bds.value() : ", self.accepted_parameters_manager.observations_bds.value()) - print("type(self.accepted_parameters_manager.observations_bds.value()) : ", type(self.accepted_parameters_manager.observations_bds.value())) - print("y_sim : ", y_sim) - print("type(y_sim) : ", type(y_sim)) distance = 
self.distance.distance(self.accepted_parameters_manager.observations_bds.value(),y_sim) self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) - print("Distance computed") else: distance = self.distance.dist_max() - self.logger.debug( - "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". - format(counter, distance, float(self.epsilon)) - ) + if mpi_comm == None or mpi_comm.Get_rank() == 0: + self.logger.debug( + "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". + format(counter, distance, float(self.epsilon)) + ) + print(str(theta)+str(distance)) + return (theta, distance, counter) - return (theta, distance, counter) + return None - def _calculate_weight(self, theta): + def _calculate_weight(self, theta, mpi_comm=None): """ Calculates the weight for the given parameter using accepted_parameters, accepted_cov_mat @@ -823,10 +818,10 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 2: calculate approximate lieklihood for new parameters self.logger.info("Calculate approximate likelihood") merged_sim_data_parameter = self.flat_map(new_parameters, self.n_samples_per_param, self._simulate_data) + # Compute likelihood for each parameter value approx_likelihood_new_parameters, counter = self.simple_map(merged_sim_data_parameter, self._approx_calc) approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1, 1) - for count in counter: self.simulation_counter+=count @@ -893,10 +888,18 @@ def simple_map(self, data, map_function): main_result, counter = [list(t) for t in zip(*result)] return main_result, counter def flat_map(self, data, n_repeat, map_function): + # Create an array of data, with each data repeated n_repeat many times repeated_data = np.repeat(data, n_repeat, axis=0) - repeated_data_pds = self.backend.parallelize(repeated_data) - repeated_data__result_pds = self.backend.map(map_function, repeated_data_pds) - repeated_data_result = 
self.backend.collect(repeated_data__result_pds) + # Create an see array + n_total = n_repeat * data.shape[0] + seed_arr = self.rng.randint(1, n_total * n_total, size=n_total, dtype=np.int32) + rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) + # Create data and rng array + repeated_data_rng = [[repeated_data[ind,:],rng_arr[ind]] for ind in range(n_total)] + repeated_data_rng_pds = self.backend.parallelize(repeated_data_rng) + # Map the function on the data using the corresponding rng + repeated_data_result_pds = self.backend.map(map_function, repeated_data_rng_pds) + repeated_data_result = self.backend.collect(repeated_data_result_pds) repeated_data, result = [list(t) for t in zip(*repeated_data_result)] merged_result_data = [] for ind in range(0, data.shape[0]): @@ -907,7 +910,7 @@ def flat_map(self, data, n_repeat, map_function): return merged_result_data # define helper functions for map step - def _simulate_data(self, theta, mpi_comm=None): + def _simulate_data(self, data, mpi_comm=None): """ Simulate n_sample_per_param many datasets for new parameter Parameters @@ -922,9 +925,9 @@ def _simulate_data(self, theta, mpi_comm=None): # Simulate the fake data from the model given the parameter value theta # print("DEBUG: Simulate model for parameter " + str(theta)) + theta, rng = data[0], data[1] self.set_parameters(theta) - y_sim = self.simulate(1, self.rng, mpi_comm=mpi_comm) - + y_sim = self.simulate(1, rng, mpi_comm=mpi_comm) return (theta, y_sim) def _approx_calc(self, sim_data_parameter, mpi_comm=None): @@ -940,26 +943,20 @@ def _approx_calc(self, sim_data_parameter, mpi_comm=None): The approximated likelihood function """ - if mpi_comm == None or mpi_comm.Get_rank()==0: - # Extract data and parameter - y_sim, theta = sim_data_parameter[0], sim_data_parameter[1] + # Extract data and parameter + y_sim, theta = sim_data_parameter[0], sim_data_parameter[1] - # print("DEBUG: Extracting observation.") - obs = 
self.accepted_parameters_manager.observations_bds.value() - # print("DEBUG: Computing likelihood...") + obs = self.accepted_parameters_manager.observations_bds.value() - total_pdf_at_theta = 1. + total_pdf_at_theta = 1. - lhd = self.likfun.likelihood(obs, y_sim) + lhd = self.likfun.likelihood(obs, y_sim) - pdf_at_theta = self.pdf_of_prior(self.model, theta) + pdf_at_theta = self.pdf_of_prior(self.model, theta) - total_pdf_at_theta *= (pdf_at_theta * lhd) + total_pdf_at_theta *= (pdf_at_theta * lhd) - # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta)) - return (total_pdf_at_theta, 1) - - return None + return (total_pdf_at_theta, 1) def _calculate_weight(self, theta, mpi_comm=None): """ diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py index 7edf469a..50f0b71d 100644 --- a/examples/backends/mpi/mpi_model_pmc.py +++ b/examples/backends/mpi/mpi_model_pmc.py @@ -114,7 +114,7 @@ def infer_parameters(): sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) # sample from scheme - T, n_sample, n_samples_per_param = 2, 100, 100 + T, n_sample, n_samples_per_param = 2, 10, 10 journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) diff --git a/examples/backends/mpi/mpi_model_pmcabc.py b/examples/backends/mpi/mpi_model_pmcabc.py index 659ea7a1..5d43907a 100644 --- a/examples/backends/mpi/mpi_model_pmcabc.py +++ b/examples/backends/mpi/mpi_model_pmcabc.py @@ -2,20 +2,23 @@ from mpi4py import MPI from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + def setup_backend(): global backend - + from abcpy.backends import BackendMPI as Backend backend = Backend(process_per_model=2) + # backend = Backend() + def run_model(): def square_mpi(model_comm, x): - local_res = np.array([x**2], 'i') + local_res = np.array([x ** 2], 'i') global_res = np.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + model_comm.Reduce([local_res, 
MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) return global_res[0] - - data = [1,2,3,4,5] + + data = [1, 2, 3, 4, 5] pds = backend.parallelize(data) pds_map = backend.map(square_mpi, pds) res = backend.collect(pds_map) @@ -34,43 +37,36 @@ def __init__(self, parameters, name='Gaussian'): raise TypeError('Input of Normal model is of type list') if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') input_connector = InputConnector.from_list(parameters) super().__init__(input_connector, name) - def _check_input(self, input_values): # Check whether input has correct type or format if len(input_values) != 2: raise ValueError('Number of parameters are 2 (two means).') return True - def _check_output(self, values): if not isinstance(values, np.ndarray): raise ValueError('Output of the normal distribution is always a numpy array.') - if value.shape[0] != 2: + if values.shape[0] != 2: raise ValueError('Output shape should be of dimension 2.') return True - def get_output_dimension(self): return 2 - - #def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): rank = mpi_comm.Get_rank() - # Extract the input parameters mu = input_values[rank] sigma = 1 - + # print(mu) # Do the actual forward simulation vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) @@ -81,29 +77,21 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com # Format the output to obey API but only on rank 0 if rank == 0: - result = [None]*k + result = [None] * k for i in range(k): element0 = data[0][i] element1 = data[1][i] point = np.array([element0, element1]) result[i] = point - print("Process 0 will return : ", 
result) - return result + # print("Process 0 will return : ", result) + return [np.array([result[i]]).reshape(-1, ) for i in range(k)] else: return - - def pdf(self, input_values, x): - mu = input_values[0] - sigma = input_values[1] - pdf = np.norm(mu,sigma).pdf(x) - return pdf - - def infer_parameters(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() - y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100) + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] # define prior from abcpy.continuousmodels import Uniform @@ -111,7 +99,6 @@ def infer_parameters(): mu1 = Uniform([[25], [100]], ) # define the model - from abcpy.continuousmodels import Normal height_weight_model = NestedBivariateGaussian([mu0, mu1]) # define statistics @@ -119,21 +106,17 @@ def infer_parameters(): statistics_calculator = Identity(degree = 2, cross = False) # define distance - from abcpy.distances import LogReg - distance_calculator = LogReg(statistics_calculator) - - from abcpy.approx_lhd import SynLiklihood - approx_lhd = SynLiklihood(statistics_calculator) + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) # define sampling scheme from abcpy.inferences import PMCABC sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) - + print('sampling') # sample from scheme - #T, n_sample, n_samples_per_param = 3, 250, 10 - T, n_sample, n_samples_per_param = 1, 1, 1 - eps_arr = np.array([.75]) - epsilon_percentile = 10 + T, n_sample, n_samples_per_param = 2, 10, 1 + eps_arr = np.array([10000]) + epsilon_percentile = 90 journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) @@ -154,6 +137,5 @@ def test_example(self): if __name__ == "__main__": setup_backend() - #print(run_mod#print(run_model()) - model = NestedBivariateGaussian([100,200]) - print(infer_parameters()) + print('Posterior Mean: ' + 
str(infer_parameters().posterior_mean())) + print('True Value was: ' + str([170, 65])) \ No newline at end of file From cc21c713004d2afeb90dd7cab5f792ecdae74e35 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 11 Dec 2018 18:37:33 +0000 Subject: [PATCH 21/41] PMCABC made to work --- abcpy/inferences.py | 17 +++++------------ examples/backends/mpi/mpi_model_pmcabc.py | 2 +- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index b37b6bea..21811923 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -412,7 +412,6 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples raise ValueError("The length of epsilon_init can only be equal to 1 or steps.") # main PMCABC algorithm - print("Starting PMC iterations") self.logger.info("Starting PMC iterations") for aStep in range(steps): self.logger.debug("iteration {} of PMC algorithm".format(aStep)) @@ -439,13 +438,13 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples rng_pds = self.backend.parallelize(rng_arr) # 0: update remotely required variables - print("INFO: Broadcasting parameters.") + #print("INFO: Broadcasting parameters.") self.logger.info("Broadcasting parameters") self.epsilon = epsilon_arr[aStep] self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters, accepted_weights, accepted_cov_mats) # 1: calculate resample parameters - print("INFO: Resampling parameters") + #print("INFO: Resampling parameters") self.logger.info("Resamping parameters") params_and_dists_and_counter_pds = self.backend.map(self._resample_parameter, rng_pds) @@ -453,8 +452,6 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples new_parameters, distances, counter = [list(t) for t in zip(*params_and_dists_and_counter)] new_parameters = np.array(new_parameters) - print(new_parameters) - for count in counter: self.simulation_counter+=count @@ -565,19 +562,15 @@ def 
_resample_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - if(y_sim is not None): - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(),y_sim) - self.logger.debug("distance after {:4d} simulations: {:e}".format( - counter, distance)) - else: - distance = self.distance.dist_max() + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + self.logger.debug("distance after {:4d} simulations: {:e}".format( + counter, distance)) if mpi_comm == None or mpi_comm.Get_rank() == 0: self.logger.debug( "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". format(counter, distance, float(self.epsilon)) ) - print(str(theta)+str(distance)) return (theta, distance, counter) return None diff --git a/examples/backends/mpi/mpi_model_pmcabc.py b/examples/backends/mpi/mpi_model_pmcabc.py index 5d43907a..ac6fe04b 100644 --- a/examples/backends/mpi/mpi_model_pmcabc.py +++ b/examples/backends/mpi/mpi_model_pmcabc.py @@ -114,7 +114,7 @@ def infer_parameters(): sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) print('sampling') # sample from scheme - T, n_sample, n_samples_per_param = 2, 10, 1 + T, n_sample, n_samples_per_param = 1, 10, 1 eps_arr = np.array([10000]) epsilon_percentile = 90 From 91e3141f2117f25235d0d2ba272ced86ddded998 Mon Sep 17 00:00:00 2001 From: "Pierre.Kuenzli" Date: Fri, 14 Dec 2018 15:18:18 +0100 Subject: [PATCH 22/41] fixed pmcabc inference for mpi model --- abcpy/inferences.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 21811923..8c67d127 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -433,6 +433,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples # Since each entry of new_cov_mats is a numpy array, we can multiply like this 
accepted_cov_mats = [covFactor * new_cov_mat for new_cov_mat in new_cov_mats] + seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32) rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) rng_pds = self.backend.parallelize(rng_arr) @@ -464,6 +465,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples else: epsilon_arr[aStep + 1] = np.max( [np.percentile(distances, epsilon_percentile), epsilon_arr[aStep + 1]]) + # 2: calculate weights for new parameters self.logger.info("Calculating weights") @@ -542,8 +544,6 @@ def _resample_parameter(self, rng, mpi_comm=None): counter=0 while distance > self.epsilon: - #print( " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) - if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) theta = self.get_parameters() @@ -562,7 +562,12 @@ def _resample_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = None + # y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) From b887596da7945c9815b7c899137ad62fccfd43a8 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 14 Dec 2018 22:35:16 +0000 Subject: [PATCH 23/41] Adding SABC example to run with nested MPI --- abcpy/inferences.py | 23 ++-- examples/backends/mpi/mpi_model_pmcabc.py | 2 +- examples/backends/mpi/mpi_model_sabc.py | 140 ++++++++++++++++++++++ 3 files changed, 157 insertions(+), 8 deletions(-) create mode 100644 examples/backends/mpi/mpi_model_sabc.py diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 
8c67d127..7f14e80c 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -1404,15 +1404,15 @@ def destroy(bc): self.all_distances_bds = self.backend.broadcast(all_distances) # define helper functions for map step - def _accept_parameter(self, data): + def _accept_parameter(self, data, mpi_comm=None): """ Samples a single model parameter and simulate from it until accepted with probabilty exp[-rho(x,y)/epsilon]. Parameters ---------- - seed: integer - Initial seed for the random number generator. + seed_and_index: list of two integers + Initial seed for the random number generator and the index of data to operate on Returns ------- @@ -1437,9 +1437,14 @@ def _accept_parameter(self, data): self.sample_from_prior(rng=rng) new_theta = np.array(self.get_parameters()).reshape(-1,) all_parameters.append(new_theta) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + all_distances.append(distance) acceptance = rng.binomial(1, np.exp(-distance / self.epsilon), 1) acceptance = 1 @@ -1455,9 +1460,13 @@ def _accept_parameter(self, data): new_theta = np.array(perturbation_output[1]).reshape(-1,) break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = 
mpi_comm.bcast(distance) smooth_distance = self._smoother_distance([distance], self.all_distances_bds.value()) diff --git a/examples/backends/mpi/mpi_model_pmcabc.py b/examples/backends/mpi/mpi_model_pmcabc.py index ac6fe04b..6332d87e 100644 --- a/examples/backends/mpi/mpi_model_pmcabc.py +++ b/examples/backends/mpi/mpi_model_pmcabc.py @@ -114,7 +114,7 @@ def infer_parameters(): sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) print('sampling') # sample from scheme - T, n_sample, n_samples_per_param = 1, 10, 1 + T, n_sample, n_samples_per_param = 2, 100, 1 eps_arr = np.array([10000]) epsilon_percentile = 90 diff --git a/examples/backends/mpi/mpi_model_sabc.py b/examples/backends/mpi/mpi_model_sabc.py new file mode 100644 index 00000000..86e45f0d --- /dev/null +++ b/examples/backends/mpi/mpi_model_sabc.py @@ -0,0 +1,140 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + # backend = Backend() + + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x ** 2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1, 2, 3, 4, 5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. 
+ """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if values.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + def get_output_dimension(self): + return 2 + + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + + rank = mpi_comm.Get_rank() + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + # print(mu) + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) + data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) + + # Format the output to obey API but only on rank 0 + if rank == 0: + result = [None] * k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + # print("Process 0 will return : ", result) + return [np.array([result[i]]).reshape(-1, ) for i in range(k)] + else: + return + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 
1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import SABC + sampler = SABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, epsilon, n_samples, n_samples_per_param, beta, delta, v = 2, np.array([10000]), 10, 1, 2, 0.2, 0.3 + ar_cutoff, resample, n_update, adaptcov, full_output = 0.1, None, None, 1, 1 + # + # # print('SABC Inferring') + journal = sampler.sample([y_obs], steps, epsilon, n_samples, n_samples_per_param, beta, delta, v, ar_cutoff, resample, n_update, adaptcov, full_output) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) + print('True Value was: ' + str([170, 65])) \ No newline at end of file From 926e5a0001505289a758d102e546a4efb27bc854 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 14 Dec 2018 22:42:58 +0000 Subject: [PATCH 24/41] Adding example of APMCABC to work with nested MPI --- abcpy/inferences.py | 23 +++- examples/backends/mpi/mpi_model_apmcabc.py | 137 +++++++++++++++++++++ 2 files changed, 154 insertions(+), 6 deletions(-) create mode 100644 examples/backends/mpi/mpi_model_apmcabc.py diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 
7f14e80c..2f557e28 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2397,7 +2397,7 @@ def destroy(bc): self.accepted_dist_bds = self.backend.broadcast(accepted_dist) # define helper functions for map step - def _accept_parameter(self, rng): + def _accept_parameter(self, rng, mpi_comm=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -2423,9 +2423,15 @@ def _accept_parameter(self, rng): if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - dist = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + + distance = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + weight = 1.0 else: index = rng.choice(len(self.accepted_parameters_manager.accepted_weights_bds.value()), size=1, @@ -2437,9 +2443,14 @@ def _accept_parameter(self, rng): if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - dist = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + + distance = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) denominator = 0.0 @@ -2448,7 +2459,7 @@ def _accept_parameter(self, rng): denominator += 
self.accepted_parameters_manager.accepted_weights_bds.value()[i, 0] * pdf_value weight = 1.0 * prior_prob / denominator - return (self.get_parameters(self.model), dist, weight, counter) + return (self.get_parameters(self.model), distance, weight, counter) class SMCABC(BaseDiscrepancy, InferenceMethod): diff --git a/examples/backends/mpi/mpi_model_apmcabc.py b/examples/backends/mpi/mpi_model_apmcabc.py new file mode 100644 index 00000000..f4937846 --- /dev/null +++ b/examples/backends/mpi/mpi_model_apmcabc.py @@ -0,0 +1,137 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + # backend = Backend() + + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x ** 2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1, 2, 3, 4, 5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. 
+ """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if values.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + def get_output_dimension(self): + return 2 + + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + + rank = mpi_comm.Get_rank() + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + # print(mu) + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) + data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) + + # Format the output to obey API but only on rank 0 + if rank == 0: + result = [None] * k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + # print("Process 0 will return : ", result) + return [np.array([result[i]]).reshape(-1, ) for i in range(k)] + else: + return + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 
1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import APMCABC + sampler = APMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file = 2, 100, 1, 0.2, 0.03, 2.0, 1, None + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) + print('True Value was: ' + str([170, 65])) \ No newline at end of file From ea7176b1213621675d28f1e02c471bb119725f76 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 14 Dec 2018 23:00:17 +0000 Subject: [PATCH 25/41] Examples of SMCABC and RSMCABC added, inference modified, but they are still buggy --- abcpy/inferences.py | 42 +++++-- examples/backends/mpi/mpi_model_rsmcabc.py | 138 ++++++++++++++++++++ examples/backends/mpi/mpi_model_smcabc.py | 139 +++++++++++++++++++++ 3 files changed, 309 insertions(+), 10 deletions(-) create mode 100644 examples/backends/mpi/mpi_model_rsmcabc.py create mode 100644 
examples/backends/mpi/mpi_model_smcabc.py diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 2f557e28..00af0cef 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2101,7 +2101,7 @@ def destroy(bc): self.accepted_dist_bds = self.backend.broadcast(accepted_dist) # define helper functions for map step - def _accept_parameter(self, rng): + def _accept_parameter(self, rng, mpi_comm=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -2129,9 +2129,15 @@ def _accept_parameter(self, rng): if self.accepted_parameters_manager.accepted_parameters_bds == None: while distance > self.epsilon[-1]: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + + distance = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + index_accept = 1 else: index = rng.choice(len(self.accepted_parameters_manager.accepted_parameters_bds.value()), size=1) @@ -2142,9 +2148,15 @@ def _accept_parameter(self, rng): perturbation_output = self.perturb(index[0], rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + + distance = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = 
mpi_comm.bcast(distance) + ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], theta) kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], perturbation_output[1]) @@ -2792,7 +2804,7 @@ def destroy(bc): # define helper functions for map step - def _accept_parameter(self, rng_and_index): + def _accept_parameter(self, rng_and_index, mpi_comm=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -2822,7 +2834,7 @@ def _accept_parameter(self, rng_and_index): # print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 else: if self.accepted_parameters_manager.accepted_weights_bds.value()[index] > 0: @@ -2831,15 +2843,25 @@ def _accept_parameter(self, rng_and_index): perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 y_sim_old = self.accepted_y_sim_bds.value()[index] ## Calculate acceptance probability: numerator = 0.0 denominator = 0.0 for ind in range(self.n_samples_per_param): - numerator += (self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) < self.epsilon[-1]) - denominator += (self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) < self.epsilon[-1]) + + 
distance_new = None + distance_old = None + # As y_sim valid only at rank 0 + if mpi_comm.Get_rank() == 0: + distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) + distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) + distance_new = mpi_comm.bcast(distance_new) + distance_old = mpi_comm.bcast(distance_old) + + numerator += (distance_new < self.epsilon[-1]) + denominator += (distance_old < self.epsilon[-1]) if denominator == 0: ratio_data_epsilon = 1 else: diff --git a/examples/backends/mpi/mpi_model_rsmcabc.py b/examples/backends/mpi/mpi_model_rsmcabc.py new file mode 100644 index 00000000..ff43decb --- /dev/null +++ b/examples/backends/mpi/mpi_model_rsmcabc.py @@ -0,0 +1,138 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + # backend = Backend() + + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x ** 2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1, 2, 3, 4, 5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. 
+ """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if values.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + def get_output_dimension(self): + return 2 + + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + + rank = mpi_comm.Get_rank() + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + # print(mu) + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) + data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) + + # Format the output to obey API but only on rank 0 + if rank == 0: + result = [None] * k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + # print("Process 0 will return : ", result) + return [np.array([result[i]]).reshape(-1, ) for i in range(k)] + else: + return + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 
1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import SMCABC + sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, n_samples, n_samples_per_param, alpha, epsilon_init, epsilon_final = 2, 10, 1, 0.1, 10000, 2000 + print('RSMCABC Inferring') + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha , epsilon_init, epsilon_final,full_output=1) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) + print('True Value was: ' + str([170, 65])) \ No newline at end of file diff --git a/examples/backends/mpi/mpi_model_smcabc.py b/examples/backends/mpi/mpi_model_smcabc.py new file mode 100644 index 00000000..b1c71f42 --- /dev/null +++ b/examples/backends/mpi/mpi_model_smcabc.py @@ -0,0 +1,139 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + # backend = Backend() + + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x ** 2], 
'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1, 2, 3, 4, 5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. + """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if values.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + def get_output_dimension(self): + return 2 + + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + + rank = mpi_comm.Get_rank() + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + # print(mu) + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) + data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) + + # Format the output to obey API but only on rank 0 + if rank == 
0: + result = [None] * k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + # print("Process 0 will return : ", result) + return [np.array([result[i]]).reshape(-1, ) for i in range(k)] + else: + return + +def infer_parameters(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import SMCABC + sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, n_samples, n_samples_per_param, epsilon = 4, 100, 1, 10000 + print('SMCABC Inferring') + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1) + print(np.array(journal.get_parameters())) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) + print('True Value was: ' + str([170, 65])) \ No newline at end of file From 8454c372fea45bb97f0576b1a5f8367f5721ef4d Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Sun, 16 Dec 2018 15:36:06 +0000 Subject: [PATCH 26/41] 
Inferences modified to run with nested MPI --- abcpy/inferences.py | 121 +++--- examples/backends/mpi/mpi_model_apmcabc.py | 137 ------- examples/backends/mpi/mpi_model_inferences.py | 365 ++++++++++++++++++ examples/backends/mpi/mpi_model_pmc.py | 139 ------- examples/backends/mpi/mpi_model_pmcabc.py | 141 ------- examples/backends/mpi/mpi_model_rsmcabc.py | 138 ------- examples/backends/mpi/mpi_model_sabc.py | 140 ------- examples/backends/mpi/mpi_model_smcabc.py | 139 ------- 8 files changed, 440 insertions(+), 880 deletions(-) delete mode 100644 examples/backends/mpi/mpi_model_apmcabc.py create mode 100644 examples/backends/mpi/mpi_model_inferences.py delete mode 100644 examples/backends/mpi/mpi_model_pmc.py delete mode 100644 examples/backends/mpi/mpi_model_pmcabc.py delete mode 100644 examples/backends/mpi/mpi_model_rsmcabc.py delete mode 100644 examples/backends/mpi/mpi_model_sabc.py delete mode 100644 examples/backends/mpi/mpi_model_smcabc.py diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 00af0cef..d97c4210 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -248,7 +248,7 @@ def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_out return journal - def _sample_parameter(self, rng): + def _sample_parameter(self, rng, mpi_comm=None): """ Samples a single model parameter and simulates from it until distance between simulated outcome and the observation is @@ -276,10 +276,16 @@ def _sample_parameter(self, rng): # Accept new parameter value if the distance is less than epsilon self.sample_from_prior(rng=rng) theta = np.array(self.get_parameters(self.model)).reshape(-1,) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 if(y_sim is not None): - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None 
and mpi_comm.Get_rank() == 0: + distance = None + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) else: @@ -562,11 +568,13 @@ def _resample_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = None + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) @@ -1439,11 +1447,13 @@ def _accept_parameter(self, data, mpi_comm=None): all_parameters.append(new_theta) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) all_distances.append(distance) acceptance = rng.binomial(1, np.exp(-distance / self.epsilon), 1) @@ -1462,11 +1472,12 @@ def _accept_parameter(self, data, mpi_comm=None): y_sim = 
self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) smooth_distance = self._smoother_distance([distance], self.all_distances_bds.value()) @@ -1720,7 +1731,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 return journal # define helper functions for map step - def _accept_parameter(self, rng_and_index): + def _accept_parameter(self, rng_and_index, mpi_comm=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -1752,17 +1763,27 @@ def _accept_parameter(self, rng_and_index): if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) result_theta.append(self.get_parameters()) result_distance.append(distance) else: theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) 
self.set_parameters(theta) - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) result_theta.append(theta) result_distance.append(distance) for ind in range(0, self.chain_length - 1): @@ -1770,9 +1791,14 @@ def _accept_parameter(self, rng_and_index): perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!= 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng,mpi_comm=mpi_comm) counter+=1 - new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + new_distance = mpi_comm.bcast(new_distance) + else: + new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) ## Calculate acceptance probability: ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) @@ -2132,11 +2158,12 @@ def _accept_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at 
rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) index_accept = 1 else: @@ -2151,11 +2178,12 @@ def _accept_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], theta) @@ -2438,11 +2466,12 @@ def _accept_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) weight = 1.0 else: @@ -2458,11 +2487,12 @@ def _accept_parameter(self, rng, mpi_comm=None): y_sim = 
self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - distance = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = mpi_comm.bcast(distance) + else: distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) denominator = 0.0 @@ -2850,16 +2880,15 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): numerator = 0.0 denominator = 0.0 for ind in range(self.n_samples_per_param): - - distance_new = None - distance_old = None - # As y_sim valid only at rank 0 - if mpi_comm.Get_rank() == 0: + # y_sim valid only at rank 0, when used with nested MPI + if mpi_comm != None and mpi_comm.Get_rank() == 0: + distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) + distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) + distance_new = mpi_comm.bcast(distance_new) + distance_old = mpi_comm.bcast(distance_old) + else: distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) - distance_new = mpi_comm.bcast(distance_new) - distance_old = mpi_comm.bcast(distance_old) - numerator += (distance_new < self.epsilon[-1]) denominator += (distance_old < self.epsilon[-1]) if denominator == 0: diff --git a/examples/backends/mpi/mpi_model_apmcabc.py b/examples/backends/mpi/mpi_model_apmcabc.py deleted file mode 100644 index f4937846..00000000 --- 
a/examples/backends/mpi/mpi_model_apmcabc.py +++ /dev/null @@ -1,137 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=2) - # backend = Backend() - - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x ** 2], 'i') - global_res = np.array([0], 'i') - model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1, 2, 3, 4, 5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. - """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if values.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - def get_output_dimension(self): - return 2 - - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - - rank = mpi_comm.Get_rank() - # Extract the input 
parameters - mu = input_values[rank] - sigma = 1 - # print(mu) - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - # print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) - # print("Hello from forward_simulate after gather, rank = ", rank) - - # Format the output to obey API but only on rank 0 - if rank == 0: - result = [None] * k - for i in range(k): - element0 = data[0][i] - element1 = data[1][i] - point = np.array([element0, element1]) - result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1, ) for i in range(k)] - else: - return - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import APMCABC - sampler = APMCABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') - steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file = 2, 100, 1, 0.2, 0.03, 2.0, 1, None - journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def 
test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) - print('True Value was: ' + str([170, 65])) \ No newline at end of file diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py new file mode 100644 index 00000000..c4190f8c --- /dev/null +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -0,0 +1,365 @@ +import numpy as np +from mpi4py import MPI +from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector + + +def setup_backend(): + global backend + + from abcpy.backends import BackendMPI as Backend + backend = Backend(process_per_model=2) + # backend = Backend() + + +def run_model(): + def square_mpi(model_comm, x): + local_res = np.array([x ** 2], 'i') + global_res = np.array([0], 'i') + model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) + return global_res[0] + + data = [1, 2, 3, 4, 5] + pds = backend.parallelize(data) + pds_map = backend.map(square_mpi, pds) + res = backend.collect(pds_map) + return res + + +class NestedBivariateGaussian(ProbabilisticModel): + """ + This is a show case model of bi-variate Gaussian distribution where we assume + the standard deviation to be unit. 
+ """ + + def __init__(self, parameters, name='Gaussian'): + # We expect input of type parameters = [mu, sigma] + if not isinstance(parameters, list): + raise TypeError('Input of Normal model is of type list') + + if len(parameters) != 2: + raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') + + input_connector = InputConnector.from_list(parameters) + super().__init__(input_connector, name) + + def _check_input(self, input_values): + # Check whether input has correct type or format + if len(input_values) != 2: + raise ValueError('Number of parameters are 2 (two means).') + return True + + def _check_output(self, values): + if not isinstance(values, np.ndarray): + raise ValueError('Output of the normal distribution is always a numpy array.') + + if values.shape[0] != 2: + raise ValueError('Output shape should be of dimension 2.') + + return True + + def get_output_dimension(self): + return 2 + + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + + rank = mpi_comm.Get_rank() + # Extract the input parameters + mu = input_values[rank] + sigma = 1 + # print(mu) + # Do the actual forward simulation + vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) + + # Send everything back to rank 0 + # print("Hello from forward_simulate before gather, rank = ", rank) + data = mpi_comm.gather(vector_of_k_samples) + # print("Hello from forward_simulate after gather, rank = ", rank) + + # Format the output to obey API but only on rank 0 + if rank == 0: + result = [None] * k + for i in range(k): + element0 = data[0][i] + element1 = data[1][i] + point = np.array([element0, element1]) + result[i] = point + # print("Process 0 will return : ", result) + return [np.array([result[i]]).reshape(-1, ) for i in range(k)] + else: + return + +def infer_parameters_pmcabc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 
1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import PMCABC + sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + # sample from scheme + T, n_sample, n_samples_per_param = 2, 100, 1 + eps_arr = np.array([10000]) + epsilon_percentile = 90 + + journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) + + return journal + +def infer_parameters_abcsubsim(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import ABCsubsim + sampler = ABCsubsim([height_weight_model], [distance_calculator], backend, seed=1) + steps, n_samples = 10, 1000 + print('ABCsubsim Inferring') + journal = sampler.sample([y_obs], steps, n_samples) + + return journal + +def infer_parameters_rsmcabc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = 
[np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import SMCABC + sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, n_samples, n_samples_per_param, alpha, epsilon_init, epsilon_final = 2, 10, 1, 0.1, 10000, 2000 + print('RSMCABC Inferring') + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha , epsilon_init, epsilon_final,full_output=1) + + return journal + +def infer_parameters_sabc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import SABC + sampler = SABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, epsilon, n_samples, n_samples_per_param, beta, delta, v = 2, np.array([10000]), 10, 1, 2, 0.2, 0.3 + ar_cutoff, resample, n_update, adaptcov, full_output = 0.1, None, None, 1, 1 + # + 
# # print('SABC Inferring') + journal = sampler.sample([y_obs], steps, epsilon, n_samples, n_samples_per_param, beta, delta, v, ar_cutoff, resample, n_update, adaptcov, full_output) + + return journal + +def infer_parameters_smcabc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import SMCABC + sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, n_samples, n_samples_per_param, epsilon = 4, 100, 1, 10000 + print('SMCABC Inferring') + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1) + print(np.array(journal.get_parameters())) + + return journal + +def infer_parameters_apmcabc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from 
abcpy.inferences import APMCABC + sampler = APMCABC([height_weight_model], [distance_calculator], backend, seed=1) + print('sampling') + steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file = 2, 100, 1, 0.2, 0.03, 2.0, 1, None + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file) + + return journal + +def infer_parameters_rejectionabc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import RejectionABC + sampler = RejectionABC([height_weight_model], [distance_calculator], backend, seed=1) + n_samples, n_samples_per_param, epsilon = 2, 2, 20 + print('Rejection ABC Inferring') + journal = sampler.sample([y_obs], n_samples, n_samples_per_param, epsilon) + + return journal + +def infer_parameters_pmc(): + # define observation for true parameters mean=170, 65 + rng = np.random.RandomState() + y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] + + # define prior + from abcpy.continuousmodels import Uniform + mu0 = Uniform([[150], [200]], ) + mu1 = Uniform([[25], [100]], ) + + # define the model + height_weight_model = NestedBivariateGaussian([mu0, mu1]) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + from 
abcpy.approx_lhd import SynLiklihood + approx_lhd = SynLiklihood(statistics_calculator) + + # define sampling scheme + from abcpy.inferences import PMC + sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) + + # sample from scheme + T, n_sample, n_samples_per_param = 2, 10, 10 + + journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) + + return journal + +import unittest +from mpi4py import MPI + +def setUpModule(): + setup_backend() + +class ExampleMPIModelTest(unittest.TestCase): + def test_example(self): + result = run_model() + data = [1,2,3,4,5] + expected_result = list(map(lambda x:2*(x**2),data)) + assert result==expected_result + +if __name__ == "__main__": + setup_backend() + print('True Value was: ' + str([170, 65])) + print('Posterior Mean: ' + str(infer_parameters_pmcabc().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_abcsubsim().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_rsmcabc().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_sabc().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_smcabc().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_apmcabc().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_rejectionabc().posterior_mean())) + print('Posterior Mean: ' + str(infer_parameters_pmc().posterior_mean())) diff --git a/examples/backends/mpi/mpi_model_pmc.py b/examples/backends/mpi/mpi_model_pmc.py deleted file mode 100644 index 50f0b71d..00000000 --- a/examples/backends/mpi/mpi_model_pmc.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=2) - #backend = Backend() - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x**2], 'i') - global_res = 
np.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1,2,3,4,5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. - """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if values.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - def get_output_dimension(self): - return 2 - - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - - rank = mpi_comm.Get_rank() - # Extract the input parameters - mu = input_values[rank] - sigma = 1 - #print(mu) - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - # print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) - # print("Hello from forward_simulate after gather, rank = ", rank) - - # Format the output to obey API but only on rank 0 - if rank == 0: - result = [None]*k - 
for i in range(k): - element0 = data[0][i] - element1 = data[1][i] - point = np.array([element0, element1]) - result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1,) for i in range(k)] - else: - return - - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - from abcpy.approx_lhd import SynLiklihood - approx_lhd = SynLiklihood(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import PMC - sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) - - # sample from scheme - T, n_sample, n_samples_per_param = 2, 10, 10 - - journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) - print('True Value was: ' + str([170, 65])) diff --git a/examples/backends/mpi/mpi_model_pmcabc.py b/examples/backends/mpi/mpi_model_pmcabc.py deleted file mode 100644 index 6332d87e..00000000 --- a/examples/backends/mpi/mpi_model_pmcabc.py +++ /dev/null @@ -1,141 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - - -def 
setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=2) - # backend = Backend() - - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x ** 2], 'i') - global_res = np.array([0], 'i') - model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1, 2, 3, 4, 5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. - """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if values.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - def get_output_dimension(self): - return 2 - - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - - rank = mpi_comm.Get_rank() - # Extract the input parameters - mu = input_values[rank] - sigma = 1 - # print(mu) - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - # 
print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) - # print("Hello from forward_simulate after gather, rank = ", rank) - - # Format the output to obey API but only on rank 0 - if rank == 0: - result = [None] * k - for i in range(k): - element0 = data[0][i] - element1 = data[1][i] - point = np.array([element0, element1]) - result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1, ) for i in range(k)] - else: - return - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import PMCABC - sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') - # sample from scheme - T, n_sample, n_samples_per_param = 2, 100, 1 - eps_arr = np.array([10000]) - epsilon_percentile = 90 - - journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - print('Posterior Mean: ' + 
str(infer_parameters().posterior_mean())) - print('True Value was: ' + str([170, 65])) \ No newline at end of file diff --git a/examples/backends/mpi/mpi_model_rsmcabc.py b/examples/backends/mpi/mpi_model_rsmcabc.py deleted file mode 100644 index ff43decb..00000000 --- a/examples/backends/mpi/mpi_model_rsmcabc.py +++ /dev/null @@ -1,138 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=2) - # backend = Backend() - - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x ** 2], 'i') - global_res = np.array([0], 'i') - model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1, 2, 3, 4, 5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. 
- """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if values.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - def get_output_dimension(self): - return 2 - - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - - rank = mpi_comm.Get_rank() - # Extract the input parameters - mu = input_values[rank] - sigma = 1 - # print(mu) - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - # print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) - # print("Hello from forward_simulate after gather, rank = ", rank) - - # Format the output to obey API but only on rank 0 - if rank == 0: - result = [None] * k - for i in range(k): - element0 = data[0][i] - element1 = data[1][i] - point = np.array([element0, element1]) - result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1, ) for i in range(k)] - else: - return - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 
1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import SMCABC - sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') - steps, n_samples, n_samples_per_param, alpha, epsilon_init, epsilon_final = 2, 10, 1, 0.1, 10000, 2000 - print('RSMCABC Inferring') - journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha , epsilon_init, epsilon_final,full_output=1) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) - print('True Value was: ' + str([170, 65])) \ No newline at end of file diff --git a/examples/backends/mpi/mpi_model_sabc.py b/examples/backends/mpi/mpi_model_sabc.py deleted file mode 100644 index 86e45f0d..00000000 --- a/examples/backends/mpi/mpi_model_sabc.py +++ /dev/null @@ -1,140 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=2) - # backend = Backend() - - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x ** 2], 'i') 
- global_res = np.array([0], 'i') - model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1, 2, 3, 4, 5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. - """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if values.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - def get_output_dimension(self): - return 2 - - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - - rank = mpi_comm.Get_rank() - # Extract the input parameters - mu = input_values[rank] - sigma = 1 - # print(mu) - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - # print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) - # print("Hello from forward_simulate after gather, rank = ", rank) - - # Format the output to obey API but only on rank 0 - if rank == 0: - 
result = [None] * k - for i in range(k): - element0 = data[0][i] - element1 = data[1][i] - point = np.array([element0, element1]) - result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1, ) for i in range(k)] - else: - return - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import SABC - sampler = SABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') - steps, epsilon, n_samples, n_samples_per_param, beta, delta, v = 2, np.array([10000]), 10, 1, 2, 0.2, 0.3 - ar_cutoff, resample, n_update, adaptcov, full_output = 0.1, None, None, 1, 1 - # - # # print('SABC Inferring') - journal = sampler.sample([y_obs], steps, epsilon, n_samples, n_samples_per_param, beta, delta, v, ar_cutoff, resample, n_update, adaptcov, full_output) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) - print('True Value was: ' + str([170, 65])) \ No newline at end of file diff --git 
a/examples/backends/mpi/mpi_model_smcabc.py b/examples/backends/mpi/mpi_model_smcabc.py deleted file mode 100644 index b1c71f42..00000000 --- a/examples/backends/mpi/mpi_model_smcabc.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend(process_per_model=2) - # backend = Backend() - - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x ** 2], 'i') - global_res = np.array([0], 'i') - model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1, 2, 3, 4, 5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. 
- """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu1, mu1].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if values.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - def get_output_dimension(self): - return 2 - - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - - rank = mpi_comm.Get_rank() - # Extract the input parameters - mu = input_values[rank] - sigma = 1 - # print(mu) - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - # print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) - # print("Hello from forward_simulate after gather, rank = ", rank) - - # Format the output to obey API but only on rank 0 - if rank == 0: - result = [None] * k - for i in range(k): - element0 = data[0][i] - element1 = data[1][i] - point = np.array([element0, element1]) - result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1, ) for i in range(k)] - else: - return - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 
1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import SMCABC - sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') - steps, n_samples, n_samples_per_param, epsilon = 4, 100, 1, 10000 - print('SMCABC Inferring') - journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1) - print(np.array(journal.get_parameters())) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - print('Posterior Mean: ' + str(infer_parameters().posterior_mean())) - print('True Value was: ' + str([170, 65])) \ No newline at end of file From 9ba6255815086220447cec54ae511143549aa01c Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Sun, 16 Dec 2018 15:42:01 +0000 Subject: [PATCH 27/41] Inferences modified to run with nested MPI --- tests/inferences_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index 4308324d..ed4578f3 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -92,6 +92,7 @@ def test_sample(self): self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) self.assertLess(abs(mu_post_mean - (-3.402868)), 1e-3) + 
print(mu_post_mean) self.assertLess(abs(sigma_post_mean - 6.212), 1e-3) self.assertFalse(journal.number_of_simulations == 0) From ad878c7900356279032b8174688632f822ab576d Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Sun, 16 Dec 2018 15:45:38 +0000 Subject: [PATCH 28/41] Inferences modified to run with nested MPI --- examples/backends/mpi/mpi_model_inferences.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index c4190f8c..362cf364 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -355,11 +355,11 @@ def test_example(self): if __name__ == "__main__": setup_backend() print('True Value was: ' + str([170, 65])) - print('Posterior Mean: ' + str(infer_parameters_pmcabc().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_abcsubsim().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_rsmcabc().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_sabc().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_smcabc().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_apmcabc().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_rejectionabc().posterior_mean())) - print('Posterior Mean: ' + str(infer_parameters_pmc().posterior_mean())) + print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean())) + print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) + print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) + print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) + print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) + print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) + print('Posterior 
Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) + print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) \ No newline at end of file From 63a39281028f21106c34fb9ed892f6714243575e Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Sun, 16 Dec 2018 16:34:44 +0000 Subject: [PATCH 29/41] Fixed to make tests passing --- abcpy/distances.py | 4 ---- tests/inferences_tests.py | 9 ++++----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/abcpy/distances.py b/abcpy/distances.py index 53f26cd8..5437bda8 100644 --- a/abcpy/distances.py +++ b/abcpy/distances.py @@ -240,10 +240,6 @@ def distance(self, d1, d2): A list, containing a list describing the data set """ - print("d1 : ", d1) - print("type(d1) : ", type(d1)) - print("d2 : ", d2) - print("type(d2) : ", type(d2)) if not isinstance(d1, list): raise TypeError('Data is not of allowed types') diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index ed4578f3..f49633d9 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -91,9 +91,8 @@ def test_sample(self): self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) - self.assertLess(abs(mu_post_mean - (-3.402868)), 1e-3) - print(mu_post_mean) - self.assertLess(abs(sigma_post_mean - 6.212), 1e-3) + self.assertLess(abs(mu_post_mean - (-3.56042761)), 1e-3) + self.assertLess(abs(sigma_post_mean - 5.7553691), 1e-3) self.assertFalse(journal.number_of_simulations == 0) @@ -112,8 +111,8 @@ def test_sample(self): self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) - self.assertLess(abs(mu_post_mean - (-3.03325763) ), 1e-3) - self.assertLess(abs(sigma_post_mean - 6.92124735), 1e-3) + self.assertLess(abs(mu_post_mean - (-3.25971092) ), 1e-3) + self.assertLess(abs(sigma_post_mean - 7.76172201), 1e-3) 
self.assertFalse(journal.number_of_simulations == 0) From 928afc4da24621364448440c5364546fb8e377e7 Mon Sep 17 00:00:00 2001 From: pierre Date: Tue, 18 Dec 2018 17:14:17 +0100 Subject: [PATCH 30/41] reverted PMCABC to original version (except the mpi comm as parameter) and made model in mpi_model_inferences return valid value at every rank --- abcpy/inferences.py | 19 +++++--------- examples/backends/mpi/mpi_model_inferences.py | 25 ++++++++++--------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index d97c4210..a623fd66 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -568,23 +568,16 @@ def _resample_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = None - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) - if mpi_comm == None or mpi_comm.Get_rank() == 0: - self.logger.debug( - "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". - format(counter, distance, float(self.epsilon)) - ) - return (theta, distance, counter) + self.logger.debug( + "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". 
+ format(counter, distance, float(self.epsilon)) + ) + return (theta, distance, counter) return None diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index 362cf364..24b1250f 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -75,7 +75,8 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com data = mpi_comm.gather(vector_of_k_samples) # print("Hello from forward_simulate after gather, rank = ", rank) - # Format the output to obey API but only on rank 0 + # Format the output to obey API and broadcast it before return + result = None if rank == 0: result = [None] * k for i in range(k): @@ -83,10 +84,10 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com element1 = data[1][i] point = np.array([element0, element1]) result[i] = point - # print("Process 0 will return : ", result) - return [np.array([result[i]]).reshape(-1, ) for i in range(k)] - else: - return + result = [np.array([result[i]]).reshape(-1, ) for i in range(k)] + + result = mpi_comm.bcast(result) + return result def infer_parameters_pmcabc(): # define observation for true parameters mean=170, 65 @@ -356,10 +357,10 @@ def test_example(self): setup_backend() print('True Value was: ' + str([170, 65])) print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean())) - print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) - print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) - print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) - print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) - print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) - print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) - 
print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) \ No newline at end of file + # print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) + # print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) + # print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) + # print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) + # print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) + # print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) + # print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) \ No newline at end of file From 770349423fb4068f87d2232b895d1089893096a8 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 18 Dec 2018 23:11:12 +0000 Subject: [PATCH 31/41] Example made to run with nested MPI for all except SMCABC and ABCsubsim --- abcpy/backends/__init__.py | 2 +- abcpy/inferences.py | 98 +++-------------- abcpy/multilevel.py | 101 ++++++++++++++++++ examples/backends/mpi/mpi_model_inferences.py | 68 +++--------- 4 files changed, 133 insertions(+), 136 deletions(-) create mode 100644 abcpy/multilevel.py diff --git a/abcpy/backends/__init__.py b/abcpy/backends/__init__.py index a6c41518..936dfbd0 100644 --- a/abcpy/backends/__init__.py +++ b/abcpy/backends/__init__.py @@ -23,4 +23,4 @@ def BackendMPITestHelper(*args,**kwargs): def BackendSpark(*args,**kwargs): from abcpy.backends.spark import BackendSpark - return BackendSpark(*args,**kwargs) + return BackendSpark(*args,**kwargs) \ No newline at end of file diff --git a/abcpy/inferences.py b/abcpy/inferences.py index a623fd66..d45f0bb1 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -279,13 +279,7 @@ def _sample_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 if(y_sim 
is not None): - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = None - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) self.logger.debug("distance after {:4d} simulations: {:e}".format( counter, distance)) else: @@ -1440,14 +1434,7 @@ def _accept_parameter(self, data, mpi_comm=None): all_parameters.append(new_theta) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) all_distances.append(distance) acceptance = rng.binomial(1, np.exp(-distance / self.epsilon), 1) acceptance = 1 @@ -1465,13 +1452,7 @@ def _accept_parameter(self, data, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) smooth_distance = 
self._smoother_distance([distance], self.all_distances_bds.value()) ## Calculate acceptance probability: @@ -1758,12 +1739,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): self.sample_from_prior(rng=rng) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) result_theta.append(self.get_parameters()) result_distance.append(distance) else: @@ -1771,12 +1747,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): self.set_parameters(theta) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) result_theta.append(theta) result_distance.append(distance) for ind in range(0, self.chain_length - 1): @@ -1786,12 +1757,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): break y_sim = self.simulate(self.n_samples_per_param, rng=rng,mpi_comm=mpi_comm) counter+=1 - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), 
y_sim) - new_distance = mpi_comm.bcast(new_distance) - else: - new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) ## Calculate acceptance probability: ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) @@ -1813,7 +1779,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): return (result_theta, result_distance, counter) - def _update_cov_mat(self, rng_t): + def _update_cov_mat(self, rng_t, mpi_comm=None): """ Updates the covariance matrix. @@ -1849,7 +1815,7 @@ def _update_cov_mat(self, rng_t): perturbation_output = self.perturb(0, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -2139,7 +2105,6 @@ def _accept_parameter(self, rng, mpi_comm=None): rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) distance = self.distance.dist_max() - mapping_for_kernels, garbage_index = self.accepted_parameters_manager.get_mapping( self.accepted_parameters_manager.model) @@ -2150,13 +2115,7 @@ def _accept_parameter(self, rng, mpi_comm=None): self.sample_from_prior(rng=rng) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = 
self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) index_accept = 1 else: @@ -2170,14 +2129,7 @@ def _accept_parameter(self, rng, mpi_comm=None): break y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], theta) kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], perturbation_output[1]) @@ -2458,13 +2410,7 @@ def _accept_parameter(self, rng, mpi_comm=None): self.sample_from_prior(rng=rng) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) weight = 1.0 else: @@ -2479,13 +2425,7 @@ def _accept_parameter(self, rng, mpi_comm=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 - - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != 
None and mpi_comm.Get_rank() == 0: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - distance = mpi_comm.bcast(distance) - else: - distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) denominator = 0.0 @@ -2734,7 +2674,6 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_parameters = new_parameters accepted_y_sim = new_y_sim - if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): self.logger.info("Saving configuration to output journal") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) @@ -2873,15 +2812,8 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): numerator = 0.0 denominator = 0.0 for ind in range(self.n_samples_per_param): - # y_sim valid only at rank 0, when used with nested MPI - if mpi_comm != None and mpi_comm.Get_rank() == 0: - distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) - distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) - distance_new = mpi_comm.bcast(distance_new) - distance_old = mpi_comm.bcast(distance_old) - else: - distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) - distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) + distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) + distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) numerator += 
(distance_new < self.epsilon[-1]) denominator += (distance_old < self.epsilon[-1]) if denominator == 0: @@ -2902,4 +2834,4 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) y_sim = self.accepted_y_sim_bds.value()[index] - return (self.get_parameters(), y_sim, counter) + return (self.get_parameters(), y_sim, counter) \ No newline at end of file diff --git a/abcpy/multilevel.py b/abcpy/multilevel.py new file mode 100644 index 00000000..ce1aa02d --- /dev/null +++ b/abcpy/multilevel.py @@ -0,0 +1,101 @@ +from abc import ABCMeta, abstractmethod + +import numpy as np +from glmnet import LogitNet +from sklearn import linear_model + + +class Multilevel(metaclass=ABCMeta): + """This abstract base class defines how the distance between the observed and + simulated data should be implemented. + """ + + @abstractmethod + def __init__(self, backend, data_thinner, criterion_calculator): + """The constructor of a sub-class must accept a non-optional data thinner and criterion + calculator as parameters. + + Parameters + ---------- + backend: abcpy.backend + Backend object + data_thinner : object + Object that operates on data and thins it + criterion_calculator: object + Object that operates on n_samples_per_param data and computes the criterion + """ + + self.bacend = backend + self.data_thinner = data_thinner + self.criterion_calculator = criterion_calculator + + raise NotImplementedError + + @abstractmethod + def compute(self, d, n_repeat): + """To be overwritten by any sub-class: should calculate the criterion for each + set of data_element in the lis data + + Notes + ----- + The data set d is an array-like structures that contain n data + points each. An implementation of the distance function should work along + the following steps: + + 1. Transform both input sets dX = [ dX1, dX2, ..., dXn ] to sX = [sX1, sX2, + ..., sXn] using the statistics object. 
See _calculate_summary_stat method. + + 2. Calculate the mutual desired distance, here denoted by -, between the + statstics dist = [s11 - s21, s12 - s22, ..., s1n - s2n]. + + Important: any sub-class must not calculate the distance between data sets + d1 and d2 directly. This is the reason why any sub-class must be + initialized with a statistics object. + + Parameters + ---------- + d: Python list + Contains n data points. + + + Returns + ------- + numpy.ndarray + The criterion calculated for each data point. + """ + + raise NotImplementedError + + ## Simple_map and Flat_map: Python wrapper for nested parallelization + def simple_map(self, data, map_function): + data_pds = self.backend.parallelize(data) + result_pds = self.backend.map(map_function, data_pds) + result = self.backend.collect(result_pds) + main_result, counter = [list(t) for t in zip(*result)] + return main_result, counter + + def flat_map(self, data, n_repeat, map_function): + # Create an array of data, with each data repeated n_repeat many times + repeated_data = np.repeat(data, n_repeat, axis=0) + # Create an see array + n_total = n_repeat * data.shape[0] + seed_arr = self.rng.randint(1, n_total * n_total, size=n_total, dtype=np.int32) + rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) + # Create data and rng array + repeated_data_rng = [[repeated_data[ind,:],rng_arr[ind]] for ind in range(n_total)] + repeated_data_rng_pds = self.backend.parallelize(repeated_data_rng) + # Map the function on the data using the corresponding rng + repeated_data_result_pds = self.backend.map(map_function, repeated_data_rng_pds) + repeated_data_result = self.backend.collect(repeated_data_result_pds) + repeated_data, result = [list(t) for t in zip(*repeated_data_result)] + merged_result_data = [] + for ind in range(0, data.shape[0]): + merged_result_data.append([[[result[np.int(i)][0][0] \ + for i in + np.where(np.mean(repeated_data == data[ind, :], axis=1) == 1)[0]]], + data[ind, :]]) + return 
merged_result_data + + +class Prototype(Multilevel): + \ No newline at end of file diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index 24b1250f..faa06d4b 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -113,11 +113,10 @@ def infer_parameters_pmcabc(): # define sampling scheme from abcpy.inferences import PMCABC sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') # sample from scheme - T, n_sample, n_samples_per_param = 2, 100, 1 + T, n_sample, n_samples_per_param = 2, 10, 1 eps_arr = np.array([10000]) - epsilon_percentile = 90 + epsilon_percentile = 95 journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) @@ -146,9 +145,8 @@ def infer_parameters_abcsubsim(): # define sampling scheme from abcpy.inferences import ABCsubsim - sampler = ABCsubsim([height_weight_model], [distance_calculator], backend, seed=1) - steps, n_samples = 10, 1000 - print('ABCsubsim Inferring') + sampler = ABCsubsim([height_weight_model], [distance_calculator], backend) + steps, n_samples = 2, 4 journal = sampler.sample([y_obs], steps, n_samples) return journal @@ -175,10 +173,10 @@ def infer_parameters_rsmcabc(): distance_calculator = Euclidean(statistics_calculator) # define sampling scheme - from abcpy.inferences import SMCABC - sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) + from abcpy.inferences import RSMCABC + sampler = RSMCABC([height_weight_model], [distance_calculator], backend, seed=1) print('sampling') - steps, n_samples, n_samples_per_param, alpha, epsilon_init, epsilon_final = 2, 10, 1, 0.1, 10000, 2000 + steps, n_samples, n_samples_per_param, alpha, epsilon_init, epsilon_final = 2, 10, 1, 0.1, 10000, 500 print('RSMCABC Inferring') journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha , epsilon_init, 
epsilon_final,full_output=1) @@ -209,7 +207,7 @@ def infer_parameters_sabc(): from abcpy.inferences import SABC sampler = SABC([height_weight_model], [distance_calculator], backend, seed=1) print('sampling') - steps, epsilon, n_samples, n_samples_per_param, beta, delta, v = 2, np.array([10000]), 10, 1, 2, 0.2, 0.3 + steps, epsilon, n_samples, n_samples_per_param, beta, delta, v = 2, np.array([40000]), 10, 1, 2, 0.2, 0.3 ar_cutoff, resample, n_update, adaptcov, full_output = 0.1, None, None, 1, 1 # # # print('SABC Inferring') @@ -217,38 +215,6 @@ def infer_parameters_sabc(): return journal -def infer_parameters_smcabc(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import SMCABC - sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) - print('sampling') - steps, n_samples, n_samples_per_param, epsilon = 4, 100, 1, 10000 - print('SMCABC Inferring') - journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1) - print(np.array(journal.get_parameters())) - - return journal - def infer_parameters_apmcabc(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() @@ -273,7 +239,6 @@ def infer_parameters_apmcabc(): # define sampling scheme from abcpy.inferences import APMCABC sampler = APMCABC([height_weight_model], [distance_calculator], backend, seed=1) - 
print('sampling') steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file = 2, 100, 1, 0.2, 0.03, 2.0, 1, None journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha, acceptance_cutoff, covFactor, full_output, journal_file) @@ -303,8 +268,7 @@ def infer_parameters_rejectionabc(): # define sampling scheme from abcpy.inferences import RejectionABC sampler = RejectionABC([height_weight_model], [distance_calculator], backend, seed=1) - n_samples, n_samples_per_param, epsilon = 2, 2, 20 - print('Rejection ABC Inferring') + n_samples, n_samples_per_param, epsilon = 2, 1, 20000 journal = sampler.sample([y_obs], n_samples, n_samples_per_param, epsilon) return journal @@ -357,10 +321,10 @@ def test_example(self): setup_backend() print('True Value was: ' + str([170, 65])) print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean())) - # print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) - # print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) - # print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) - # print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) - # print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) - # print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) - # print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) \ No newline at end of file + #print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy) + print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) + print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) + #print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) (Buggy) + print('Posterior Mean of 
APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) + print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) + print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) \ No newline at end of file From c6bdc2c7a330b9a52d3b3011aaeb282550bb7a3f Mon Sep 17 00:00:00 2001 From: Ritabrata Dutta Date: Tue, 8 Jan 2019 12:08:54 +0000 Subject: [PATCH 32/41] Update ReadMe Updated references --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ec75eca6..28b28456 100644 --- a/README.md +++ b/README.md @@ -62,22 +62,22 @@ BibTex reference. Publications in which ABCpy was applied: -* R. Dutta, M. Schoengens, A. Ummadisingu, J. P. Onnela, A. Mira, "ABCpy: A - High-Performance Computing Perspective to Approximate Bayesian Computation", - 2017, arXiv:1711.04694 - * R. Dutta, J. P. Onnela, A. Mira, "Bayesian Inference of Spreading Processes - on Networks", 2017, arXiv:1709.08862 + on Networks", 2018, Proc. R. Soc. A, 474(2215), 20180129. + +* R. Dutta, Z. Faidon Brotzakis and A. Mira, "Bayesian Calibration of + Force-fields from Experimental Data: TIP4P Water", 2018, Journal of Chemical Physics 149, 154110. * R. Dutta, B. Chopard, J. Lätt, F. Dubois, K. Zouaoui Boudjeltia and A. Mira, "Parameter Estimation of Platelets Deposition: Approximate Bayesian - Computation with High Performance Computing", 2017, arXiv:1710.01054 + Computation with High Performance Computing", 2018, Frontiers in physiology, 9. * A. Ebert, R. Dutta, P. Wu, K. Mengersen and A. Mira, "Likelihood-Free Parameter Estimation for Dynamic Queueing Networks", 2018, arXiv:1804.02526 -* R. Dutta, Z. Faidon Brotzakis and A. Mira, "Bayesian Calibration of - Force-fields from Experimental Data: TIP4P Water", 2018, arXiv:1804.02742 +* R. Dutta, M. Schoengens, A. Ummadisingu, N. Widerman, J. P. Onnela, A. 
Mira, "ABCpy: A + High-Performance Computing Perspective to Approximate Bayesian Computation", + 2017, arXiv:1711.04694 ## License From f77bb9bc880578a5362142db6c189c94a6ca653c Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 9 Jan 2019 10:21:08 +0000 Subject: [PATCH 33/41] fixes --- abcpy/inferences.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index d45f0bb1..69d9e4e0 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2792,13 +2792,14 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): self.accepted_parameters_manager.model) counter=0 - + print('Hello 1') # print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) - if self.accepted_parameters_manager.accepted_parameters_bds == None: + if self.accepted_parameters_manager.accepted_parameters_bds is None: self.sample_from_prior(rng=rng) y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) counter+=1 else: + print('Hello2') if self.accepted_parameters_manager.accepted_weights_bds.value()[index] > 0: theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) while True: From d985a72cc771b83a0ce1826403cc3a31d3c16149 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 16 Jan 2019 17:56:58 +0100 Subject: [PATCH 34/41] Fix mpi tests Parameter order of mapping function has been changed to (data, mpi_comm), which was still in the old order (mpi_comm, data) --- abcpy/backends/mpi.py | 3 ++- tests/backend_tests_mpi_model_mpi.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index 4390e7f7..8ac6d11c 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -1,3 +1,4 @@ +# noinspection PyInterpreter import pickle import time @@ -353,7 +354,7 @@ def __init__(self): self.__worker_run() def run_function(self, function_packed, 
data_item): - """ + """ Receives a serialized function unpack it and run it Passes the model communicator if ther is more than one process per model """ diff --git a/tests/backend_tests_mpi_model_mpi.py b/tests/backend_tests_mpi_model_mpi.py index 2f196bec..e6c0d826 100644 --- a/tests/backend_tests_mpi_model_mpi.py +++ b/tests/backend_tests_mpi_model_mpi.py @@ -26,14 +26,14 @@ class MPIBackendTests(unittest.TestCase): def test_parallelize(self): data = [0]*backend_mpi.size() pds = backend_mpi.parallelize(data) - pds_map = backend_mpi.map(lambda model_comm, x: x + MPI.COMM_WORLD.Get_rank(), pds) + pds_map = backend_mpi.map(lambda x, model_comm: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend_mpi.collect(pds_map) for scheduler_index in backend_mpi.scheduler_node_ranks(): self.assertTrue(scheduler_index not in res,"Node in scheduler_node_ranks performed map.") def test_map(self): - def square_mpi(model_comm, x): + def square_mpi(x, model_comm): local_res = numpy.array([x**2], 'i') global_res = numpy.array([0], 'i') model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) @@ -57,7 +57,7 @@ def test_broadcast(self): for k,v in backend_mpi.bds_store.items(): backend_mpi.bds_store[k] = 99999 - def test_map(model_comm, x): + def test_map(x, model_comm): return x + bds.value() pds_m = backend_mpi.map(test_map, pds) @@ -65,7 +65,7 @@ def test_map(model_comm, x): def test_pds_delete(self): - def check_if_exists(model_comm, x): + def check_if_exists(x, model_comm): obj = BackendMPITestHelper() if model_comm.Get_rank() == 0: return obj.check_pds(x) @@ -89,7 +89,7 @@ def check_if_exists(model_comm, x): def test_bds_delete(self): - def check_if_exists(model_comm, x): + def check_if_exists(x, model_comm): obj = BackendMPITestHelper() return obj.check_bds(x) @@ -109,7 +109,7 @@ def check_if_exists(model_comm, x): def test_function_pickle(self): - def square_mpi(model_comm, x): + def square_mpi(x, model_comm): local_res = numpy.array([x**2], 'i') global_res 
= numpy.array([0], 'i') model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) @@ -117,14 +117,14 @@ def square_mpi(model_comm, x): class staticfunctest_mpi: @staticmethod - def square_mpi(model_comm, x): + def square_mpi(x, model_comm): local_res = numpy.array([x**2], 'i') global_res = numpy.array([0], 'i') model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) return global_res[0] class nonstaticfunctest_mpi: - def square_mpi(self, model_comm, x): + def square_mpi(self, x, model_comm): local_res = numpy.array([x**2], 'i') global_res = numpy.array([0], 'i') model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) From 29d25f5b584db9d8d1d6f3d21e5a5d5543fe9cb6 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Thu, 17 Jan 2019 13:33:33 +0100 Subject: [PATCH 35/41] Add debug output and remove some redundant code --- abcpy/inferences.py | 17 +++++++++++--- examples/backends/mpi/mpi_model_inferences.py | 23 +++++++++++-------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 69d9e4e0..b4f8fad4 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -1581,7 +1581,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for aStep in range(0, steps): - self.logger.info("Step {}".format(aStep)) + self.logger.info("ABCsubsim step {}".format(aStep)) if aStep==0 and journal_file is not None: accepted_parameters = journal.parameters[-1] @@ -1589,7 +1589,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_cov_mats = journal.opt_values[-1] # main ABCsubsim algorithm - self.logger.info("Initializatio of ABCsubsim") + self.logger.info("Initialization of ABCsubsim") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(n_samples / temp_chain_length), dtype=np.uint32) rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) @@ -1607,7 
+1607,9 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # print("INFO: Initial accepted parameter parameters") self.logger.info("Initial accepted parameters") params_and_dists_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) + self.logger.debug("Map random number to a pseudo-observation") params_and_dists = self.backend.collect(params_and_dists_pds) + self.logger.debug("Collect results from the mapping") new_parameters, new_distances, counter = [list(t) for t in zip(*params_and_dists)] for count in counter: @@ -1617,11 +1619,13 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 distances = np.concatenate(new_distances) # 2: Sort and renumber samples + self.logger.debug("Sort and renumber samples.") SortIndex = sorted(range(len(distances)), key=lambda k: distances[k]) distances = distances[SortIndex] accepted_parameters = accepted_parameters[SortIndex, :] # 3: Calculate and broadcast annealling parameters + self.logger.debug("Calculate and broadcast annealling parameters.") temp_chain_length = chain_length if aStep > 0: anneal_parameter_old = anneal_parameter @@ -1631,6 +1635,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 4: Update proposal covariance matrix (Parallelized) + self.logger.debug("Update proposal covariance matrix (Parallelized).") if aStep == 0: self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) @@ -1654,7 +1659,9 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 rng_and_index_arr = np.column_stack((rng_arr, index_arr)) rng_and_index_pds = self.backend.parallelize(rng_and_index_arr) + self.logger.debug("Update co-variance matrix in parallel (map).") cov_mats_index_pds = self.backend.map(self._update_cov_mat, rng_and_index_pds) + self.logger.debug("Collect co-variance matrix.") cov_mats_index = self.backend.collect(cov_mats_index_pds) 
cov_mats, T, accept_index, counter = [list(t) for t in zip(*cov_mats_index)] @@ -1666,6 +1673,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_cov_mats = cov_mats[ind] break + self.logger.debug("Broadcast accepted parameters.") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) if full_output == 1: @@ -1811,6 +1819,7 @@ def _update_cov_mat(self, rng_t, mpi_comm=None): counter = 0 for ind in range(0, self.chain_length): + self.logger.debug("Parameter acceptance loop step {}.".format(ind)) while True: perturbation_output = self.perturb(0, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: @@ -1819,16 +1828,18 @@ def _update_cov_mat(self, rng_t, mpi_comm=None): counter+=1 new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + self.logger.debug("Calculate acceptance probability.") ## Calculate acceptance probability: ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager,0 , theta) kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager,0 , perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator acceptance_prob = min(1, ratio_prior_prob * ratio_likelihood_prob) * (new_distance < self.anneal_parameter) - ## If accepted if rng.binomial(1, acceptance_prob) == 1: theta = perturbation_output[1] acceptance = acceptance + 1 + + self.logger.debug("Return accepted parameters.") if acceptance / 10 <= 0.5 and acceptance / 10 >= 0.3: return (accepted_cov_mats_transformed, t, 1, counter) else: diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index faa06d4b..af8c9b88 100644 --- a/examples/backends/mpi/mpi_model_inferences.py 
+++ b/examples/backends/mpi/mpi_model_inferences.py @@ -1,3 +1,6 @@ +import logging +logging.basicConfig(level=logging.DEBUG) + import numpy as np from mpi4py import MPI from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector @@ -304,27 +307,27 @@ def infer_parameters_pmc(): return journal -import unittest -from mpi4py import MPI +#import unittest +#from mpi4py import MPI def setUpModule(): setup_backend() -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result +#class ExampleMPIModelTest(unittest.TestCase): +# def test_example(self): +# result = run_model() +# data = [1,2,3,4,5] +# expected_result = list(map(lambda x:2*(x**2),data)) +# assert result==expected_result if __name__ == "__main__": setup_backend() print('True Value was: ' + str([170, 65])) print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean())) - #print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy) + print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy) print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) #print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) (Buggy) print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) - print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) \ No newline at end of file + print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) From a73b94f3f7e898eea37159005b6a97d3c413e51a Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Tue, 22 Jan 2019 17:38:20 +0100 
Subject: [PATCH 36/41] Prototype of nested parallelization controller --- abcpy/backends/base.py | 9 +++ abcpy/backends/mpi.py | 60 +++++++++++++++++-- abcpy/distances.py | 1 + abcpy/inferences.py | 10 +++- abcpy/jointdistances.py | 2 +- examples/backends/mpi/mpi_model_inferences.py | 50 +++++++++++++--- 6 files changed, 117 insertions(+), 15 deletions(-) diff --git a/abcpy/backends/base.py b/abcpy/backends/base.py index 13e6d760..b74bdcd4 100644 --- a/abcpy/backends/base.py +++ b/abcpy/backends/base.py @@ -226,3 +226,12 @@ def __init__(self, object): def value(self): return self.object + +class NestedParallelizationController(): + @abstractmethod + def nested_execution(self): + raise NotImplementedError + + @abstractmethod + def run_nested(self, func, *args, **kwargs): + raise NotImplementedError diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index 8ac6d11c..c280f40e 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -1,9 +1,9 @@ # noinspection PyInterpreter +import cloudpickle +import numpy as np import pickle import time -import cloudpickle -import numpy as np from mpi4py import MPI from abcpy.backends import BDS, PDS, Backend @@ -12,6 +12,40 @@ import abcpy.backends.mpimanager from mpi4py import MPI + +class NestedParallelizationControllerMPI(NestedParallelizationController): + def __init__(self, mpi_comm): + self.loop_workers = True + self.mpi_comm = mpi_comm + self.nested_func = None + self.func_args = () + self.func_kwargs = {} + + def get_communicator(self): + return mpi_comm + + def nested_execution(self): + while loop_workers: + self.mpi_comm.barrier() + func_kwargs['mpi_comm'] = self.mpi_comm + self.nested_func(*func_args, **func_kwargs) + self.mpi_comm.barrier() + + def run_nested(self, func, *args, **kwargs): + self.nested_func = func + self.func_args = args + self.func_kwargs = kwargs + nested_execution() + self.nested_func = None + self.func_args = () + self.func_kwargs = {} + + def stop_workers(self): + self.loop_workers = 
False
+        func = (lambda : None)
+        run_nested(func)
+
+
 class BackendMPIScheduler(Backend):
     """Defines the behavior of the scheduler process
@@ -361,11 +395,17 @@ def run_function(self, function_packed, data_item):
         func = cloudpickle.loads(function_packed)
         try:
             if(self.mpimanager.get_model_size() > 1):
-                res = func(data_item, self.mpimanager.get_model_communicator())
+                npc = NestedParallelizationControllerMPI(self.mpimanager.get_model_communicator())
+                if self.mpimanager.get_model_communicator().Get_rank() == 0:
+                    res = func(data_item, npc)
+                    npc.stop_workers()
+                else:
+                    npc.nested_execution()
             else:
                 res = func(data_item)
         except Exception as e:
-            res = e
+            msg = "Exception occurred while calling the map function {}: ".format(func.__name__)
+            res = type(e)(msg + str(e))
         return res
@@ -646,6 +686,18 @@ def scheduler_node_ranks(self):
         return self.mpimanager.get_scheduler_node_ranks()
+    @staticmethod
+    def disable_nested(mpi_comm):
+        if mpi_comm.Get_rank() != 0:
+            mpi_comm.Barrier()
+
+
+    @staticmethod
+    def enable_nested(mpi_comm):
+        if mpi_comm.Get_rank() == 0:
+            mpi_comm.Barrier()
+
+
 class PDSMPI(PDS):
     """
diff --git a/abcpy/distances.py b/abcpy/distances.py
index 5437bda8..eb8ced17 100644
--- a/abcpy/distances.py
+++ b/abcpy/distances.py
@@ -126,6 +126,7 @@ def distance(self, d1, d2):
         d1, d2: list
             A list, containing a list describing the data set
         """
+
         if not isinstance(d1, list):
             raise TypeError('Data is not of allowed types')
         if not isinstance(d2, list):
diff --git a/abcpy/inferences.py b/abcpy/inferences.py
index b4f8fad4..f3857099 100644
--- a/abcpy/inferences.py
+++ b/abcpy/inferences.py
@@ -2818,16 +2818,21 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None):
                     if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0:
                         break
                 y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm)
+                print('2821: ' + y_sim.__str__())
                 counter+=1
                 y_sim_old = self.accepted_y_sim_bds.value()[index]
                 ## Calculate acceptance
probability: numerator = 0.0 denominator = 0.0 for ind in range(self.n_samples_per_param): - distance_new = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim[0][ind]]]) + print('2828: ' + y_sim.__str__()) + lhs = self.accepted_parameters_manager.observations_bds.value() + rhs = [[y_sim[0][ind]]] + distance_new = self.distance.distance(lhs, rhs) distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) numerator += (distance_new < self.epsilon[-1]) denominator += (distance_old < self.epsilon[-1]) + print('denom') if denominator == 0: ratio_data_epsilon = 1 else: @@ -2837,6 +2842,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator acceptance_prob = min(1, ratio_data_epsilon * ratio_prior_prob * ratio_likelihood_prob) + print('binom') if rng.binomial(1, acceptance_prob) == 1: self.set_parameters(perturbation_output[1]) else: @@ -2846,4 +2852,4 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) y_sim = self.accepted_y_sim_bds.value()[index] - return (self.get_parameters(), y_sim, counter) \ No newline at end of file + return (self.get_parameters(), y_sim, counter) diff --git a/abcpy/jointdistances.py b/abcpy/jointdistances.py index 76b148ea..be6a5970 100644 --- a/abcpy/jointdistances.py +++ b/abcpy/jointdistances.py @@ -131,4 +131,4 @@ def dist_max(self): combined_distance_max = 0.0 for ind in range(len(self.distances)): combined_distance_max += self.weights[ind]*self.distances[ind].dist_max() - return combined_distance_max \ No newline at end of file + return combined_distance_max diff --git a/examples/backends/mpi/mpi_model_inferences.py 
b/examples/backends/mpi/mpi_model_inferences.py index af8c9b88..55815dce 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -10,6 +10,7 @@ def setup_backend(): global backend from abcpy.backends import BackendMPI as Backend + from abcpy.backends import NestedParallelizationController backend = Backend(process_per_model=2) # backend = Backend() @@ -63,8 +64,12 @@ def _check_output(self, values): def get_output_dimension(self): return 2 - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): + def forward_simulate(self, input_values, k, rng=np.random.RandomState(), npc=None): + print('before mpi part') + forward_simulate_mpi(input_values, k, rng=np.random.RandomState, mpi_comm=npc.get_communicator()) + print('after mpi part') + def forward_simulate_mpi(self, input_values, k, rng=np.random.RandomState, mpi_comm=None): rank = mpi_comm.Get_rank() # Extract the input parameters mu = input_values[rank] @@ -75,7 +80,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com # Send everything back to rank 0 # print("Hello from forward_simulate before gather, rank = ", rank) - data = mpi_comm.gather(vector_of_k_samples) + data = mpi_comm.gather(vector_of_k_samples, root=0) # print("Hello from forward_simulate after gather, rank = ", rank) # Format the output to obey API and broadcast it before return @@ -88,9 +93,10 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com point = np.array([element0, element1]) result[i] = point result = [np.array([result[i]]).reshape(-1, ) for i in range(k)] + return result + else: + return None - result = mpi_comm.bcast(result) - return result def infer_parameters_pmcabc(): # define observation for true parameters mean=170, 65 @@ -307,8 +313,36 @@ def infer_parameters_pmc(): return journal -#import unittest -#from mpi4py import MPI + +def infer_parameters_smcabc(): + # define observation for 
true parameters mean=170, 65
+    rng = np.random.RandomState()
+    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))]
+
+    # define prior
+    from abcpy.continuousmodels import Uniform
+    mu0 = Uniform([[150], [200]], )
+    mu1 = Uniform([[25], [100]], )
+
+    # define the model
+    height_weight_model = NestedBivariateGaussian([mu0, mu1])
+
+    # define statistics
+    from abcpy.statistics import Identity
+    statistics_calculator = Identity(degree = 2, cross = False)
+
+    # define distance
+    from abcpy.distances import Euclidean
+    distance_calculator = Euclidean(statistics_calculator)
+
+    # define sampling scheme
+    from abcpy.inferences import SMCABC
+    sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1)
+    steps, n_samples, n_samples_per_param, epsilon = 2, 10, 1, 2000
+    journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1)
+
+    return journal
+
 def setUpModule():
     setup_backend()
@@ -324,10 +358,10 @@ def setUpModule():
     setup_backend()
     print('True Value was: ' + str([170, 65]))
     print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean()))
-    print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy)
+    #print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy)
     print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean()))
     print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean()))
-    #print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) (Buggy)
+    print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean()))  # (Buggy)
     print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean()))
     print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean()))
     print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean()))
From
34694b097cc94d6e546c1baaa5a56f6b5498643f Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 23 Jan 2019 14:12:22 +0100 Subject: [PATCH 37/41] Make nested parallelization prototype work --- abcpy/backends/mpi.py | 82 +++++++++++++++---- abcpy/graphtools.py | 6 +- abcpy/inferences.py | 58 ++++++------- examples/backends/mpi/mpi_model_inferences.py | 25 +++--- 4 files changed, 110 insertions(+), 61 deletions(-) diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index c280f40e..3816becb 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -3,10 +3,11 @@ import numpy as np import pickle import time +import logging from mpi4py import MPI -from abcpy.backends import BDS, PDS, Backend +from abcpy.backends import BDS, PDS, Backend, NestedParallelizationController import abcpy.backends.mpimanager @@ -15,36 +16,80 @@ class NestedParallelizationControllerMPI(NestedParallelizationController): def __init__(self, mpi_comm): + self.logger = logging.getLogger(__name__) + self.logger.info("#### Initialize NPC ####") self.loop_workers = True self.mpi_comm = mpi_comm - self.nested_func = None + self.nested_func = "NoFunction" self.func_args = () self.func_kwargs = {} + self.result = None + if self.mpi_comm.Get_rank() != 0: + self.nested_execution() + + + def communicator(self): + return self.mpi_comm - def get_communicator(self): - return mpi_comm def nested_execution(self): - while loop_workers: + rank = self.mpi_comm.Get_rank() + self.logger.debug("Starting nested loop on rank {}".format(rank)) + while self.loop_workers: self.mpi_comm.barrier() - func_kwargs['mpi_comm'] = self.mpi_comm - self.nested_func(*func_args, **func_kwargs) + self.loop_workers = self.mpi_comm.bcast(self.loop_workers, root=0) + if self.loop_workers == False: + return + func_p = None + func_args_p = None + func_kwargs_p = None + if self.mpi_comm.Get_rank() == 0: + self.logger.debug("Start pickling func on rank {}".format(rank)) + func_p = cloudpickle.dumps(self.nested_func, 
pickle.HIGHEST_PROTOCOL) + func_args_p = cloudpickle.dumps(self.func_args, pickle.HIGHEST_PROTOCOL) + func_kwargs_p = cloudpickle.dumps(self.func_kwargs, pickle.HIGHEST_PROTOCOL) + + self.logger.debug("Broadcasting function {} on rank {}".format(self.nested_func, rank)) + func_p = self.mpi_comm.bcast(func_p, root=0) + func_args_p = self.mpi_comm.bcast(func_args_p, root=0) + func_kwargs_p = self.mpi_comm.bcast(func_kwargs_p, root=0) + self.nested_func = cloudpickle.loads(func_p) + self.func_args = cloudpickle.loads(func_args_p) + self.func_kwargs = cloudpickle.loads(func_kwargs_p) + + func = self.nested_func + self.logger.debug("Starting map function {} on rank {}".format(func.__name__, self.mpi_comm.Get_rank())) + self.func_kwargs['mpi_comm'] = self.mpi_comm self.mpi_comm.barrier() + self.result = func(*(self.func_args), **(self.func_kwargs)) + self.logger.debug("Ending map function on rank {}".format(self.mpi_comm.Get_rank())) + self.mpi_comm.barrier() + if self.mpi_comm.Get_rank() == 0: + return + self.loop_workers = True + self.logger.debug("Ending nested loop on rank {}".format(self.mpi_comm.Get_rank())) def run_nested(self, func, *args, **kwargs): + self.logger.debug("Executing nested function {}.".format(func.__name__)) self.nested_func = func self.func_args = args self.func_kwargs = kwargs - nested_execution() + self.nested_execution() + self.logger.debug("Return from nested execution of master rank") self.nested_func = None self.func_args = () self.func_kwargs = {} + self.logger.info(self.result) + return self.result - def stop_workers(self): + def __del__(self): + rank = self.mpi_comm.Get_rank() + self.logger.debug("Stopping npc on rank {}".format(rank)) self.loop_workers = False - func = (lambda : None) - run_nested(func) - + if rank == 0: + self.mpi_comm.barrier() + self.loop_workers = self.mpi_comm.bcast(self.loop_workers, root=0) + self.logger.debug(">>>>>>>> NPC stopped on rank {}".format(rank)) class BackendMPIScheduler(Backend): """Defines the 
behavior of the scheduler process @@ -385,6 +430,7 @@ class BackendMPIWorker(Backend): def __init__(self): """ No parameter, just call worker_run """ + self.logger = logging.getLogger(__name__) self.__worker_run() def run_function(self, function_packed, data_item): @@ -393,14 +439,14 @@ def run_function(self, function_packed, data_item): Passes the model communicator if ther is more than one process per model """ func = cloudpickle.loads(function_packed) + res = None try: if(self.mpimanager.get_model_size() > 1): npc = NestedParallelizationControllerMPI(self.mpimanager.get_model_communicator()) if self.mpimanager.get_model_communicator().Get_rank() == 0: - res = func(data_item, npc) - npc.stop_workers() - else: - npc.nested_execution() + self.logger.debug("Executing map function on master rank 0.") + res = func(data_item, npc=npc) + del(npc) else: res = func(data_item) except Exception as e: @@ -458,7 +504,7 @@ class BackendMPILeader(BackendMPIWorker): def __init__(self): """ No parameter, just call leader_run """ - + self.logger = logging.getLogger(__name__) self.__leader_run() @@ -640,6 +686,7 @@ def __init__(self): #print("In BackendMPITeam, rank : ", self.rank, ", model_rank_global : ", globals()['model_rank_global']) + self.logger = logging.getLogger(__name__) super().__init__() @@ -665,6 +712,7 @@ def __init__(self, scheduler_node_ranks=[0], process_per_model=1): number of MPI processes to allocate to each model """ # get mpimanager instance from the mpimanager module (which has to be setup before calling the constructor) + self.logger = logging.getLogger(__name__) self.mpimanager = abcpy.backends.mpimanager.get_mpi_manager() if self.mpimanager.get_world_size() < 2: diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index ab35900d..c2ec5049 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -388,7 +388,7 @@ def get_correct_ordering(self, parameters_and_models, models=None, is_root = Tru return ordered_parameters - def simulate(self, 
n_samples_per_param, rng=np.random.RandomState(), mpi_comm=None): + def simulate(self, n_samples_per_param, rng=np.random.RandomState(), npc=None): """Simulates data of each model using the currently sampled or perturbed parameters. Parameters @@ -405,10 +405,8 @@ def simulate(self, n_samples_per_param, rng=np.random.RandomState(), mpi_comm=No for model in self.model: parameters_compatible = model._check_input(model.get_input_values()) if parameters_compatible: - simulation_result = model.forward_simulate(model.get_input_values(), n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + simulation_result = npc.run_nested(model.forward_simulate, model.get_input_values(), n_samples_per_param, rng=rng) result.append(simulation_result) - if mpi_comm != None and mpi_comm.Get_rank() != 0: - return None else: return None return result diff --git a/abcpy/inferences.py b/abcpy/inferences.py index f3857099..eee961ab 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -248,7 +248,7 @@ def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_out return journal - def _sample_parameter(self, rng, mpi_comm=None): + def _sample_parameter(self, rng, npc=None): """ Samples a single model parameter and simulates from it until distance between simulated outcome and the observation is @@ -276,7 +276,7 @@ def _sample_parameter(self, rng, mpi_comm=None): # Accept new parameter value if the distance is less than epsilon self.sample_from_prior(rng=rng) theta = np.array(self.get_parameters(self.model)).reshape(-1,) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 if(y_sim is not None): distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -516,7 +516,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples return journal - def _resample_parameter(self, rng, mpi_comm=None): + def 
_resample_parameter(self, rng, npc=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -532,6 +532,8 @@ def _resample_parameter(self, rng, mpi_comm=None): np.array accepted parameter """ + + print(npc.communicator()) rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) distance = self.distance.dist_max() @@ -547,7 +549,7 @@ def _resample_parameter(self, rng, mpi_comm=None): if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) theta = self.get_parameters() - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 else: @@ -559,7 +561,7 @@ def _resample_parameter(self, rng, mpi_comm=None): if(perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!=0): theta = perturbation_output[1] break - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -575,7 +577,7 @@ def _resample_parameter(self, rng, mpi_comm=None): return None - def _calculate_weight(self, theta, mpi_comm=None): + def _calculate_weight(self, theta, npc=None): """ Calculates the weight for the given parameter using accepted_parameters, accepted_cov_mat @@ -903,7 +905,7 @@ def flat_map(self, data, n_repeat, map_function): return merged_result_data # define helper functions for map step - def _simulate_data(self, data, mpi_comm=None): + def _simulate_data(self, data, npc=None): """ Simulate n_sample_per_param many datasets for new parameter Parameters @@ -920,10 +922,10 @@ def _simulate_data(self, data, mpi_comm=None): # print("DEBUG: Simulate model for parameter " + str(theta)) theta, rng = data[0], data[1] self.set_parameters(theta) - y_sim = 
self.simulate(1, rng, mpi_comm=mpi_comm) + y_sim = self.simulate(1, rng, npc=npc) return (theta, y_sim) - def _approx_calc(self, sim_data_parameter, mpi_comm=None): + def _approx_calc(self, sim_data_parameter, npc=None): """ Compute likelihood for new parameters using approximate likelihood function Parameters @@ -951,7 +953,7 @@ def _approx_calc(self, sim_data_parameter, mpi_comm=None): return (total_pdf_at_theta, 1) - def _calculate_weight(self, theta, mpi_comm=None): + def _calculate_weight(self, theta, npc=None): """ Calculates the weight for the given parameter using accepted_parameters, accepted_cov_mat @@ -1399,7 +1401,7 @@ def destroy(bc): self.all_distances_bds = self.backend.broadcast(all_distances) # define helper functions for map step - def _accept_parameter(self, data, mpi_comm=None): + def _accept_parameter(self, data, npc=None): """ Samples a single model parameter and simulate from it until accepted with probabilty exp[-rho(x,y)/epsilon]. @@ -1432,7 +1434,7 @@ def _accept_parameter(self, data, mpi_comm=None): self.sample_from_prior(rng=rng) new_theta = np.array(self.get_parameters()).reshape(-1,) all_parameters.append(new_theta) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) all_distances.append(distance) @@ -1450,7 +1452,7 @@ def _accept_parameter(self, data, mpi_comm=None): new_theta = np.array(perturbation_output[1]).reshape(-1,) break - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) smooth_distance = self._smoother_distance([distance], self.all_distances_bds.value()) @@ -1713,7 +1715,7 @@ def sample(self, observations, steps, 
n_samples = 10000, n_samples_per_param = 1 return journal # define helper functions for map step - def _accept_parameter(self, rng_and_index, mpi_comm=None): + def _accept_parameter(self, rng_and_index, npc=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -1745,7 +1747,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) result_theta.append(self.get_parameters()) @@ -1753,7 +1755,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): else: theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) self.set_parameters(theta) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) result_theta.append(theta) @@ -1763,7 +1765,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!= 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng,mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng,npc=npc) counter+=1 new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -1787,7 +1789,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): return (result_theta, result_distance, counter) - def _update_cov_mat(self, rng_t, mpi_comm=None): + def 
_update_cov_mat(self, rng_t, npc=None): """ Updates the covariance matrix. @@ -1824,7 +1826,7 @@ def _update_cov_mat(self, rng_t, mpi_comm=None): perturbation_output = self.perturb(0, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 new_distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -2097,7 +2099,7 @@ def destroy(bc): self.accepted_dist_bds = self.backend.broadcast(accepted_dist) # define helper functions for map step - def _accept_parameter(self, rng, mpi_comm=None): + def _accept_parameter(self, rng, npc=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -2124,7 +2126,7 @@ def _accept_parameter(self, rng, mpi_comm=None): if self.accepted_parameters_manager.accepted_parameters_bds == None: while distance > self.epsilon[-1]: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -2138,7 +2140,7 @@ def _accept_parameter(self, rng, mpi_comm=None): perturbation_output = self.perturb(index[0], rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) @@ -2393,7 +2395,7 @@ def 
destroy(bc): self.accepted_dist_bds = self.backend.broadcast(accepted_dist) # define helper functions for map step - def _accept_parameter(self, rng, mpi_comm=None): + def _accept_parameter(self, rng, npc=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -2419,7 +2421,7 @@ def _accept_parameter(self, rng, mpi_comm=None): if self.accepted_parameters_manager.accepted_parameters_bds == None: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -2434,7 +2436,7 @@ def _accept_parameter(self, rng, mpi_comm=None): if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) @@ -2777,7 +2779,7 @@ def destroy(bc): # define helper functions for map step - def _accept_parameter(self, rng_and_index, mpi_comm=None): + def _accept_parameter(self, rng_and_index, npc=None): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is @@ -2807,7 +2809,7 @@ def _accept_parameter(self, rng_and_index, mpi_comm=None): # print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) if self.accepted_parameters_manager.accepted_parameters_bds is None: self.sample_from_prior(rng=rng) - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 else: print('Hello2') @@ -2817,7 +2819,7 @@ def 
_accept_parameter(self, rng_and_index, mpi_comm=None): perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break - y_sim = self.simulate(self.n_samples_per_param, rng=rng, mpi_comm=mpi_comm) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) print('2821: ' + y_sim.__str__()) counter+=1 y_sim_old = self.accepted_y_sim_bds.value()[index] diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index 55815dce..14413491 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -64,12 +64,9 @@ def _check_output(self, values): def get_output_dimension(self): return 2 - def forward_simulate(self, input_values, k, rng=np.random.RandomState(), npc=None): - print('before mpi part') - forward_simulate_mpi(input_values, k, rng=np.random.RandomState, mpi_comm=npc.get_communicator()) - print('after mpi part') - def forward_simulate_mpi(self, input_values, k, rng=np.random.RandomState, mpi_comm=None): + def forward_simulate(self, input_values, k, rng=np.random.RandomState, mpi_comm=None): + print("Start Forward Simulate on rank {}".format(mpi_comm.Get_rank())) rank = mpi_comm.Get_rank() # Extract the input parameters mu = input_values[rank] @@ -93,10 +90,14 @@ def forward_simulate_mpi(self, input_values, k, rng=np.random.RandomState, mpi_c point = np.array([element0, element1]) result[i] = point result = [np.array([result[i]]).reshape(-1, ) for i in range(k)] + print("End forward sim on master") return result else: + print("End forward sim on workers") return None + + def infer_parameters_pmcabc(): # define observation for true parameters mean=170, 65 @@ -358,10 +359,10 @@ def setUpModule(): setup_backend() print('True Value was: ' + str([170, 65])) print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean())) - #print('Posterior Mean of ABCsubsim: 
' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy) - print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) - print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) - print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) (Buggy) - print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) - print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) - print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) + # print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy) + # print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) + # print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) + # print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) (Buggy) + # print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) + # print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) + # print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) From 40a6b53785692b6ce7f7f6dee35f33fac7f1c9d6 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Thu, 24 Jan 2019 21:43:30 +0000 Subject: [PATCH 38/41] All inference schemes running with nested parallelization, tests failing --- abcpy/distances.py | 27 ++++++++++++-- abcpy/graphtools.py | 5 ++- abcpy/inferences.py | 33 +++++++---------- examples/backends/mpi/mpi_model_inferences.py | 37 +++++++++---------- 4 files changed, 58 insertions(+), 44 deletions(-) diff --git a/abcpy/distances.py b/abcpy/distances.py index eb8ced17..bf09129d 100644 --- a/abcpy/distances.py +++ b/abcpy/distances.py @@ -116,7 +116,7 @@ def __init__(self, statistics): # summary statistics of them and not recalculate it each time self.s1 = None 
self.data_set = None - + self.dataSame = False def distance(self, d1, d2): """Calculates the distance between two datasets. @@ -132,10 +132,17 @@ def distance(self, d1, d2): if not isinstance(d2, list): raise TypeError('Data is not of allowed types') + if self.data_set is not None: + if len(d1[0]) == 1: + self.data_set == d1 + else: + self.dataSame = all([(self.data_set[i] == d1[i]).all() for i in range(len(d1))]) + # Extract summary statistics from the dataset - if(self.s1 is None or self.data_set!=d1): + if(self.s1 is None or self.dataSame is False): self.s1 = self.statistics_calc.statistics(d1) self.data_set = d1 + s2 = self.statistics_calc.statistics(d2) # compute distance between the statistics @@ -192,8 +199,14 @@ def distance(self, d1, d2): if not isinstance(d2, list): raise TypeError('Data is not of allowed types') + if self.data_set is not None: + if len(d1[0]) == 1: + self.data_set == d1 + else: + self.dataSame = all([(self.data_set[i] == d1[i]).all() for i in range(len(d1))]) + # Extract summary statistics from the dataset - if(self.s1 is None or self.data_set!=d1): + if(self.s1 is None or self.dataSame is False): self.s1 = self.statistics_calc.statistics(d1) self.data_set = d1 s2 = self.statistics_calc.statistics(d2) @@ -247,8 +260,14 @@ def distance(self, d1, d2): if not isinstance(d2, list): raise TypeError('Data is not of allowed types') + if self.data_set is not None: + if len(d1[0]) == 1: + self.data_set == d1 + else: + self.dataSame = all([(np.array(self.data_set[i]) == np.array(d1[i])).all() for i in range(len(d1))]) + # Extract summary statistics from the dataset - if(self.s1 is None or self.data_set!=d1): + if(self.s1 is None or self.dataSame is False): self.s1 = self.statistics_calc.statistics(d1) self.data_set = d1 s2 = self.statistics_calc.statistics(d2) diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index c2ec5049..f5649910 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -405,7 +405,10 @@ def simulate(self, 
n_samples_per_param, rng=np.random.RandomState(), npc=None): for model in self.model: parameters_compatible = model._check_input(model.get_input_values()) if parameters_compatible: - simulation_result = npc.run_nested(model.forward_simulate, model.get_input_values(), n_samples_per_param, rng=rng) + if npc is not None: + simulation_result = npc.run_nested(model.forward_simulate, model.get_input_values(), n_samples_per_param, rng=rng) + else: + simulation_result = model.forward_simulate(model.get_input_values(),n_samples_per_param, rng=rng) result.append(simulation_result) else: return None diff --git a/abcpy/inferences.py b/abcpy/inferences.py index eee961ab..7714f82d 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -155,7 +155,7 @@ class RejectionABC(InferenceMethod): Distance object defining the distance measure to compare simulated and observed data sets. backend: abcpy.backends.Backend Backend object defining the backend to be used. - seed: integer, optional + seed: integer, optionaldistance Optional initial seed for the random number generator. The default value is generated randomly. 
""" @@ -533,7 +533,7 @@ def _resample_parameter(self, rng, npc=None): accepted parameter """ - print(npc.communicator()) + #print(npc.communicator()) rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) distance = self.distance.dist_max() @@ -1584,7 +1584,6 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for aStep in range(0, steps): self.logger.info("ABCsubsim step {}".format(aStep)) - if aStep==0 and journal_file is not None: accepted_parameters = journal.parameters[-1] accepted_weights = journal.weights[-1] @@ -1626,9 +1625,10 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 distances = distances[SortIndex] accepted_parameters = accepted_parameters[SortIndex, :] + # 3: Calculate and broadcast annealling parameters self.logger.debug("Calculate and broadcast annealling parameters.") - temp_chain_length = chain_length + temp_chain_length = self.chain_length if aStep > 0: anneal_parameter_old = anneal_parameter anneal_parameter = 0.5 * ( @@ -1786,7 +1786,6 @@ def _accept_parameter(self, rng_and_index, npc=None): else: result_theta.append(theta) result_distance.append(distance) - return (result_theta, result_distance, counter) def _update_cov_mat(self, rng_t, npc=None): @@ -2805,14 +2804,12 @@ def _accept_parameter(self, rng_and_index, npc=None): self.accepted_parameters_manager.model) counter=0 - print('Hello 1') # print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) if self.accepted_parameters_manager.accepted_parameters_bds is None: self.sample_from_prior(rng=rng) y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 else: - print('Hello2') if self.accepted_parameters_manager.accepted_weights_bds.value()[index] > 0: theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) while True: @@ -2820,31 +2817,27 @@ def _accept_parameter(self, rng_and_index, npc=None): if 
perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) - print('2821: ' + y_sim.__str__()) counter+=1 y_sim_old = self.accepted_y_sim_bds.value()[index] ## Calculate acceptance probability: numerator = 0.0 denominator = 0.0 for ind in range(self.n_samples_per_param): - print('2828: ' + y_sim.__str__()) - lhs = self.accepted_parameters_manager.observations_bds.value() - rhs = [[y_sim[0][ind]]] - distance_new = self.distance.distance(lhs, rhs) - distance_old = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), [[y_sim_old[0][ind]]]) - numerator += (distance_new < self.epsilon[-1]) - denominator += (distance_old < self.epsilon[-1]) - print('denom') + numerator += (self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), + [[y_sim[0][ind]]]) < self.epsilon[-1]) + denominator += (self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), + [[y_sim_old[0][ind]]]) < self.epsilon[-1]) if denominator == 0: ratio_data_epsilon = 1 else: ratio_data_epsilon = numerator / denominator - ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) + ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, + theta) kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, theta) - kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, perturbation_output[1]) + kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, + perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator acceptance_prob = min(1, ratio_data_epsilon * ratio_prior_prob * ratio_likelihood_prob) - print('binom') if rng.binomial(1, acceptance_prob) == 1: 
self.set_parameters(perturbation_output[1]) else: @@ -2854,4 +2847,4 @@ def _accept_parameter(self, rng_and_index, npc=None): self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) y_sim = self.accepted_y_sim_bds.value()[index] - return (self.get_parameters(), y_sim, counter) + return (self.get_parameters(), y_sim, counter) \ No newline at end of file diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index 14413491..2e9c8e43 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -283,7 +283,7 @@ def infer_parameters_rejectionabc(): return journal -def infer_parameters_pmc(): +def infer_parameters_smcabc(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] @@ -300,22 +300,19 @@ def infer_parameters_pmc(): from abcpy.statistics import Identity statistics_calculator = Identity(degree = 2, cross = False) - from abcpy.approx_lhd import SynLiklihood - approx_lhd = SynLiklihood(statistics_calculator) + # define distance + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) # define sampling scheme - from abcpy.inferences import PMC - sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) - - # sample from scheme - T, n_sample, n_samples_per_param = 2, 10, 10 - - journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) + from abcpy.inferences import SMCABC + sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) + steps, n_samples, n_samples_per_param, epsilon = 2, 10, 1, 2000 + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1) return journal - -def infer_parameters_smcabc(): +def infer_parameters_pmc(): # define observation for true parameters mean=170, 65 rng = 
np.random.RandomState() y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2,))] @@ -332,15 +329,17 @@ def infer_parameters_smcabc(): from abcpy.statistics import Identity statistics_calculator = Identity(degree = 2, cross = False) - # define distance - from abcpy.distances import Euclidean - distance_calculator = Euclidean(statistics_calculator) + from abcpy.approx_lhd import SynLiklihood + approx_lhd = SynLiklihood(statistics_calculator) # define sampling scheme - from abcpy.inferences import SMCABC - sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1) - steps, n_samples, n_samples_per_param, epsilon = 2, 10, 1, 2000 - journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1) + from abcpy.inferences import PMC + sampler = PMC([height_weight_model], [approx_lhd], backend, seed=1) + + # sample from scheme + T, n_sample, n_samples_per_param = 2, 10, 10 + + journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param) return journal From e2eaeb32ea0dc8595e7aadbb1dc468dd53dec1f1 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Thu, 24 Jan 2019 23:24:42 +0000 Subject: [PATCH 39/41] Algorithmic tests are passing, backend MPI tests are still failing --- abcpy/distances.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/abcpy/distances.py b/abcpy/distances.py index bf09129d..ea7623eb 100644 --- a/abcpy/distances.py +++ b/abcpy/distances.py @@ -133,10 +133,10 @@ def distance(self, d1, d2): raise TypeError('Data is not of allowed types') if self.data_set is not None: - if len(d1[0]) == 1: + if len(np.array(d1[0]).reshape(-1,)) == 1: self.data_set == d1 else: - self.dataSame = all([(self.data_set[i] == d1[i]).all() for i in range(len(d1))]) + self.dataSame = all([(np.array(self.data_set[i]) == np.array(d1[i])).all() for i in range(len(d1))]) # Extract summary statistics from the dataset if(self.s1 is None or self.dataSame is False): @@ -200,10 
+200,10 @@ def distance(self, d1, d2): raise TypeError('Data is not of allowed types') if self.data_set is not None: - if len(d1[0]) == 1: + if len(np.array(d1[0]).reshape(-1,)) == 1: self.data_set == d1 else: - self.dataSame = all([(self.data_set[i] == d1[i]).all() for i in range(len(d1))]) + self.dataSame = all([(np.array(self.data_set[i]) == np.array(d1[i])).all() for i in range(len(d1))]) # Extract summary statistics from the dataset if(self.s1 is None or self.dataSame is False): @@ -261,7 +261,7 @@ def distance(self, d1, d2): raise TypeError('Data is not of allowed types') if self.data_set is not None: - if len(d1[0]) == 1: + if len(np.array(d1[0]).reshape(-1,)) == 1: self.data_set == d1 else: self.dataSame = all([(np.array(self.data_set[i]) == np.array(d1[i])).all() for i in range(len(d1))]) From ef9aecad3f47e4c17d435568a185308c0f04f190 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 25 Jan 2019 14:08:17 +0000 Subject: [PATCH 40/41] Finalizing the integration of nested-MPI Backend to ABCpy --- abcpy/distances.py | 5 + abcpy/graphtools.py | 2 +- abcpy/jointapprox_lhd.py | 4 - abcpy/jointdistances.py | 3 - doc/source/parallelization.rst | 123 ++++++------- examples/backends/mpi/mpi_model_inferences.py | 64 ++----- examples/backends/mpi/mpi_model_simple.py | 161 ------------------ .../mpi/mpi_pmc_hierarchical_models.py | 93 ---------- examples/backends/mpi/pmcabc_gaussian.py | 19 ++- tests/backend_tests_mpi_model_mpi.py | 50 +++--- 10 files changed, 125 insertions(+), 399 deletions(-) delete mode 100644 examples/backends/mpi/mpi_model_simple.py delete mode 100644 examples/backends/mpi/mpi_pmc_hierarchical_models.py diff --git a/abcpy/distances.py b/abcpy/distances.py index ea7623eb..caeb3e68 100644 --- a/abcpy/distances.py +++ b/abcpy/distances.py @@ -132,6 +132,7 @@ def distance(self, d1, d2): if not isinstance(d2, list): raise TypeError('Data is not of allowed types') + # Check whether d1 is same as self.data_set if self.data_set is not None: if 
len(np.array(d1[0]).reshape(-1,)) == 1: self.data_set == d1 @@ -185,6 +186,7 @@ def __init__(self, statistics): # Since the observations do always stay the same, we can save the summary statistics of them and not recalculate it each time self.s1 = None self.data_set = None + self.dataSame = False def distance(self, d1, d2): """Calculates the distance between two datasets. @@ -199,6 +201,7 @@ def distance(self, d1, d2): if not isinstance(d2, list): raise TypeError('Data is not of allowed types') + # Check whether d1 is same as self.data_set if self.data_set is not None: if len(np.array(d1[0]).reshape(-1,)) == 1: self.data_set == d1 @@ -244,6 +247,7 @@ def __init__(self, statistics): # Since the observations do always stay the same, we can save the summary statistics of them and not recalculate it each time self.s1 = None self.data_set = None + self.dataSame = False def distance(self, d1, d2): """Calculates the distance between two datasets. @@ -260,6 +264,7 @@ def distance(self, d1, d2): if not isinstance(d2, list): raise TypeError('Data is not of allowed types') + # Check whether d1 is same as self.data_set if self.data_set is not None: if len(np.array(d1[0]).reshape(-1,)) == 1: self.data_set == d1 diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index f5649910..edfb4de5 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -405,7 +405,7 @@ def simulate(self, n_samples_per_param, rng=np.random.RandomState(), npc=None): for model in self.model: parameters_compatible = model._check_input(model.get_input_values()) if parameters_compatible: - if npc is not None: + if npc is not None and npc.communicator().Get_size() > 1: simulation_result = npc.run_nested(model.forward_simulate, model.get_input_values(), n_samples_per_param, rng=rng) else: simulation_result = model.forward_simulate(model.get_input_values(),n_samples_per_param, rng=rng) diff --git a/abcpy/jointapprox_lhd.py b/abcpy/jointapprox_lhd.py index 7bf3b319..45028dbf 100644 --- 
a/abcpy/jointapprox_lhd.py +++ b/abcpy/jointapprox_lhd.py @@ -1,9 +1,5 @@ from abc import ABCMeta, abstractmethod -import numpy as np -from glmnet import LogitNet -from sklearn import linear_model - class JointApprox_likelihood(metaclass = ABCMeta): """This abstract base class defines how the combination of distances computed on the observed and diff --git a/abcpy/jointdistances.py b/abcpy/jointdistances.py index be6a5970..e4ed21ce 100644 --- a/abcpy/jointdistances.py +++ b/abcpy/jointdistances.py @@ -1,9 +1,6 @@ from abc import ABCMeta, abstractmethod import numpy as np -from glmnet import LogitNet -from sklearn import linear_model - class JointDistance(metaclass = ABCMeta): """This abstract base class defines how the combination of distances computed on the observed and diff --git a/doc/source/parallelization.rst b/doc/source/parallelization.rst index ffc973e5..042ed819 100644 --- a/doc/source/parallelization.rst +++ b/doc/source/parallelization.rst @@ -10,64 +10,26 @@ Running ABC algorithms is often computationally expensive, thus ABCpy is built with parallelization in mind. In order to run your inference schemes in parallel on multiple nodes (computers) you can choose from the following backends. - -Using the Spark Backend -~~~~~~~~~~~~~~~~~~~~~~~ - -To run ABCpy in parallel using Apache Spark, one only needs to use the provided -Spark backend. Considering the example from above, the statements for the -backend have to be changed to - -.. literalinclude:: ../../examples/backends/apache_spark/pmcabc_gaussian.py - :language: python - :lines: 6-9 - :dedent: 4 - -In words, a Spark context has to be created and passed to the Spark -backend. Additionally, the level of parallelism can be provided, which defines in -a sense in how many blocks the work should be split up. It corresponds to the -parallelism of an RDD in Apache Spark terminology. A good value is usually a -small multiple of the total number of available cores. 
- -The standard way to run the script on Spark is via the spark-submit command: - -:: - - PYSPARK_PYTHON=python3 spark-submit pmcabc_gaussian.py - -Often Spark installations use Python 2 by default. To make Spark use the -required Python 3 interpreter, the `PYSPARK_PYTHON` environment variable can be -set. - -The adapted python code can be found in -`examples/backend/apache_spark/pmcabc_gaussian.py`. - -Note that in order to run jobs in parallel you need to have Apache Spark -installed on the system in question. The dependencies of the spark backend can be -install with `pip install -r requirements/backend-spark.txt`. - -Details on the installation can be found on the official `homepage -`_. Further, keep in mind that the ABCpy library has to -be properly installed on the cluster, such that it is available to the Python -interpreters on the master and the worker nodes. - Using the MPI Backend ~~~~~~~~~~~~~~~~~~~~~ To run ABCpy in parallel using MPI, one only needs to use the provided MPI -backend. Using the same example as above, the statements for the backend have to +backend. Using the same example as before, the statements for the backend have to be changed to .. literalinclude:: ../../examples/backends/mpi/pmcabc_gaussian.py :language: python - :lines: 6-7 + :lines: 6-10 :dedent: 4 In words, one only needs to initialize an instance of the MPI backend. The number of ranks to spawn are specified at runtime through the way the script is run. A minimum of two ranks is required, since rank 0 (master) is used to orchestrate the calculation and all other ranks (workers) actually perform the -calculation. +calculation. (The default value of `process_per_model` is 1. If your simulator +model is not parallelized using MPI, do not specify +`process_per_model > 1`. The use of `process_per_model` for nested parallelization +will be explained below.) 
The standard way to run the script using MPI is directly via mpirun like below or on a cluster through a job scheduler like Slurm: @@ -80,37 +42,44 @@ or on a cluster through a job scheduler like Slurm: The adapted Python code can be found in `examples/backend/mpi/pmcabc_gaussian.py`. -Nested parallelization ----------------------- +Nested-MPI parallelization for MPI-parallelized simulator models +------------------------------------------------------------------ -Sometimes, the model on which we want to perform parameter inference has itself -large compute requirements and needs parallelization. When this parallelization -is achieved using threads, there MPI has just to be configured that each MPI +Sometimes, the simulator model itself has +large compute requirements and needs parallelization. To achieve this parallelization +using threads, the MPI backend need to be configured such that each MPI rank can spawn multiple threads on a node. However, there might be situations where node-local parallelization using threads is not sufficient and parallelization across nodes is required. Parallelization of the forward model across nodes is possible *but limited* to the MPI backend. Technically, this is implemented using individual MPI -communicators for each forward model. The amount of ranks per communicator +communicators for each forward model. The number of ranks per communicator +(defined as: `process_per_model`) can be passed at the initialization of the backend as follows: -.. literalinclude:: ../../examples/backends/mpi/mpi_model.py +.. literalinclude:: ../../examples/backends/mpi/mpi_model_inferences.py :language: python - :lines: 6-7 + :lines: 10-11 :dedent: 4 Here each model is assigned a MPI communicator with 2 ranks. 
Clearly, the MPI -job has to be configured manually that the total amount of MPI ranks is ideally +job has to be configured manually such that the total amount of MPI ranks is ideally a multiple of the ranks per communicator plus one additional rank for the master. For example, if we want to run n instances of a MPI model and allows m processes to each instance, we will have to spawn (n*m)+1 ranks. -For nested parallelization the model has to be able to take an MPI communicator -as a parameter. +For `forward_simulation` of the MPI-parallelized simulator model has to be able +to take an MPI communicator as a parameter. + +An example of an MPI-parallelized simulator model, which can be used with ABCpy +nested-parallelization, can be found in `examples/backend/mpi/mpi_model_inferences.py`. +The `forward_simulation` function of the above model is as follows: -An example using the nested MPI parallelization can be found in -`examples/backend/mpi/mpi_model.py`. +.. literalinclude:: ../../examples/backends/mpi/mpi_model_inferences.py + :language: python + :lines: 48-77 + :dedent: 4 Note that in order to run jobs in parallel you need to have MPI installed on the system(s) in question with the requisite Python bindings for MPI (mpi4py). The @@ -123,6 +92,46 @@ Details on the installation can be found on the official `Open MPI homepage to be properly installed on the cluster, such that it is available to the Python interpreters on the master and the worker nodes. +Using the Spark Backend +~~~~~~~~~~~~~~~~~~~~~~~ + +To run ABCpy in parallel using Apache Spark, one only needs to use the provided +Spark backend. Considering the example from before, the statements for the +backend have to be changed to + +.. literalinclude:: ../../examples/backends/apache_spark/pmcabc_gaussian.py + :language: python + :lines: 6-9 + :dedent: 4 + +In words, a Spark context has to be created and passed to the Spark +backend. 
Additionally, the level of parallelism can be provided, which defines in +a sense in how many blocks the work should be split up. It corresponds to the +parallelism of an RDD in Apache Spark terminology. A good value is usually a +small multiple of the total number of available cores. + +The standard way to run the script on Spark is via the spark-submit command: + +:: + + PYSPARK_PYTHON=python3 spark-submit pmcabc_gaussian.py + +Often Spark installations use Python 2 by default. To make Spark use the +required Python 3 interpreter, the `PYSPARK_PYTHON` environment variable can be +set. + +The adapted python code can be found in +`examples/backend/apache_spark/pmcabc_gaussian.py`. + +Note that in order to run jobs in parallel you need to have Apache Spark +installed on the system in question. The dependencies of the spark backend can be +install with `pip install -r requirements/backend-spark.txt`. + +Details on the installation can be found on the official `homepage +`_. Further, keep in mind that the ABCpy library has to +be properly installed on the cluster, such that it is available to the Python +interpreters on the master and the worker nodes. 
+ Using Cluster Infrastructure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py index 2e9c8e43..9134390c 100644 --- a/examples/backends/mpi/mpi_model_inferences.py +++ b/examples/backends/mpi/mpi_model_inferences.py @@ -1,33 +1,14 @@ -import logging -logging.basicConfig(level=logging.DEBUG) +#import logging +#logging.basicConfig(level=logging.DEBUG) import numpy as np -from mpi4py import MPI from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - def setup_backend(): global backend from abcpy.backends import BackendMPI as Backend - from abcpy.backends import NestedParallelizationController backend = Backend(process_per_model=2) - # backend = Backend() - - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x ** 2], 'i') - global_res = np.array([0], 'i') - model_comm.Reduce([local_res, MPI.INT], [global_res, MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1, 2, 3, 4, 5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - class NestedBivariateGaussian(ProbabilisticModel): """ @@ -64,21 +45,20 @@ def _check_output(self, values): def get_output_dimension(self): return 2 - def forward_simulate(self, input_values, k, rng=np.random.RandomState, mpi_comm=None): - print("Start Forward Simulate on rank {}".format(mpi_comm.Get_rank())) + if mpi_comm is None: + ValueError('MPI-parallelized simulator model needs to have access \ + to a MPI communicator object') + #print("Start Forward Simulate on rank {}".format(mpi_comm.Get_rank())) rank = mpi_comm.Get_rank() # Extract the input parameters mu = input_values[rank] sigma = 1 - # print(mu) # Do the actual forward simulation vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) # Send everything back to rank 0 - # print("Hello from forward_simulate before gather, rank = ", rank) data = 
mpi_comm.gather(vector_of_k_samples, root=0) - # print("Hello from forward_simulate after gather, rank = ", rank) # Format the output to obey API and broadcast it before return result = None @@ -90,15 +70,12 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState, mpi_comm= point = np.array([element0, element1]) result[i] = point result = [np.array([result[i]]).reshape(-1, ) for i in range(k)] - print("End forward sim on master") + #print("End forward sim on master") return result else: - print("End forward sim on workers") + #print("End forward sim on workers") return None - - - def infer_parameters_pmcabc(): # define observation for true parameters mean=170, 65 rng = np.random.RandomState() @@ -156,8 +133,8 @@ def infer_parameters_abcsubsim(): # define sampling scheme from abcpy.inferences import ABCsubsim sampler = ABCsubsim([height_weight_model], [distance_calculator], backend) - steps, n_samples = 2, 4 - journal = sampler.sample([y_obs], steps, n_samples) + steps, n_samples, n_samples_per_param, chain_length = 2, 10, 1, 2 + journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, chain_length) return journal @@ -347,21 +324,14 @@ def infer_parameters_pmc(): def setUpModule(): setup_backend() -#class ExampleMPIModelTest(unittest.TestCase): -# def test_example(self): -# result = run_model() -# data = [1,2,3,4,5] -# expected_result = list(map(lambda x:2*(x**2),data)) -# assert result==expected_result - if __name__ == "__main__": setup_backend() print('True Value was: ' + str([170, 65])) print('Posterior Mean of PMCABC: ' + str(infer_parameters_pmcabc().posterior_mean())) - # print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) (Buggy) - # print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) - # print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) - # print('Posterior Mean of SMCABC: ' + 
str(infer_parameters_smcabc().posterior_mean())) (Buggy) - # print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) - # print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) - # print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) + print('Posterior Mean of ABCsubsim: ' + str(infer_parameters_abcsubsim().posterior_mean())) + print('Posterior Mean of RSMCABC: ' + str(infer_parameters_rsmcabc().posterior_mean())) + print('Posterior Mean of SABC: ' + str(infer_parameters_sabc().posterior_mean())) + print('Posterior Mean of SMCABC: ' + str(infer_parameters_smcabc().posterior_mean())) + print('Posterior Mean of APMCABC: ' + str(infer_parameters_apmcabc().posterior_mean())) + print('Posterior Mean of RejectionABC: ' + str(infer_parameters_rejectionabc().posterior_mean())) + print('Posterior Mean of PMC: ' + str(infer_parameters_pmc().posterior_mean())) diff --git a/examples/backends/mpi/mpi_model_simple.py b/examples/backends/mpi/mpi_model_simple.py deleted file mode 100644 index 6b1d0e21..00000000 --- a/examples/backends/mpi/mpi_model_simple.py +++ /dev/null @@ -1,161 +0,0 @@ -import numpy as np -from mpi4py import MPI -from abcpy.probabilisticmodels import ProbabilisticModel, InputConnector - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = Backend() - -def run_model(): - def square_mpi(model_comm, x): - local_res = np.array([x**2], 'i') - global_res = np.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) - return global_res[0] - - data = [1,2,3,4,5] - pds = backend.parallelize(data) - pds_map = backend.map(square_mpi, pds) - res = backend.collect(pds_map) - return res - - -class NestedBivariateGaussian(ProbabilisticModel): - """ - This is a show case model of bi-variate Gaussian distribution where we assume - the standard deviation to be unit. 
- """ - - def __init__(self, parameters, name='Gaussian'): - # We expect input of type parameters = [mu, sigma] - if not isinstance(parameters, list): - raise TypeError('Input of Normal model is of type list') - - if len(parameters) != 2: - raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') - - input_connector = InputConnector.from_list(parameters) - super().__init__(input_connector, name) - - - def _check_input(self, input_values): - # Check whether input has correct type or format - if len(input_values) != 2: - raise ValueError('Number of parameters are 2 (two means).') - return True - - - def _check_output(self, values): - if not isinstance(values, np.ndarray): - raise ValueError('Output of the normal distribution is always a numpy array.') - - if value.shape[0] != 2: - raise ValueError('Output shape should be of dimension 2.') - - return True - - - def get_output_dimension(self): - return 2 - - - #def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_comm=None): - #def forward_simulate(self, mpi_comm, input_values, k, rng=np.random.RandomState()): - def forward_simulate(self, input_values, k, rng=np.random.RandomState()): #, mpi_comm=None): - - #rank = mpi_comm.Get_rank() - - # Extract the input parameters - #mu = input_values[rank] - mu = input_values[0] - sigma = 1 - - #print("salut") - - # Do the actual forward simulation - vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) - - # Send everything back to rank 0 - #data = mpi_comm.gather(vector_of_k_samples) - - data = vector_of_k_samples - - # Format the output to obey API but only on rank 0 - #if rank == 0: - result = [None]*k - # for i in range(k): - #element0 = data[0][i] - #element1 = data[1][i] - #element0 = data[0][0] - #element1 = data[1][0] - element0 = data[0] - element1 = data[1] - point = np.array([element0, element1]) - result[0] = point - return result - #else: - # return - - - def pdf(self, input_values, x): - mu = input_values[0] - sigma = 
input_values[1] - pdf = np.norm(mu,sigma).pdf(x) - return pdf - - -def infer_parameters(): - # define observation for true parameters mean=170, 65 - rng = np.random.RandomState() - y_obs = rng.multivariate_normal([170, 65], np.eye(2), 100) - - # define prior - from abcpy.continuousmodels import Uniform - mu0 = Uniform([[150], [200]], ) - mu1 = Uniform([[25], [100]], ) - - # define the model - from abcpy.continuousmodels import Normal - height_weight_model = NestedBivariateGaussian([mu0, mu1]) - - # define statistics - from abcpy.statistics import Identity - statistics_calculator = Identity(degree = 2, cross = False) - - # define distance - from abcpy.distances import LogReg - distance_calculator = LogReg(statistics_calculator) - - # define sampling scheme - from abcpy.inferences import PMCABC - sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1) - - # sample from scheme - T, n_sample, n_samples_per_param = 3, 250, 10 - eps_arr = np.array([.75]) - epsilon_percentile = 10 - journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) - - return journal - -import unittest -from mpi4py import MPI - -def setUpModule(): - setup_backend() - -class ExampleMPIModelTest(unittest.TestCase): - def test_example(self): - result = run_model() - data = [1,2,3,4,5] - expected_result = list(map(lambda x:2*(x**2),data)) - assert result==expected_result - -if __name__ == "__main__": - setup_backend() - #print(run_mod#print(run_model()) - #print(run_model()) - model = NestedBivariateGaussian([100,200]) - print(infer_parameters()) diff --git a/examples/backends/mpi/mpi_pmc_hierarchical_models.py b/examples/backends/mpi/mpi_pmc_hierarchical_models.py deleted file mode 100644 index d56edf5d..00000000 --- a/examples/backends/mpi/mpi_pmc_hierarchical_models.py +++ /dev/null @@ -1,93 +0,0 @@ -import numpy as np - -def setup_backend(): - global backend - - from abcpy.backends import BackendMPI as Backend - backend = 
Backend(process_per_model=2) - -"""An example showing how to implement a bayesian network in ABCpy""" -def infer_parameters(): - # The data corresponding to model_1 defined below - grades_obs = [3.872486707973337, 4.6735380808674405, 3.9703538990858376, 4.11021272048805, 4.211048655421368, 4.154817956586653, 4.0046893064392695, 4.01891381384729, 4.123804757702919, 4.014941267301294, 3.888174595940634, 4.185275142948246, 4.55148774469135, 3.8954427675259016, 4.229264035335705, 3.839949451328312, 4.039402553532825, 4.128077814241238, 4.361488645531874, 4.086279074446419, 4.370801602256129, 3.7431697332475466, 4.459454162392378, 3.8873973643008255, 4.302566721487124, 4.05556051626865, 4.128817316703757, 3.8673704442215984, 4.2174459453805015, 4.202280254493361, 4.072851400451234, 3.795173229398952, 4.310702877332585, 4.376886328810306, 4.183704734748868, 4.332192463368128, 3.9071312388426587, 4.311681374107893, 3.55187913252144, 3.318878360783221, 4.187850500877817, 4.207923106081567, 4.190462065625179, 4.2341474252986036, 4.110228694304768, 4.1589891480847765, 4.0345604687633045, 4.090635481715123, 3.1384654393449294, 4.20375641386518, 4.150452690356067, 4.015304457401275, 3.9635442007388195, 4.075915739179875, 3.5702080541929284, 4.722333310410388, 3.9087618197155227, 4.3990088006390735, 3.968501165774181, 4.047603645360087, 4.109184340976979, 4.132424805281853, 4.444358334346812, 4.097211737683927, 4.288553086265748, 3.8668863066511303, 3.8837108501541007] - - # The prior information changing the class size and social background, depending on school location - from abcpy.continuousmodels import Uniform, Normal - school_location = Uniform([[0.2], [0.3]], ) - - # The average class size of a certain school - class_size = Normal([[school_location], [0.1]], ) - - # The social background of a student - background = Normal([[school_location], [0.1]], ) - - # The grade a student would receive without any bias - grade_without_additional_effects = Normal([[4.5], [0.25]], ) - 
- # The grade a student of a certain school receives - final_grade = grade_without_additional_effects-class_size-background - - # The data corresponding to model_2 defined below - scholarship_obs = [2.7179657436207805, 2.124647285937229, 3.07193407853297, 2.335024761813643, 2.871893855192, 3.4332002458233837, 3.649996835818173, 3.50292335102711, 2.815638168018455, 2.3581613289315992, 2.2794821846395568, 2.8725835459926503, 3.5588573782815685, 2.26053126526137, 1.8998143530749971, 2.101110815311782, 2.3482974964831573, 2.2707679029919206, 2.4624550491079225, 2.867017757972507, 3.204249152084959, 2.4489542437714213, 1.875415915801106, 2.5604889644872433, 3.891985093269989, 2.7233633223405205, 2.2861070389383533, 2.9758813233490082, 3.1183403287267755, 2.911814060853062, 2.60896794303205, 3.5717098647480316, 3.3355752461779824, 1.99172284546858, 2.339937680892163, 2.9835630207301636, 2.1684912355975774, 3.014847335983034, 2.7844122961916202, 2.752119871525148, 2.1567428931391635, 2.5803629307680644, 2.7326646074552103, 2.559237193255186, 3.13478196958166, 2.388760269933492, 3.2822443541491815, 2.0114405441787437, 3.0380056368041073, 2.4889680313769724, 2.821660164621084, 3.343985964873723, 3.1866861970287808, 4.4535037154856045, 3.0026333138006027, 2.0675706089352612, 2.3835301730913185, 2.584208398359566, 3.288077633446465, 2.6955853384148183, 2.918315169739928, 3.2464814419322985, 2.1601516779909433, 3.231003347780546, 1.0893224045062178, 0.8032302688764734, 2.868438615047827] - - # A quantity that determines whether a student will receive a scholarship - scholarship_without_additional_effects = Normal([[2], [0.5]], ) - - # A quantity determining whether a student receives a scholarship, including his social background - final_scholarship = scholarship_without_additional_effects + 3*background - - # Define a summary statistics for final grade and final scholarship - from abcpy.statistics import Identity - statistics_calculator_final_grade = Identity(degree = 2, 
cross = False) - statistics_calculator_final_scholarship = Identity(degree = 3, cross = False) - - # Define a distance measure for final grade and final scholarship - from abcpy.approx_lhd import SynLiklihood - approx_lhd_final_grade = SynLiklihood(statistics_calculator_final_grade) - approx_lhd_final_scholarship = SynLiklihood(statistics_calculator_final_scholarship) - - # Define a backend - # from abcpy.backends import BackendDummy as Backend - # backend = Backend() - - setup_backend() - - # Define a perturbation kernel - from abcpy.perturbationkernel import DefaultKernel - kernel = DefaultKernel([school_location, class_size, grade_without_additional_effects, \ - background, scholarship_without_additional_effects]) - - # Define sampling parameters - T, n_sample, n_samples_per_param = 3, 250, 10 - - # Define sampler - from abcpy.inferences import PMC - sampler = PMC([final_grade, final_scholarship], \ - [approx_lhd_final_grade, approx_lhd_final_scholarship], backend, kernel) - - # Sample - journal = sampler.sample([grades_obs, scholarship_obs], T, n_sample, n_samples_per_param) - - -def analyse_journal(journal): - # output parameters and weights - print(journal.get_stored_output_values()) - print(journal.weights) - - # do post analysis - print(journal.posterior_mean()) - print(journal.posterior_cov()) - print(journal.posterior_histogram()) - - # print configuration - print(journal.configuration) - - # save and load journal - journal.save("experiments.jnl") - - from abcpy.output import Journal - new_journal = Journal.fromFile('experiments.jnl') - -if __name__ == "__main__": - journal = infer_parameters() - analyse_journal(journal) diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index f6b9126f..29d5b540 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -4,7 +4,11 @@ def setup_backend(): global backend from abcpy.backends import BackendMPI as Backend - backend = 
Backend(process_per_model=2) + backend = Backend() + # The above line is equivalent to: + # backend = Backend(process_per_model=1) + # Notice: Models not parallelized by MPI should not be given process_per_model > 1 + def infer_parameters(): @@ -25,17 +29,17 @@ def infer_parameters(): statistics_calculator = Identity(degree = 2, cross = False) # define distance - from abcpy.distances import LogReg - distance_calculator = LogReg(statistics_calculator) + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) # define sampling scheme from abcpy.inferences import PMCABC sampler = PMCABC([height], [distance_calculator], backend, seed=1) # sample from scheme - T, n_sample, n_samples_per_param = 3, 250, 10 - eps_arr = np.array([.75]) - epsilon_percentile = 10 + T, n_sample, n_samples_per_param = 2, 10, 1 + eps_arr = np.array([10000]) + epsilon_percentile = 95 journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) return journal @@ -62,7 +66,6 @@ def analyse_journal(journal): import unittest -from mpi4py import MPI def setUpModule(): ''' @@ -83,7 +86,7 @@ class ExampleGaussianMPITest(unittest.TestCase): def test_example(self): journal = infer_parameters() test_result = journal.posterior_mean()[0] - expected_result = 178.07690877694714 + expected_result = 171.4343638312893 self.assertLess(abs(test_result - expected_result), 2) diff --git a/tests/backend_tests_mpi_model_mpi.py b/tests/backend_tests_mpi_model_mpi.py index e6c0d826..e6b7b1aa 100644 --- a/tests/backend_tests_mpi_model_mpi.py +++ b/tests/backend_tests_mpi_model_mpi.py @@ -26,18 +26,18 @@ class MPIBackendTests(unittest.TestCase): def test_parallelize(self): data = [0]*backend_mpi.size() pds = backend_mpi.parallelize(data) - pds_map = backend_mpi.map(lambda x, model_comm: x + MPI.COMM_WORLD.Get_rank(), pds) + pds_map = backend_mpi.map(lambda x, npc=None: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend_mpi.collect(pds_map) for 
scheduler_index in backend_mpi.scheduler_node_ranks(): self.assertTrue(scheduler_index not in res,"Node in scheduler_node_ranks performed map.") def test_map(self): - def square_mpi(x, model_comm): - local_res = numpy.array([x**2], 'i') - global_res = numpy.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) - return global_res[0] + def square_mpi(x, npc=None): + local_res = numpy.array([2*(x**2)], 'i') + #global_res = numpy.array([0], 'i') + #MPI.COMM_WORLD.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return local_res[0] data = [1,2,3,4,5] pds = backend_mpi.parallelize(data) @@ -57,7 +57,7 @@ def test_broadcast(self): for k,v in backend_mpi.bds_store.items(): backend_mpi.bds_store[k] = 99999 - def test_map(x, model_comm): + def test_map(x, npc=None): return x + bds.value() pds_m = backend_mpi.map(test_map, pds) @@ -65,9 +65,9 @@ def test_map(x, model_comm): def test_pds_delete(self): - def check_if_exists(x, model_comm): + def check_if_exists(x, npc): obj = BackendMPITestHelper() - if model_comm.Get_rank() == 0: + if npc.communicator().Get_rank() == 0: return obj.check_pds(x) return None @@ -89,7 +89,7 @@ def check_if_exists(x, model_comm): def test_bds_delete(self): - def check_if_exists(x, model_comm): + def check_if_exists(x, npc=None): obj = BackendMPITestHelper() return obj.check_bds(x) @@ -109,26 +109,26 @@ def check_if_exists(x, model_comm): def test_function_pickle(self): - def square_mpi(x, model_comm): - local_res = numpy.array([x**2], 'i') - global_res = numpy.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) - return global_res[0] + def square_mpi(x, npc=None): + local_res = numpy.array([2*(x**2)], 'i') + #global_res = numpy.array([0], 'i') + #model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return local_res[0] class staticfunctest_mpi: @staticmethod - def square_mpi(x, model_comm): - local_res = 
numpy.array([x**2], 'i') - global_res = numpy.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) - return global_res[0] + def square_mpi(x, npc=None): + local_res = numpy.array([2*(x**2)], 'i') + #global_res = numpy.array([0], 'i') + #model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return local_res[0] class nonstaticfunctest_mpi: - def square_mpi(self, x, model_comm): - local_res = numpy.array([x**2], 'i') - global_res = numpy.array([0], 'i') - model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) - return global_res[0] + def square_mpi(self, x, npc=None): + local_res = numpy.array([2*(x**2)], 'i') + #global_res = numpy.array([0], 'i') + #model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) + return local_res[0] data = [1,2,3,4,5] expected_result = [2,8,18,32,50] From 246fe9ca8e2821cb5bb5de07beea2241a88ee81f Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 25 Jan 2019 14:38:36 +0000 Subject: [PATCH 41/41] Preparing Release 0.5.5 --- README.md | 8 ++++---- VERSION | 2 +- doc/source/installation.rst | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 457be273..5958e381 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ scientists by providing # Documentation For more information, check out the -* [Documentation](http://abcpy.readthedocs.io/en/v0.5.3) -* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.3/examples) directory and -* [Reference](http://abcpy.readthedocs.io/en/v0.5.3/abcpy.html) +* [Documentation](http://abcpy.readthedocs.io/en/v0.5.5) +* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.5/examples) directory and +* [Reference](http://abcpy.readthedocs.io/en/v0.5.5/abcpy.html) Further, we provide a [collection of models](https://github.com/eth-cscs/abcpy-models) for which ABCpy @@ -54,7 +54,7 @@ finally CSCS (Swiss National Super Computing Center) for 
their generous support. There is a paper in the proceedings of the 2017 PASC conference. In case you use ABCpy for your publication, we would appreciate a citation. You can use -[this](https://github.com/eth-cscs/abcpy/blob/v0.5.3/doc/literature/DuttaS-ABCpy-PASC-2017.bib) +[this](https://github.com/eth-cscs/abcpy/blob/v0.5.5/doc/literature/DuttaS-ABCpy-PASC-2017.bib) BibTex reference. diff --git a/VERSION b/VERSION index be14282b..d1d899fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.3 +0.5.5 diff --git a/doc/source/installation.rst b/doc/source/installation.rst index 8e42e00f..84b6b619 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -34,7 +34,7 @@ To create a package and install it, do :: make package - pip3 install build/dist/abcpy-0.5.1-py3-none-any.whl + pip3 install build/dist/abcpy-0.5.5-py3-none-any.whl Note that ABCpy requires Python3.