From 5dbd5181ba16e8ea627cc65eb5fac6a8a61ea633 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Tue, 14 Mar 2017 16:39:52 +0100 Subject: [PATCH 01/50] Refactored automated test execution in Makefile, fixed some version issues in requirements.txt, added additional packages to Travis config. --- .travis.yml | 11 +++++++---- Makefile | 17 ++++++++++------- requirements.txt | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index af25c7a2..3b12979d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,10 +7,13 @@ addons: apt: packages: - gfortran - - python3-sphinx - + - libboost-random-dev + - libpython3-dev + - swig -install: "pip install -r requirements.txt" +install: + - pip install -r requirements.txt -script: "make testall" +script: + - make test diff --git a/Makefile b/Makefile index 17d4ea2f..b08e3941 100644 --- a/Makefile +++ b/Makefile @@ -3,10 +3,10 @@ UNITTESTS=$(shell find tests -type f -name '*_tests.py') MAKEDIRS=$(shell find examples -name Makefile -exec dirname {} \;) .DEFAULT: help -.PHONY: help examples testall tests testcoverage clean doc package uninstall install reinstall $(MAKEDIRS) +.PHONY: help clean doc doctest exampletest package test uninstall unittest install reinstall $(MAKEDIRS) help: - @echo Targets are: clean, doc, examples, package, uninstall, tests + @echo Targets are: clean, doc, doctest, exampletest, package, uninstall, unittest, test clean: find . -name "*.pyc" -type f -delete @@ -14,19 +14,22 @@ clean: find . -name ".#*" -delete find . -name "#*#" -delete -testall: tests examples +test: unittest exampletest doctest -tests: +unittest: python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in unit tests."; exit 1) - make -C doc html || (echo "Error in documentation generator."; exit 1) + $(MAKEDIRS): make -C $@ -examples: $(MAKEDIRS) +doctest: + make -C doc html || (echo "Error in documentation generator."; exit 1) + +exampletest: $(MAKEDIRS) python3 -m unittest discover -s examples -v -p "*.py" || (echo "Error in example tests."; exit 1) -testcoverage: +coveragetest: command -v coverage >/dev/null 2>&1 || { echo >&2 "Python package 'coverage' has to be installed. Please, run 'pip3 install coverage'."; exit;} @- $(foreach TEST, $(UNITTESTS), \ echo === Testing code coverage: $(TEST); \ diff --git a/requirements.txt b/requirements.txt index 49eefe99..b7ab0e8f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,6 @@ scipy sklearn glmnet==1.0.0 findspark +sphinx==1.4.8 sphinx_rtd_theme coverage From 6c43723fceb16e9d88f2a646f42cc3ab4d9d62ed Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 15 Mar 2017 08:09:23 +0100 Subject: [PATCH 02/50] Bugfixing Travis config. --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3b12979d..c0e09bba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,5 @@ +dist: precise + language: python python: @@ -8,7 +10,7 @@ addons: packages: - gfortran - libboost-random-dev - - libpython3-dev + - python3.2-dev - swig install: From 2329e9d709eaea4350c6adf1ef86f3e1bdaac1c4 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 15 Mar 2017 08:45:42 +0100 Subject: [PATCH 03/50] Bugfixing Travis. 
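
(Context for this fix: the "precise" Travis image configured in the previous
commit provides Python 3.2 built with the pymalloc/wide-unicode ABI suffix
"mu", so the SWIG example has to compile against the matching header
directory. If in doubt, the include path on the build worker can be checked
with something like:

    python3 -c "import sysconfig; print(sysconfig.get_paths()['include'])"

which on that image should print /usr/include/python3.2mu.)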
---
 examples/extensions/models/gaussian_cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/extensions/models/gaussian_cpp/Makefile b/examples/extensions/models/gaussian_cpp/Makefile
index 6195d771..5ac9257f 100644
--- a/examples/extensions/models/gaussian_cpp/Makefile
+++ b/examples/extensions/models/gaussian_cpp/Makefile
@@ -4,7 +4,7 @@ WGET=wget -q
 
 CC=g++
 CPPFLAGS=-fPIC
-INCLUDEPATH=/usr/include/python3.5
+INCLUDEPATH=/usr/include/python3.2mu
 
 cpp_simple: _gaussian_model_simple.so gaussian_model_simple.py
 

From 907de6b2ad8a2415293f9115baaf58d206ca3d55 Mon Sep 17 00:00:00 2001
From: Marcel Schoengens
Date: Wed, 15 Mar 2017 10:05:48 +0100
Subject: [PATCH 04/50] Bugfixing Travis.

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index c0e09bba..bb4afe44 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,6 +11,7 @@ addons:
     - gfortran
     - libboost-random-dev
     - python3.2-dev
+    - python3-numpy
    - swig
 
 install:

From b584f75f17e680972435602a3dae9bc9469616eb Mon Sep 17 00:00:00 2001
From: Marcel Schoengens
Date: Wed, 15 Mar 2017 10:18:20 +0100
Subject: [PATCH 05/50] Bugfixing Travis.

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index bb4afe44..fd406511 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,4 @@
-dist: precise
+dist: trusty
 
 language: python
 
@@ -10,7 +10,7 @@ addons:
   packages:
     - gfortran
     - libboost-random-dev
-    - python3.2-dev
+    - libpython3-dev
     - python3-numpy
     - swig
 

From 1fa56e35e023fb420909a99eda12c3c8bbe5c2c5 Mon Sep 17 00:00:00 2001
From: Marcel Schoengens
Date: Wed, 15 Mar 2017 11:06:49 +0100
Subject: [PATCH 06/50] Bugfixing Travis.

---
 examples/extensions/models/gaussian_cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/extensions/models/gaussian_cpp/Makefile b/examples/extensions/models/gaussian_cpp/Makefile
index 5ac9257f..6195d771 100644
--- a/examples/extensions/models/gaussian_cpp/Makefile
+++ b/examples/extensions/models/gaussian_cpp/Makefile
@@ -4,7 +4,7 @@ WGET=wget -q
 
 CC=g++
 CPPFLAGS=-fPIC
-INCLUDEPATH=/usr/include/python3.2mu
+INCLUDEPATH=/usr/include/python3.5
 
 cpp_simple: _gaussian_model_simple.so gaussian_model_simple.py
 

From 53ef74f61d4c013556e8da0e032244e2b2e5afeb Mon Sep 17 00:00:00 2001
From: Marcel Schoengens
Date: Wed, 15 Mar 2017 11:15:58 +0100
Subject: [PATCH 07/50] Bugfixing Travis.

---
 examples/extensions/models/gaussian_cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/extensions/models/gaussian_cpp/Makefile b/examples/extensions/models/gaussian_cpp/Makefile
index 6195d771..2b40ec31 100644
--- a/examples/extensions/models/gaussian_cpp/Makefile
+++ b/examples/extensions/models/gaussian_cpp/Makefile
@@ -4,7 +4,7 @@ WGET=wget -q
 
 CC=g++
 CPPFLAGS=-fPIC
-INCLUDEPATH=/usr/include/python3.5
+INCLUDEPATH=/usr/include/python3.5m
 
 cpp_simple: _gaussian_model_simple.so gaussian_model_simple.py
 

From f7a3ea6f416232870ab413b82b8088893d8ad08d Mon Sep 17 00:00:00 2001
From: Marcel Schoengens
Date: Wed, 15 Mar 2017 17:02:04 +0100
Subject: [PATCH 08/50] Made naming of releases more consistent by introducing a file VERSION.
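
(Both setup.py and doc/source/conf.py now read the release number from the
new top-level VERSION file instead of hard-coding '0.1'. The pattern, as a
minimal sketch assuming VERSION sits in the repository root next to the
reading script, is:

    from os import path

    here = path.abspath(path.dirname(__file__))
    with open(path.join(here, 'VERSION')) as f:
        version = f.read()

so cutting a release becomes a one-line edit of VERSION.)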
---
 VERSION            | 1 +
 doc/source/conf.py | 8 ++++++--
 setup.py           | 5 ++++-
 3 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 VERSION

diff --git a/VERSION b/VERSION
new file mode 100644
index 00000000..2f453618
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.2
\ No newline at end of file
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 8eb2e973..1ed9ce8d 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -15,6 +15,7 @@
 import sys
 import os
+from os import path
 import sys
 from unittest.mock import MagicMock
 
 
@@ -75,9 +76,12 @@ def __getattr__(cls, name):
 # built documents.
 #
 # The short X.Y version.
-version = '0.1'
+here = path.abspath(path.dirname(__file__))
+with open(path.join(here + "/../..", 'VERSION')) as f:
+    version = f.read()
+
 # The full version, including alpha/beta/rc tags.
-release = '0.1'
+release = version
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/setup.py b/setup.py
index e7cc3766..08451035 100644
--- a/setup.py
+++ b/setup.py
@@ -10,13 +10,16 @@
 with open(path.join(here, 'requirements.txt')) as f:
     dependencies = f.readlines()
 
+with open(path.join(here, 'VERSION')) as f:
+    version = f.read()
+
 setup(
     name='abcpy',
 
     # Versions should comply with PEP440. For a discussion on single-sourcing
     # the version across setup.py and the project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='0.1',
+    version=version,
 
     description='A framework for parallel approximate Bayesian computation.',
     long_description=long_description,

From 2b98d0918039a2c63497cef321ecf177fead1526 Mon Sep 17 00:00:00 2001
From: Marcel Schoengens
Date: Thu, 23 Mar 2017 15:02:20 +0100
Subject: [PATCH 09/50] Small bugfix in Swig Makefile.

---
 examples/extensions/models/gaussian_cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/extensions/models/gaussian_cpp/Makefile b/examples/extensions/models/gaussian_cpp/Makefile
index 2b40ec31..418e38e6 100644
--- a/examples/extensions/models/gaussian_cpp/Makefile
+++ b/examples/extensions/models/gaussian_cpp/Makefile
@@ -18,7 +18,7 @@ cpp_simple: _gaussian_model_simple.so gaussian_model_simple.py
 	$(CC) $(CPPFLAGS) -I $(INCLUDEPATH) -c $< -o $@
 
 _%.so: %.o %_wrap.o
-	$(CC) -shared $< -o $@
+	$(CC) -shared $^ -o $@
 
 %.i:
 	$(WGET) "https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i"

From 9146d524af733b5bbe92d979dab80b569334f987 Mon Sep 17 00:00:00 2001
From: Avinash Ummadisingu
Date: Thu, 30 Mar 2017 21:40:32 +0200
Subject: [PATCH 10/50] Added an initial MPI backend with hello world example to test

---
 abcpy/backends.py                    | 157 +++++++++++++++++++++
 examples/backends/mpi/hello_world.py |  16 +++
 2 files changed, 173 insertions(+)
 create mode 100644 examples/backends/mpi/hello_world.py

diff --git a/abcpy/backends.py b/abcpy/backends.py
index 461d60ed..0a641ac5 100644
--- a/abcpy/backends.py
+++ b/abcpy/backends.py
@@ -1,5 +1,8 @@
 from abc import ABCMeta, abstractmethod
 
+import numpy as np
+import sys
+
 class Backend(metaclass = ABCMeta):
     """
     This is the base class for every parallelization backend. It essentially
@@ -371,3 +374,157 @@ def value(self):
         """
 
         return self.bcv.value
+
+
+class BackendMPI(Backend):
+    """
+    A parallelization backend for MPI.
+
+    """
+
+    comm = None
+    size = None
+    rank = None
+
+    def __init__(self):
+        """
+        Initialize the backend identifying all the ranks.
+
+        """
+        # Extremely unpythonic.
+ #Find a cleaner way to check this and import conditionally on the backend. + global MPI + from mpi4py import MPI + + self.comm = MPI.COMM_WORLD + self.size = self.comm.Get_size() + self.rank = self.comm.Get_rank() + + if (self.rank == 0): + print("Hello World, I am the master.") + else: + print("Hello World, I am worker number %s." % (self.rank)) + + + def parallelize(self, python_list): + """ + This method distributes the list on the available workers and returns a + reference object. + + The list is split into number of workers many parts as a numpy array. + Each part is sent to a separate worker node using the MPI scatter. + + Parameters + ---------- + list: Python list + the list that should get distributed on the worker nodes + Returns + ------- + PDSMPI class (parallel data set) + A reference object that represents the parallelized list + """ + + rdd = np.array_split(python_list, self.size, axis=0) + data_chunk = self.comm.scatter(rdd, root=0) + return PDSMPI(data_chunk) + + + def broadcast(self, object): + """ + Send object to all worker nodes without splitting it up. + + Parameters + ---------- + object: Python object + An arbitrary object that should be available on all workers + + Returns + ------- + BDS class (broadcast data set) + A reference to the broadcasted object + """ + + bcv = self.comm.bcast(object, root=0) + bds = BDSMPI(bcv) + + return bds + + + def map(self, func, pds): + """ + A distributed implementation of map that works on parallel data sets (PDS). + + On every element of pds the function func is called. + + Parameters + ---------- + func: Python func + A function that can be applied to every element of the pds + pds: PDS class + A parallel data set to which func should be applied + + Returns + ------- + PDSMPI class + a new parallel data set that contains the result of the map + """ + + rdd = list(map(func, pds.python_list)) + pds_res = PDSMPI(rdd) + + return pds_res + + + def collect(self, pds): + """ + Gather the pds from all the workers, send it to the master and return it as a standard Python list. + + Parameters + ---------- + pds: PDS class + a parallel data set + + Returns + ------- + Python list + all elements of pds as a list + """ + + python_list = self.comm.gather(pds.python_list, root=0) + + return python_list + + + +class PDSMPI(PDS): + """ + This is a wrapper for a Python parallel data set. + """ + + def __init__(self, python_list): + """ + Returns + ------- + python_list + a Python list + """ + + self.python_list = python_list + + + +class BDSMPI(BDS): + """ + The reference class for broadcast data set (BDS). + """ + + def __init__(self, object): + + self.object = object + + def value(self): + """ + This method returns the actual object that the broadcast data set represents. 
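
        Returns
        -------
        Python object
            the object that was broadcast to the workers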
+ """ + + return self.object \ No newline at end of file diff --git a/examples/backends/mpi/hello_world.py b/examples/backends/mpi/hello_world.py new file mode 100644 index 00000000..0cfa60d2 --- /dev/null +++ b/examples/backends/mpi/hello_world.py @@ -0,0 +1,16 @@ +from abcpy.backends import BackendMPI + +def square(x): + return x**2 + + +if __name__ == "__main__": + + backend = BackendMPI() + data = list(range(100)) + + datachunk_pds = backend.parallelize(data) + print("Worker with Rank", backend.rank, "has", datachunk_pds.python_list) + + mapres_pds = backend.map(square, datachunk_pds) + print ("Worker with Rank", backend.rank, "got map result", mapres_pds.python_list) From a84564f3fc6961994e68f826cedf5b2813451192 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Fri, 31 Mar 2017 15:34:46 +0200 Subject: [PATCH 11/50] Refactored C++ example of model extension to get rid of the RNG dependency. --- .../models/gaussian_cpp/gaussian_model_simple.cpp | 8 ++++---- .../models/gaussian_cpp/gaussian_model_simple.i | 11 ++--------- .../gaussian_cpp/pmcabc-gaussian_model_simple.py | 5 ++--- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/examples/extensions/models/gaussian_cpp/gaussian_model_simple.cpp b/examples/extensions/models/gaussian_cpp/gaussian_model_simple.cpp index a77c3ab6..46b3aff0 100644 --- a/examples/extensions/models/gaussian_cpp/gaussian_model_simple.cpp +++ b/examples/extensions/models/gaussian_cpp/gaussian_model_simple.cpp @@ -4,8 +4,10 @@ using namespace std; + // Simulation function of the gaussian model -void gaussian_model(double* result, unsigned int k, double mu, double sigma, boost::mt19937 rng) { +void gaussian_model(double* result, unsigned int k, double mu, double sigma, int seed) { + boost::mt19937 rng(seed); boost::normal_distribution<> nd(mu, sigma); boost::variate_generator > sampler(rng, nd); @@ -15,13 +17,11 @@ void gaussian_model(double* result, unsigned int k, double mu, double sigma, boo } - // main function to run the simulation of the Gaussian model int main() { int k = 10; - boost::mt19937 rng; double samples[k]; - gaussian_model(samples, 0.0, 1.0, k, rng); + gaussian_model(samples, 0.0, 1.0, k, 1); for (int i=0; i #include - extern void gaussian_model(double* result, unsigned int k, double mu, double sigma, boost::mt19937 rng); + extern void gaussian_model(double* result, unsigned int k, double mu, double sigma, int seed); %} %include "numpy.i" @@ -15,14 +15,7 @@ import_array(); %} -%inline %{ - boost::mt19937* get_rng(int seed) { - boost::mt19937* rng = new boost::mt19937(seed); - return rng; - } -%} - %apply (double* ARGOUT_ARRAY1, int DIM1 ) {(double* result, unsigned int k)}; -extern void gaussian_model(double* result, unsigned int k, double mu, double sigma, boost::mt19937 rng); +extern void gaussian_model(double* result, unsigned int k, double mu, double sigma, int seed); diff --git a/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py b/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py index 9f2150a2..312ed470 100644 --- a/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py +++ b/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py @@ -26,9 +26,8 @@ def sample_from_prior(self): self.set_parameters(sample) def simulate(self, k): - cseed = self.rng.randint(np.iinfo(np.int32).max) - crng = get_rng(cseed); - result = gaussian_model(k, self.mu, self.sigma, crng) + seed = self.rng.randint(np.iinfo(np.int32).max) + result = gaussian_model(k, self.mu, 
self.sigma, seed) return list(result) From 4bd9f8e67ed3563845465b7fd07c87ef7e0501fb Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Fri, 31 Mar 2017 17:43:06 +0200 Subject: [PATCH 12/50] Bugfixes. --- .../models/gaussian_cpp/pmcabc-gaussian_model_simple.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py b/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py index 312ed470..22a01a43 100644 --- a/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py +++ b/examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py @@ -1,7 +1,8 @@ import numpy as np from abcpy.models import Model -from gaussian_model import gaussian_model, get_rng +from gaussian_model_simple import gaussian_model + class Gaussian(Model): def __init__(self, prior, seed=None): self.prior = prior From 45be6e41f1843911ef32a5392a41965e6ecb00ff Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Wed, 5 Apr 2017 17:16:46 +0200 Subject: [PATCH 13/50] Rewrote backend to better separate master/slave interaction and be more generic --- Makefile | 6 +- abcpy/backends.py | 191 ++++++++++++++++++++++----- examples/backends/mpi/hello_world.py | 3 + tests/backend_tests.py | 10 ++ 4 files changed, 177 insertions(+), 33 deletions(-) create mode 100644 tests/backend_tests.py diff --git a/Makefile b/Makefile index b08e3941..39020acb 100644 --- a/Makefile +++ b/Makefile @@ -14,12 +14,16 @@ clean: find . -name ".#*" -delete find . -name "#*#" -delete -test: unittest exampletest doctest +test: unittest unittest_mpi exampletest doctest unittest: python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in unit tests."; exit 1) +unittest_mpi: + mpirun -np 4 python3 -m unittest discover -s tests -v -p "backend_tests.py" || (echo "Error in unit tests."; exit 1) + + $(MAKEDIRS): make -C $@ diff --git a/abcpy/backends.py b/abcpy/backends.py index 0a641ac5..4b23e395 100644 --- a/abcpy/backends.py +++ b/abcpy/backends.py @@ -385,6 +385,7 @@ class BackendMPI(Backend): comm = None size = None rank = None + MPI = None def __init__(self): """ @@ -393,18 +394,103 @@ def __init__(self): """ # Extremely unpythonic. #Find a cleaner way to check this and import conditionally on the backend. - global MPI from mpi4py import MPI - + self.MPI = MPI self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() + self.current_tag = 0 + + self.is_master = (self.rank==0) - if (self.rank == 0): + if (self.is_master): print("Hello World, I am the master.") else: print("Hello World, I am worker number %s." % (self.rank)) + self.slave_run() + + + + def slave_run(self): + """ + This method is the infinite loop a slave enters directly from init. + It makes the slave wait for a command to perform from the master and + then calls the appropriate function. + + This method also takes care of the synchronization of data between the + master and the slaves by matching PDSs based on the tags sent by the master + with the command. + + Commands received from the master are of the form of a tuple. + The first component of the tuple is always the operation to be performed + and the rest are conditional on the operation. + + (op) where op=="par" for parallelize + (op,tag,func) where op=="map" for map. 
+ (op,tag) where op=="col" for a collect operation + (op,) where op=="die" for the slave to break and die + """ + self.data_store = {} + + while True: + #Get the next broadcasted operation from the root. + data = self.comm.bcast(None, root=0) + + if data[0]=="par": + pds = self.parallelize([]) + self.data_store[pds.tag] = pds + elif data[0] == "map": + tag, func = data[1:] + #Access an existing PDS + pds = self.data_store[tag] + pds_new = self.map(func,pds) + + #Store the result in a newly gnerated PDS tag. + self.data_store[pds_new.tag] = pds_new + + elif data[0] =="col": + tag = data[1] + #Access an existing PDS + pds = self.data_store[tag] + self.collect(pds) + + elif data[0] =="die": + quit() + + + def master_run(self,command,tag =None,remote_function = None): + """ + This method handles the sending of the command to the slaves + telling them what operation to perform next. + + + Parameters + ---------- + command: str + A string telling the slave what the next operation is. + valid options are (par,map,col,dir) + tag: int (Default: None) + A "tag" telling the slave which pds it should operate on + remote_function: Python Function (Default:None) + A python function passed for the "map". Is None otherwise + + """ + _ = self.comm.bcast((command,tag,remote_function),root=0) + + + def generate_new_tag(self): + """ + This method generates a new tag to associate a PDS with it's remote counterpart + that slaves use to store & index data based on the tag they receive + + Returns + ------- + Returns a unique integer. + + """ + self.current_tag+=1 + return self.current_tag def parallelize(self, python_list): """ @@ -414,40 +500,35 @@ def parallelize(self, python_list): The list is split into number of workers many parts as a numpy array. Each part is sent to a separate worker node using the MPI scatter. + MASTER: python_list is the real data that is to be split up + SLAVE: python_list should be [] and is ignored by the scatter() + Parameters ---------- list: Python list the list that should get distributed on the worker nodes + Returns ------- PDSMPI class (parallel data set) A reference object that represents the parallelized list """ + if self.is_master: + #Tell the slaves to enter parallelize() + self.master_run("par",None) + rdd = np.array_split(python_list, self.size, axis=0) - data_chunk = self.comm.scatter(rdd, root=0) - return PDSMPI(data_chunk) + data_chunk = self.comm.scatter(rdd, root=0) - def broadcast(self, object): - """ - Send object to all worker nodes without splitting it up. - Parameters - ---------- - object: Python object - An arbitrary object that should be available on all workers + #Generate a new tag to associate the data to. + #Assumption: It's in sync with master + tag = self.generate_new_tag() + pds = PDSMPI(data_chunk,tag) - Returns - ------- - BDS class (broadcast data set) - A reference to the broadcasted object - """ - - bcv = self.comm.bcast(object, root=0) - bds = BDSMPI(bcv) - - return bds + return pds def map(self, func, pds): @@ -469,8 +550,16 @@ def map(self, func, pds): a new parallel data set that contains the result of the map """ + + if self.is_master: + #Tell the slaves to enter the map() with the current tag & func. 
+ self.master_run("map",pds.tag,remote_function = func) + + rdd = list(map(func, pds.python_list)) - pds_res = PDSMPI(rdd) + + tag_res = self.generate_new_tag() + pds_res = PDSMPI(rdd,tag_res) return pds_res @@ -490,27 +579,63 @@ def collect(self, pds): all elements of pds as a list """ + if self.is_master: + #Tell the slaves to enter collect with the pds's tag + self.master_run("col",pds.tag) + python_list = self.comm.gather(pds.python_list, root=0) return python_list + def __del__(self): + """ + Overriding the delete function to explicitly call MPI.finalize(). + This is also required so we can tell the slaves to get out of the + while loop they are in and exit gracefully and they themselves call + finalize when they die. + """ + if self.is_master: + self.master_run("die") + self.MPI.Finalize() -class PDSMPI(PDS): - """ - This is a wrapper for a Python parallel data set. - """ - def __init__(self, python_list): + def broadcast(self, object, tag = None): """ + Send object to all worker nodes without splitting it up. + + Parameters + ---------- + object: Python object + An arbitrary object that should be available on all workers + + tag: Int (Default: None) + the tag identifier of the parallelize. The master will overwrite + but the slaves will use it. + Returns ------- - python_list - a Python list + BDS class (broadcast data set) + A reference to the broadcasted object """ - self.python_list = python_list + raise NotImplementedError + + bcv = self.comm.bcast(object, root=0) + bds = BDSMPI(bcv) + return bds + + + +class PDSMPI(PDS): + """ + This is a wrapper for a Python parallel data set. + """ + + def __init__(self, python_list,tag): + self.python_list = python_list + self.tag = tag class BDSMPI(BDS): @@ -518,9 +643,11 @@ class BDSMPI(BDS): The reference class for broadcast data set (BDS). """ - def __init__(self, object): + def __init__(self, object,tag): self.object = object + self.tag = tag + def value(self): """ diff --git a/examples/backends/mpi/hello_world.py b/examples/backends/mpi/hello_world.py index 0cfa60d2..3aee8d2e 100644 --- a/examples/backends/mpi/hello_world.py +++ b/examples/backends/mpi/hello_world.py @@ -14,3 +14,6 @@ def square(x): mapres_pds = backend.map(square, datachunk_pds) print ("Worker with Rank", backend.rank, "got map result", mapres_pds.python_list) + + print("Result of the map is:",backend.collect(mapres_pds)) + print("Original Data was:",backend.collect(datachunk_pds)) diff --git a/tests/backend_tests.py b/tests/backend_tests.py new file mode 100644 index 00000000..92ad098f --- /dev/null +++ b/tests/backend_tests.py @@ -0,0 +1,10 @@ +import unittest +import numpy as np + +from abcpy.backends import BackendMPI + + + +class MPIBackendTests(unittest.TestCase): + def setUp(self): + assert 1==0,"Die" \ No newline at end of file From c8fa0e4455c657406932dfdf8dd3c4f79e5a1572 Mon Sep 17 00:00:00 2001 From: Lorenzo Fabbri Date: Tue, 11 Apr 2017 17:37:55 +0200 Subject: [PATCH 14/50] Separate MPI backend. 
Start MPI backend tests --- abcpy/backend_mpi.py | 419 +++++++++++++++++++++++++++++++++++++++++ abcpy/backends.py | 284 ---------------------------- tests/backend_tests.py | 44 ++++- 3 files changed, 461 insertions(+), 286 deletions(-) create mode 100644 abcpy/backend_mpi.py diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py new file mode 100644 index 00000000..9d73f61e --- /dev/null +++ b/abcpy/backend_mpi.py @@ -0,0 +1,419 @@ +from abc import ABCMeta, abstractmethod + +from mpi4py import MPI +import numpy as np +import marshal +import sys + +class Backend(metaclass = ABCMeta): + """ + This is the base class for every parallelization backend. It essentially + resembles the map/reduce API from Spark. + + An idea for the future is to implement a MPI version of the backend with the + hope to be more complient with standard HPC infrastructure and a potential + speed-up. + + """ + + @abstractmethod + def parallelize(self, list): + """ + This method distributes the list on the available workers and returns a + reference object. + + The list should be split into number of workers many parts. Each + part should then be sent to a separate worker node. + + Parameters + ---------- + list: Python list + the list that should get distributed on the worker nodes + Returns + ------- + PDS class (parallel data set) + A reference object that represents the parallelized list + """ + + raise NotImplemented + + + @abstractmethod + def broadcast(self, object): + """ + Send object to all worker nodes without splitting it up. + + Parameters + ---------- + object: Python object + An abitrary object that should be available on all workers + + Returns + ------- + BDS class (broadcast data set) + A reference to the broadcasted object + """ + + raise NotImplemented + + + @abstractmethod + def map(self, func, pds): + """ + A distributed implementation of map that works on parallel data sets (PDS). + + On every element of pds the function func is called. + + Parameters + ---------- + func: Python func + A function that can be applied to every element of the pds + pds: PDS class + A parallel data set to which func should be applied + + Returns + ------- + PDS class + a new parallel data set that contains the result of the map + """ + + raise NotImplemented + + + @abstractmethod + def collect(self, pds): + """ + Gather the pds from all the workers, send it to the master and return it as a standard Python list. + + Parameters + ---------- + pds: PDS class + a parallel data set + + Returns + ------- + Python list + all elements of pds as a list + """ + + raise NotImplemented + + +class PDS: + """ + The reference class for parallel data sets (PDS). + """ + + @abstractmethod + def __init__(self): + raise NotImplemented + + +class BDS: + """ + The reference class for broadcast data set (BDS). + """ + + @abstractmethod + def __init__(self): + raise NotImplemented + + + @abstractmethod + def value(self): + """ + This method should return the actual object that the broadcast data set represents. + """ + raise NotImplemented + + + +class BackendMPI(Backend): + """ + A parallelization backend for MPI. + + """ + + comm = None + size = None + rank = None + MPI = None + + + def __init__(self): + """ + Initialize the backend identifying all the ranks. 
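
        Note that worker ranks never return from this constructor: they drop
        straight into slave_run() and wait for commands from the master, so
        any code after BackendMPI() effectively executes on the master only.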
+ + """ + + self.MPI = MPI + self.comm = MPI.COMM_WORLD + self.size = self.comm.Get_size() + self.rank = self.comm.Get_rank() + self.current_tag = 0 + + self.is_master = (self.rank == 0) + if self.size < 2: + raise ValueError('Please, use at least 2 ranks.') + + # List of available workes, check on Master node + avail_workers = list(range(1, size)) + + if (self.is_master): + print("Hello World, I am the master.") + else: + print("Hello World, I am worker number %s." % (self.rank)) + self.slave_run() + + + def slave_run(self): + """ + This method is the infinite loop a slave enters directly from init. + It makes the slave wait for a command to perform from the master and + then calls the appropriate function. + + This method also takes care of the synchronization of data between the + master and the slaves by matching PDSs based on the tags sent by the master + with the command. + + Commands received from the master are of the form of a tuple. + The first component of the tuple is always the operation to be performed + and the rest are conditional on the operation. + + (op) where op=="par" for parallelize + (op,tag,func) where op=="map" for map. + (op,tag) where op=="col" for a collect operation + (op,) where op=="die" for the slave to break and die + """ + + self.data_store = {} + + while True: + # Get the next broadcasted operation from the root + data = self.comm.bcast(None, root=0) + + if data[0] == "par": + pds = self.parallelize([]) + self.data_store[pds.tag] = pds + + elif data[0] == "map": + tag, func = data[1:] + # Access an existing PDS + pds = self.data_store[tag] + pds_new = self.map(func, pds) + + # Store the result in a newly gnerated PDS tag + self.data_store[pds_new.tag] = pds_new + + elif data[0] == "col": + tag = data[1] + # Access an existing PDS + pds = self.data_store[tag] + self.collect(pds) + + elif data[0] == "die": + quit() + + + def master_run(self, command, tag = None, remote_function = None): + """ + This method handles the sending of the command to the slaves + telling them what operation to perform next. + + + Parameters + ---------- + command: str + A string telling the slave what the next operation is. + valid options are (par,map,col,dir) + tag: int (Default: None) + A "tag" telling the slave which pds it should operate on + remote_function: Python Function (Default:None) + A python function passed for the "map". Is None otherwise + + """ + + _ = self.comm.bcast((command, tag, remote_function), root=0) + + + def generate_new_tag(self): + """ + This method generates a new tag to associate a PDS with it's remote counterpart + that slaves use to store & index data based on the tag they receive + + Returns + ------- + Returns a unique integer. + + """ + + self.current_tag += 1 + return self.current_tag + + + def parallelize(self, python_list): + """ + This method distributes the list on the available workers and returns a + reference object. + + The list is split into number of workers many parts as a numpy array. + Each part is sent to a separate worker node using the MPI scatter. 
+ + MASTER: python_list is the real data that is to be split up + SLAVE: python_list should be [] and is ignored by the scatter() + + Parameters + ---------- + list: Python list + the list that should get distributed on the worker nodes + + Returns + ------- + PDSMPI class (parallel data set) + A reference object that represents the parallelized list + """ + + if self.is_master: + # Tell the slaves to enter parallelize() + self.master_run("par", None) + + rdd = np.array_split(python_list, self.size, axis=0) + + data_chunk = self.comm.scatter(rdd, root=0) + + # Generate a new tag to associate the data to. + # Assumption: It's in sync with master + tag = self.generate_new_tag() + pds = PDSMPI(data_chunk, tag) + + return pds + + + def map(self, func, pds): + """ + A distributed implementation of map that works on parallel data sets (PDS). + + On every element of pds the function func is called. + + Parameters + ---------- + func: Python func + A function that can be applied to every element of the pds + pds: PDS class + A parallel data set to which func should be applied + + Returns + ------- + PDSMPI class + a new parallel data set that contains the result of the map + """ + + if self.is_master: + # Tell the slaves to enter the map() with the current tag & func. + self.master_run("map", pds.tag, remote_function = func) + + rdd = list(map(func, pds.python_list)) + + tag_res = self.generate_new_tag() + pds_res = PDSMPI(rdd, tag_res) + + return pds_res + + + def collect(self, pds): + """ + Gather the pds from all the workers, send it to the master and return it as a standard Python list. + + Parameters + ---------- + pds: PDS class + a parallel data set + + Returns + ------- + Python list + all elements of pds as a list + """ + + if self.is_master: + # Tell the slaves to enter collect with the pds's tag + self.master_run("col", pds.tag) + + python_list = self.comm.gather(pds.python_list, root=0) + + return python_list + + + def __del__(self): + """ + Overriding the delete function to explicitly call MPI.finalize(). + This is also required so we can tell the slaves to get out of the + while loop they are in and exit gracefully and they themselves call + finalize when they die. + """ + + if self.is_master: + self.master_run("die") + + self.MPI.Finalize() + + + def broadcast(self, object, tag = None): + """ + Send object to all worker nodes without splitting it up. + + Parameters + ---------- + object: Python object + An arbitrary object that should be available on all workers + + tag: Int (Default: None) + the tag identifier of the parallelize. The master will overwrite + but the slaves will use it. + + Returns + ------- + BDS class (broadcast data set) + A reference to the broadcasted object + """ + + raise NotImplementedError + + bcv = self.comm.bcast(object, root=0) + bds = BDSMPI(bcv) + + return bds + + +class PDSMPI(PDS): + """ + This is a wrapper for a Python parallel data set. + """ + + def __init__(self, python_list, tag): + self.python_list = python_list + self.tag = tag + + def __del__(self): + """ + Destructor + """ + print self.tag, 'Died' + + +class BDSMPI(BDS): + """ + The reference class for broadcast data set (BDS). + """ + + def __init__(self, object, tag): + + self.object = object + self.tag = tag + + def value(self): + """ + This method returns the actual object that the broadcast data set represents. 
+ """ + + return self.object diff --git a/abcpy/backends.py b/abcpy/backends.py index 4b23e395..461d60ed 100644 --- a/abcpy/backends.py +++ b/abcpy/backends.py @@ -1,8 +1,5 @@ from abc import ABCMeta, abstractmethod -import numpy as np -import sys - class Backend(metaclass = ABCMeta): """ This is the base class for every parallelization backend. It essentially @@ -374,284 +371,3 @@ def value(self): """ return self.bcv.value - - -class BackendMPI(Backend): - """ - A parallelization backend for MPI. - - """ - - comm = None - size = None - rank = None - MPI = None - - def __init__(self): - """ - Initialize the backend identifying all the ranks. - - """ - # Extremely unpythonic. - #Find a cleaner way to check this and import conditionally on the backend. - from mpi4py import MPI - self.MPI = MPI - self.comm = MPI.COMM_WORLD - self.size = self.comm.Get_size() - self.rank = self.comm.Get_rank() - self.current_tag = 0 - - self.is_master = (self.rank==0) - - if (self.is_master): - print("Hello World, I am the master.") - else: - print("Hello World, I am worker number %s." % (self.rank)) - self.slave_run() - - - - def slave_run(self): - """ - This method is the infinite loop a slave enters directly from init. - It makes the slave wait for a command to perform from the master and - then calls the appropriate function. - - This method also takes care of the synchronization of data between the - master and the slaves by matching PDSs based on the tags sent by the master - with the command. - - Commands received from the master are of the form of a tuple. - The first component of the tuple is always the operation to be performed - and the rest are conditional on the operation. - - (op) where op=="par" for parallelize - (op,tag,func) where op=="map" for map. - (op,tag) where op=="col" for a collect operation - (op,) where op=="die" for the slave to break and die - """ - self.data_store = {} - - while True: - #Get the next broadcasted operation from the root. - data = self.comm.bcast(None, root=0) - - if data[0]=="par": - pds = self.parallelize([]) - self.data_store[pds.tag] = pds - - elif data[0] == "map": - tag, func = data[1:] - #Access an existing PDS - pds = self.data_store[tag] - pds_new = self.map(func,pds) - - #Store the result in a newly gnerated PDS tag. - self.data_store[pds_new.tag] = pds_new - - elif data[0] =="col": - tag = data[1] - #Access an existing PDS - pds = self.data_store[tag] - self.collect(pds) - - elif data[0] =="die": - quit() - - - def master_run(self,command,tag =None,remote_function = None): - """ - This method handles the sending of the command to the slaves - telling them what operation to perform next. - - - Parameters - ---------- - command: str - A string telling the slave what the next operation is. - valid options are (par,map,col,dir) - tag: int (Default: None) - A "tag" telling the slave which pds it should operate on - remote_function: Python Function (Default:None) - A python function passed for the "map". Is None otherwise - - """ - _ = self.comm.bcast((command,tag,remote_function),root=0) - - - def generate_new_tag(self): - """ - This method generates a new tag to associate a PDS with it's remote counterpart - that slaves use to store & index data based on the tag they receive - - Returns - ------- - Returns a unique integer. - - """ - self.current_tag+=1 - return self.current_tag - - def parallelize(self, python_list): - """ - This method distributes the list on the available workers and returns a - reference object. 
- - The list is split into number of workers many parts as a numpy array. - Each part is sent to a separate worker node using the MPI scatter. - - MASTER: python_list is the real data that is to be split up - SLAVE: python_list should be [] and is ignored by the scatter() - - Parameters - ---------- - list: Python list - the list that should get distributed on the worker nodes - - Returns - ------- - PDSMPI class (parallel data set) - A reference object that represents the parallelized list - """ - if self.is_master: - #Tell the slaves to enter parallelize() - self.master_run("par",None) - - - rdd = np.array_split(python_list, self.size, axis=0) - - data_chunk = self.comm.scatter(rdd, root=0) - - - #Generate a new tag to associate the data to. - #Assumption: It's in sync with master - tag = self.generate_new_tag() - pds = PDSMPI(data_chunk,tag) - - return pds - - - def map(self, func, pds): - """ - A distributed implementation of map that works on parallel data sets (PDS). - - On every element of pds the function func is called. - - Parameters - ---------- - func: Python func - A function that can be applied to every element of the pds - pds: PDS class - A parallel data set to which func should be applied - - Returns - ------- - PDSMPI class - a new parallel data set that contains the result of the map - """ - - - if self.is_master: - #Tell the slaves to enter the map() with the current tag & func. - self.master_run("map",pds.tag,remote_function = func) - - - rdd = list(map(func, pds.python_list)) - - tag_res = self.generate_new_tag() - pds_res = PDSMPI(rdd,tag_res) - - return pds_res - - - def collect(self, pds): - """ - Gather the pds from all the workers, send it to the master and return it as a standard Python list. - - Parameters - ---------- - pds: PDS class - a parallel data set - - Returns - ------- - Python list - all elements of pds as a list - """ - - if self.is_master: - #Tell the slaves to enter collect with the pds's tag - self.master_run("col",pds.tag) - - python_list = self.comm.gather(pds.python_list, root=0) - - return python_list - - def __del__(self): - """ - Overriding the delete function to explicitly call MPI.finalize(). - This is also required so we can tell the slaves to get out of the - while loop they are in and exit gracefully and they themselves call - finalize when they die. - """ - if self.is_master: - self.master_run("die") - self.MPI.Finalize() - - - - def broadcast(self, object, tag = None): - """ - Send object to all worker nodes without splitting it up. - - Parameters - ---------- - object: Python object - An arbitrary object that should be available on all workers - - tag: Int (Default: None) - the tag identifier of the parallelize. The master will overwrite - but the slaves will use it. - - Returns - ------- - BDS class (broadcast data set) - A reference to the broadcasted object - """ - - raise NotImplementedError - - bcv = self.comm.bcast(object, root=0) - bds = BDSMPI(bcv) - - return bds - - - -class PDSMPI(PDS): - """ - This is a wrapper for a Python parallel data set. - """ - - def __init__(self, python_list,tag): - self.python_list = python_list - self.tag = tag - - -class BDSMPI(BDS): - """ - The reference class for broadcast data set (BDS). - """ - - def __init__(self, object,tag): - - self.object = object - self.tag = tag - - - def value(self): - """ - This method returns the actual object that the broadcast data set represents. 
- """ - - return self.object \ No newline at end of file diff --git a/tests/backend_tests.py b/tests/backend_tests.py index 92ad098f..eb0898b5 100644 --- a/tests/backend_tests.py +++ b/tests/backend_tests.py @@ -1,10 +1,50 @@ import unittest + +from mpi4py import MPI import numpy as np +import sys + +from abcpy import backend_mpi -from abcpy.backends import BackendMPI +try: + import marshal +except ImportError: + marhsal = None class MPIBackendTests(unittest.TestCase): + def setUp(self): - assert 1==0,"Die" \ No newline at end of file + assert 1==0, "Die" + + + def test_parallelize(self): + + data = list(range(100)) + data_pds = self.backend_mpi.parallelize(data) + # Assert sum lenght of chunks sums up to length data + self.assertTrue() + # Assert type returned object is list + + def test_map(self): + + # Assert with simple function each element in each chunk is correct + + # Assert type returned object is list + + def test_collect(self): + + # Assert length of returned object matches length of original dataset + + # Assert type returned object is list + + + if __name__ = '__main__': + + # Test only on Master node + if self.rank == 0: + try: + unittest.main() + except SystemExit: + pass From e446d09d0d13936ac87f0aba27d954fec4812024 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 11 Apr 2017 19:40:00 +0200 Subject: [PATCH 15/50] Rewrote communication format. Centralized generation of tags. Using cloudpickle to serialize functions & static class funcs. Handling remote PDS deletion on scope end --- abcpy/backend_mpi.py | 284 +++++++++++---------------- examples/backends/mpi/hello_world.py | 20 +- requirements.txt | 1 + 3 files changed, 138 insertions(+), 167 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 9d73f61e..7b1b9950 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -1,133 +1,10 @@ from abc import ABCMeta, abstractmethod - +from abcpy.backends import Backend,PDS,BDS from mpi4py import MPI import numpy as np -import marshal +import cloudpickle import sys -class Backend(metaclass = ABCMeta): - """ - This is the base class for every parallelization backend. It essentially - resembles the map/reduce API from Spark. - - An idea for the future is to implement a MPI version of the backend with the - hope to be more complient with standard HPC infrastructure and a potential - speed-up. - - """ - - @abstractmethod - def parallelize(self, list): - """ - This method distributes the list on the available workers and returns a - reference object. - - The list should be split into number of workers many parts. Each - part should then be sent to a separate worker node. - - Parameters - ---------- - list: Python list - the list that should get distributed on the worker nodes - Returns - ------- - PDS class (parallel data set) - A reference object that represents the parallelized list - """ - - raise NotImplemented - - - @abstractmethod - def broadcast(self, object): - """ - Send object to all worker nodes without splitting it up. - - Parameters - ---------- - object: Python object - An abitrary object that should be available on all workers - - Returns - ------- - BDS class (broadcast data set) - A reference to the broadcasted object - """ - - raise NotImplemented - - - @abstractmethod - def map(self, func, pds): - """ - A distributed implementation of map that works on parallel data sets (PDS). - - On every element of pds the function func is called. 
- - Parameters - ---------- - func: Python func - A function that can be applied to every element of the pds - pds: PDS class - A parallel data set to which func should be applied - - Returns - ------- - PDS class - a new parallel data set that contains the result of the map - """ - - raise NotImplemented - - - @abstractmethod - def collect(self, pds): - """ - Gather the pds from all the workers, send it to the master and return it as a standard Python list. - - Parameters - ---------- - pds: PDS class - a parallel data set - - Returns - ------- - Python list - all elements of pds as a list - """ - - raise NotImplemented - - -class PDS: - """ - The reference class for parallel data sets (PDS). - """ - - @abstractmethod - def __init__(self): - raise NotImplemented - - -class BDS: - """ - The reference class for broadcast data set (BDS). - """ - - @abstractmethod - def __init__(self): - raise NotImplemented - - - @abstractmethod - def value(self): - """ - This method should return the actual object that the broadcast data set represents. - """ - raise NotImplemented - - - class BackendMPI(Backend): """ A parallelization backend for MPI. @@ -137,7 +14,18 @@ class BackendMPI(Backend): comm = None size = None rank = None - MPI = None + + OP_PARALLELIZE = 1 + OP_MAP = 2 + OP_COLLECT = 3 + OP_BROADCAST = 4 + OP_DELETEPDS = 5 + OP_DELETEBDS = 6 + OP_FINISH = 7 + + ATTR_TAG = 11 + ATTR_RESTAG = 12 + ATTR_FUNC = 13 def __init__(self): @@ -146,18 +34,25 @@ def __init__(self): """ - self.MPI = MPI + #Initialize some private variables for tags we need for communication + #.. between Master and slaves + self.__current_tag = 0 + self.__rec_tag = None + self.__rec_tag_new = None + + self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() - self.current_tag = 0 + + self.is_master = (self.rank == 0) if self.size < 2: raise ValueError('Please, use at least 2 ranks.') # List of available workes, check on Master node - avail_workers = list(range(1, size)) + avail_workers = list(range(1, self.size)) if (self.is_master): print("Hello World, I am the master.") @@ -166,6 +61,7 @@ def __init__(self): self.slave_run() + def slave_run(self): """ This method is the infinite loop a slave enters directly from init. @@ -188,16 +84,26 @@ def slave_run(self): self.data_store = {} + while True: - # Get the next broadcasted operation from the root data = self.comm.bcast(None, root=0) - if data[0] == "par": + op = data["op"] + + if op == self.OP_PARALLELIZE: + tag = data[self.ATTR_TAG] + self.__rec_tag = tag pds = self.parallelize([]) self.data_store[pds.tag] = pds - elif data[0] == "map": - tag, func = data[1:] + + elif op == self.OP_MAP: + tag,tag_new,func_dump = data[self.ATTR_TAG],data[self.ATTR_RESTAG],data[self.ATTR_FUNC] + self.__rec_tag, self.__rec_tag_new = tag,tag_new + + #Use cloudpickle to convert back our string into a function + func = cloudpickle.loads(func_dump) + # Access an existing PDS pds = self.data_store[tag] pds_new = self.map(func, pds) @@ -205,38 +111,72 @@ def slave_run(self): # Store the result in a newly gnerated PDS tag self.data_store[pds_new.tag] = pds_new - elif data[0] == "col": - tag = data[1] + elif op == self.OP_COLLECT: + tag = data[self.ATTR_TAG] + # Access an existing PDS pds = self.data_store[tag] + self.collect(pds) - elif data[0] == "die": - quit() + elif op == self.OP_DELETEPDS: + #Delete the remote PDS when the master tells it to. 
+ tag = data[self.ATTR_TAG] + del self.data_store[tag] + elif op == self.OP_FINISH: + quit() - def master_run(self, command, tag = None, remote_function = None): + def __get_received_tag(self): """ + Function to retrieve the tag(s) we received from the master to associate + our slave's created PDS with the master's. + """ + return self.__rec_tag,self.__rec_tag_new + + def command_slaves(self,command,data): + """ This method handles the sending of the command to the slaves telling them what operation to perform next. - Parameters ---------- - command: str - A string telling the slave what the next operation is. - valid options are (par,map,col,dir) - tag: int (Default: None) - A "tag" telling the slave which pds it should operate on - remote_function: Python Function (Default:None) - A python function passed for the "map". Is None otherwise - + command: operation code of OP_xxx + One of the operation codes defined in the class definition as OP_xxx + which tell the slaves what operation they're performing. + data: tuple + Any of the data required for the operation which needs to be bundled + in the data packet sent. """ + data_packet = {} + data_packet["op"] = command + + if command == self.OP_PARALLELIZE: + #In parallelize, we get only one entry of tuple data + # which is the tag of the data we are going to receive. + tag = data[0] + data_packet[self.ATTR_TAG] = tag + + elif command == self.OP_MAP: + #In map we recieve data as (tag,tag_new,func) + tag,tag_new,func = data + + #Use cloudpickle to dump the function into a string. + func_dump = cloudpickle.dumps(func) + data_packet[self.ATTR_TAG] = tag + data_packet[self.ATTR_RESTAG] = tag_new + data_packet[self.ATTR_FUNC] = func_dump - _ = self.comm.bcast((command, tag, remote_function), root=0) + elif command == self.OP_COLLECT: + #In collect we receive data as (tag) + tag = data[0] + data_packet[self.ATTR_TAG] = tag - def generate_new_tag(self): + + _ = self.comm.bcast(data_packet, root=0) + + def __generate_new_tag(self): """ This method generates a new tag to associate a PDS with it's remote counterpart that slaves use to store & index data based on the tag they receive @@ -247,8 +187,8 @@ def generate_new_tag(self): """ - self.current_tag += 1 - return self.current_tag + self.__current_tag += 1 + return self.__current_tag def parallelize(self, python_list): @@ -275,15 +215,15 @@ def parallelize(self, python_list): if self.is_master: # Tell the slaves to enter parallelize() - self.master_run("par", None) + tag = self.__generate_new_tag() + self.command_slaves(self.OP_PARALLELIZE,(tag,)) + else: + tag,tag_new = self.__get_received_tag() rdd = np.array_split(python_list, self.size, axis=0) data_chunk = self.comm.scatter(rdd, root=0) - # Generate a new tag to associate the data to. - # Assumption: It's in sync with master - tag = self.generate_new_tag() pds = PDSMPI(data_chunk, tag) return pds @@ -310,13 +250,22 @@ def map(self, func, pds): if self.is_master: # Tell the slaves to enter the map() with the current tag & func. 
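            #(The function itself is serialized with cloudpickle inside
            # command_slaves(), since plain pickle cannot ship lambdas,
            # closures or interactively defined functions to the slaves.)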
- self.master_run("map", pds.tag, remote_function = func) - rdd = list(map(func, pds.python_list)) + #Get tag of dataset we want to operate on + tag = pds.tag + + #Generate a new tag to be used by the slaves for the resultant PDS + tag_new = self.__generate_new_tag() + + data = (tag,tag_new,func) + self.command_slaves(self.OP_MAP,data) - tag_res = self.generate_new_tag() - pds_res = PDSMPI(rdd, tag_res) + else: + tag,tag_new = self.__get_received_tag() + rdd = list(map(func, pds.python_list)) + + pds_res = PDSMPI(rdd, tag_new) return pds_res @@ -337,7 +286,7 @@ def collect(self, pds): if self.is_master: # Tell the slaves to enter collect with the pds's tag - self.master_run("col", pds.tag) + self.command_slaves(self.OP_COLLECT,(pds.tag,)) python_list = self.comm.gather(pds.python_list, root=0) @@ -353,9 +302,9 @@ def __del__(self): """ if self.is_master: - self.master_run("die") + self.command_slaves(self.OP_FINISH,None) - self.MPI.Finalize() + MPI.Finalize() def broadcast(self, object, tag = None): @@ -396,9 +345,16 @@ def __init__(self, python_list, tag): def __del__(self): """ - Destructor + Destructor to be called when a PDS falls out of scope and\or is being deleted. + Tells the slaves to delete their copy of the PDS. """ - print self.tag, 'Died' + if MPI.Is_finalized()==False: + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + if rank ==0: + data_packet = {"op":BackendMPI.OP_DELETEPDS,BackendMPI.ATTR_TAG:self.tag} + _ = comm.bcast(data_packet, root=0) + class BDSMPI(BDS): diff --git a/examples/backends/mpi/hello_world.py b/examples/backends/mpi/hello_world.py index 3aee8d2e..067fcb55 100644 --- a/examples/backends/mpi/hello_world.py +++ b/examples/backends/mpi/hello_world.py @@ -1,7 +1,6 @@ -from abcpy.backends import BackendMPI +from abcpy.backend_mpi import BackendMPI + -def square(x): - return x**2 if __name__ == "__main__": @@ -9,6 +8,16 @@ def square(x): backend = BackendMPI() data = list(range(100)) + + def square(x): + return x**2 + + class staticfunctest: + @staticmethod + def cube(x): + return x**3 + + datachunk_pds = backend.parallelize(data) print("Worker with Rank", backend.rank, "has", datachunk_pds.python_list) @@ -17,3 +26,8 @@ def square(x): print("Result of the map is:",backend.collect(mapres_pds)) print("Original Data was:",backend.collect(datachunk_pds)) + + + mapres_pds = backend.map(staticfunctest.cube, datachunk_pds) + print("Result of the map is:",backend.collect(mapres_pds)) + diff --git a/requirements.txt b/requirements.txt index b7ab0e8f..63bb7960 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ findspark sphinx==1.4.8 sphinx_rtd_theme coverage +cloudpickle \ No newline at end of file From 31970c4bf71c5fa5b15f52297ffa85001af2fbb1 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 11 Apr 2017 21:05:24 +0200 Subject: [PATCH 16/50] Updated unit test case for backend with simple test --- abcpy/backend_mpi.py | 1 + tests/backend_mpi_tests.py | 35 ++++++++++++++++++++++++++ tests/backend_tests.py | 50 -------------------------------------- 3 files changed, 36 insertions(+), 50 deletions(-) create mode 100644 tests/backend_mpi_tests.py delete mode 100644 tests/backend_tests.py diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 7b1b9950..b28d9322 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -122,6 +122,7 @@ def slave_run(self): elif op == self.OP_DELETEPDS: #Delete the remote PDS when the master tells it to. 
tag = data[self.ATTR_TAG] + # print("Deleting the PDS with tag",tag) del self.data_store[tag] elif op == self.OP_FINISH: diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py new file mode 100644 index 00000000..4ccda8a4 --- /dev/null +++ b/tests/backend_mpi_tests.py @@ -0,0 +1,35 @@ +import unittest +from abcpy.backend_mpi import BackendMPI + +from mpi4py import MPI + + + +class MPIBackendTests(unittest.TestCase): + + def setUp(self): + self.backend = BackendMPI() + + + def map_test(self): + data = [1,2,3,4,5] + pds = self.backend.parallelize(data) + pds_map = self.backend.map(lambda x:x**2,pds) + res = self.backend.collect(pds_map) + assert res==list(map(lambda x:x**2,data)) + + def function_pickle_map_test(self): + + def square(x): + return x**2 + + class staticfunctest: + @staticmethod + def cube(x): + return x**3 + +if __name__ == '__main__': + print("Inside Main") + comm = MPI.COMM_WORLD + if comm.Get_rank()==0: + unittest.main() diff --git a/tests/backend_tests.py b/tests/backend_tests.py deleted file mode 100644 index eb0898b5..00000000 --- a/tests/backend_tests.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest - -from mpi4py import MPI -import numpy as np -import sys - -from abcpy import backend_mpi - -try: - import marshal -except ImportError: - marhsal = None - - - -class MPIBackendTests(unittest.TestCase): - - def setUp(self): - assert 1==0, "Die" - - - def test_parallelize(self): - - data = list(range(100)) - data_pds = self.backend_mpi.parallelize(data) - # Assert sum lenght of chunks sums up to length data - self.assertTrue() - # Assert type returned object is list - - def test_map(self): - - # Assert with simple function each element in each chunk is correct - - # Assert type returned object is list - - def test_collect(self): - - # Assert length of returned object matches length of original dataset - - # Assert type returned object is list - - - if __name__ = '__main__': - - # Test only on Master node - if self.rank == 0: - try: - unittest.main() - except SystemExit: - pass From 602650cd26771616583e5b17fdb298981565535f Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 20 Apr 2017 18:51:45 +0200 Subject: [PATCH 17/50] Switched back to tuple based msg indexing. Minor code cleanup, cleaner way for remote PDS deletion, initial unittests for mpi backend --- abcpy/backend_mpi.py | 214 ++++++++++++++++++++----------------- tests/backend_mpi_tests.py | 78 +++++++++++--- 2 files changed, 179 insertions(+), 113 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index b28d9322..ddd9dd5f 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -14,7 +14,9 @@ class BackendMPI(Backend): comm = None size = None rank = None + finalized = False + #Define some operation codes to make it more readable OP_PARALLELIZE = 1 OP_MAP = 2 OP_COLLECT = 3 @@ -23,22 +25,24 @@ class BackendMPI(Backend): OP_DELETEBDS = 6 OP_FINISH = 7 - ATTR_TAG = 11 - ATTR_RESTAG = 12 - ATTR_FUNC = 13 - def __init__(self): + def __init__(self,master_node_ranks = [0,]): """ Initialize the backend identifying all the ranks. """ - #Initialize some private variables for tags we need for communication + + # Define a list of processes on the master node which should *not* perform + # .. any computation + self.master_node_ranks = master_node_ranks + + #Initialize some private variables for pds_ids we need for communication #.. 
between Master and slaves - self.__current_tag = 0 - self.__rec_tag = None - self.__rec_tag_new = None + self.__current_pds_id = 0 + self.__rec_pds_id = None + self.__rec_pds_id_result = None self.comm = MPI.COMM_WORLD @@ -51,15 +55,14 @@ def __init__(self): if self.size < 2: raise ValueError('Please, use at least 2 ranks.') - # List of available workes, check on Master node - avail_workers = list(range(1, self.size)) + if (self.is_master): print("Hello World, I am the master.") else: print("Hello World, I am worker number %s." % (self.rank)) self.slave_run() - + raise Exception("Slaves exitted main loop.") def slave_run(self): @@ -69,73 +72,72 @@ def slave_run(self): then calls the appropriate function. This method also takes care of the synchronization of data between the - master and the slaves by matching PDSs based on the tags sent by the master + master and the slaves by matching PDSs based on the pds_ids sent by the master with the command. Commands received from the master are of the form of a tuple. The first component of the tuple is always the operation to be performed and the rest are conditional on the operation. - (op) where op=="par" for parallelize - (op,tag,func) where op=="map" for map. - (op,tag) where op=="col" for a collect operation - (op,) where op=="die" for the slave to break and die + (op,pds_id) where op == OP_PARALLELIZE for parallelize + (op,pds_id,pds_id_result,func) where op == OP_MAP for map. + (op,pds_id) where op == OP_COLLECT for a collect operation + (op,pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on slaves + (op,) where op==OP_FINISH for the slave to break out of the loop and terminate """ + # Initialized data store here because only slaves need to do it. self.data_store = {} - while True: data = self.comm.bcast(None, root=0) - op = data["op"] - + op = data[0] if op == self.OP_PARALLELIZE: - tag = data[self.ATTR_TAG] - self.__rec_tag = tag + pds_id = data[1] + self.__rec_pds_id = pds_id pds = self.parallelize([]) - self.data_store[pds.tag] = pds + self.data_store[pds.pds_id] = pds elif op == self.OP_MAP: - tag,tag_new,func_dump = data[self.ATTR_TAG],data[self.ATTR_RESTAG],data[self.ATTR_FUNC] - self.__rec_tag, self.__rec_tag_new = tag,tag_new + pds_id,pds_id_result,function_packed = data[1:] + self.__rec_pds_id, self.__rec_pds_id_result = pds_id,pds_id_result - #Use cloudpickle to convert back our string into a function - func = cloudpickle.loads(func_dump) + #Use cloudpickle to convert back function string to a function + func = cloudpickle.loads(function_packed) # Access an existing PDS - pds = self.data_store[tag] - pds_new = self.map(func, pds) + pds = self.data_store[pds_id] + pds_res = self.map(func, pds) - # Store the result in a newly gnerated PDS tag - self.data_store[pds_new.tag] = pds_new + # Store the result in a newly gnerated PDS pds_id + self.data_store[pds_res.pds_id] = pds_res elif op == self.OP_COLLECT: - tag = data[self.ATTR_TAG] + pds_id = data[1] - # Access an existing PDS - pds = self.data_store[tag] + # Access an existing PDS from data store + pds = self.data_store[pds_id] self.collect(pds) elif op == self.OP_DELETEPDS: - #Delete the remote PDS when the master tells it to. 
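            # An illustrative aside, not part of this patch: every command above is
            # one bcast of an op-code tuple. A self-contained toy version of the
            # round trip (assumes mpi4py and 2+ ranks; run: mpirun -np 2 python3 toy.py):
            #
            #   from mpi4py import MPI
            #   OP_FINISH = 7                              # same code as above
            #   comm = MPI.COMM_WORLD
            #   if comm.Get_rank() == 0:
            #       comm.bcast((OP_FINISH,), root=0)       # master issues a command tuple
            #   else:
            #       data = comm.bcast(None, root=0)        # slaves block on the same bcast
            #       if data[0] == OP_FINISH:               # ...and dispatch on data[0]
            #           print("slave terminating")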
- tag = data[self.ATTR_TAG] - # print("Deleting the PDS with tag",tag) - del self.data_store[tag] + pds_id = data[1] + + del self.data_store[pds_id] elif op == self.OP_FINISH: quit() - def __get_received_tag(self): + def __get_received_pds_id(self): """ - Function to retrieve the tag(s) we received from the master to associate + Function to retrieve the pds_id(s) we received from the master to associate our slave's created PDS with the master's. """ - return self.__rec_tag,self.__rec_tag_new + return self.__rec_pds_id,self.__rec_pds_id_result - def command_slaves(self,command,data): + def __command_slaves(self,command,data): """ This method handles the sending of the command to the slaves telling them what operation to perform next. @@ -149,38 +151,36 @@ def command_slaves(self,command,data): Any of the data required for the operation which needs to be bundled in the data packet sent. """ - data_packet = {} - data_packet["op"] = command + + assert self.is_master,"Slaves are not allowed to call this function" if command == self.OP_PARALLELIZE: - #In parallelize, we get only one entry of tuple data - # which is the tag of the data we are going to receive. - tag = data[0] - data_packet[self.ATTR_TAG] = tag + #In parallelize we receive data as (pds_id) + data_packet = (command , data[0]) elif command == self.OP_MAP: - #In map we recieve data as (tag,tag_new,func) - tag,tag_new,func = data - + #In map we receive data as (pds_id,pds_id_new,func) #Use cloudpickle to dump the function into a string. - func_dump = cloudpickle.dumps(func) - data_packet[self.ATTR_TAG] = tag - data_packet[self.ATTR_RESTAG] = tag_new - data_packet[self.ATTR_FUNC] = func_dump - + function_packed = cloudpickle.dumps(data[2]) + data_packet = (command,data[0],data[1],function_packed) elif command == self.OP_COLLECT: - #In collect we receive data as (tag) - tag = data[0] - data_packet[self.ATTR_TAG] = tag + #In collect we receive data as (pds_id) + data_packet = (command,data[0]) + elif command == self.OP_DELETEPDS: + #In deletepds we receive data as (pds_id) + data_packet = (command,data[0]) + + elif command == self.OP_FINISH: + data_packet = (command,) _ = self.comm.bcast(data_packet, root=0) - def __generate_new_tag(self): + def __generate_new_pds_id(self): """ - This method generates a new tag to associate a PDS with it's remote counterpart - that slaves use to store & index data based on the tag they receive + This method generates a new pds_id to associate a PDS with it's remote counterpart + that slaves use to store & index data based on the pds_id they receive Returns ------- @@ -188,8 +188,8 @@ def __generate_new_tag(self): """ - self.__current_tag += 1 - return self.__current_tag + self.__current_pds_id += 1 + return self.__current_pds_id def parallelize(self, python_list): @@ -216,16 +216,23 @@ def parallelize(self, python_list): if self.is_master: # Tell the slaves to enter parallelize() - tag = self.__generate_new_tag() - self.command_slaves(self.OP_PARALLELIZE,(tag,)) + pds_id = self.__generate_new_pds_id() + self.__command_slaves(self.OP_PARALLELIZE,(pds_id,)) else: - tag,tag_new = self.__get_received_tag() + pds_id,pds_id_new = self.__get_received_pds_id() + + #Initialize empty data lists for the processes on the master node + rdd_masters = [[] for i in range(len(self.master_node_ranks))] + + #Split the data only amongst the number of workers + rdd_slaves = np.array_split(python_list, self.size - len(self.master_node_ranks), axis=0) - rdd = np.array_split(python_list, self.size, axis=0) + #Combine the lists 
into the final rdd before we split it across all ranks. + rdd = rdd_masters + rdd_slaves data_chunk = self.comm.scatter(rdd, root=0) - pds = PDSMPI(data_chunk, tag) + pds = PDSMPI(data_chunk, pds_id, self) return pds @@ -250,23 +257,23 @@ def map(self, func, pds): """ if self.is_master: - # Tell the slaves to enter the map() with the current tag & func. + # Tell the slaves to enter the map() with the current pds_id & func. - #Get tag of dataset we want to operate on - tag = pds.tag + #Get pds_id of dataset we want to operate on + pds_id = pds.pds_id - #Generate a new tag to be used by the slaves for the resultant PDS - tag_new = self.__generate_new_tag() + #Generate a new pds_id to be used by the slaves for the resultant PDS + pds_id_new = self.__generate_new_pds_id() - data = (tag,tag_new,func) - self.command_slaves(self.OP_MAP,data) + data = (pds_id,pds_id_new,func) + self.__command_slaves(self.OP_MAP,data) else: - tag,tag_new = self.__get_received_tag() + pds_id,pds_id_new = self.__get_received_pds_id() rdd = list(map(func, pds.python_list)) - pds_res = PDSMPI(rdd, tag_new) + pds_res = PDSMPI(rdd, pds_id_new, self) return pds_res @@ -286,13 +293,32 @@ def collect(self, pds): """ if self.is_master: - # Tell the slaves to enter collect with the pds's tag - self.command_slaves(self.OP_COLLECT,(pds.tag,)) + # Tell the slaves to enter collect with the pds's pds_id + self.__command_slaves(self.OP_COLLECT,(pds.pds_id,)) python_list = self.comm.gather(pds.python_list, root=0) - return python_list + if self.is_master: + # When we gather, the results are a list of lists one + # .. per rank. Undo that by one level and still maintain multi + # .. dimensional output (which is why we cannot use np.flatten) + combined_result = [] + list(map(combined_result.extend, python_list)) + return combined_result + + def delete_remote_pds(self,pds_id): + """ + A public function for the PDS objects on the master to call when they go out of + scope or are deleted in order to ensure the same happens on the slaves. + + Parameters + ---------- + pds_id: int + A pds_id identifying the remote PDS on the slaves to delete. + """ + if self.is_master and not self.finalized: + self.__command_slaves(self.OP_DELETEPDS,(pds_id,)) def __del__(self): """ @@ -303,12 +329,13 @@ def __del__(self): """ if self.is_master: - self.command_slaves(self.OP_FINISH,None) + self.__command_slaves(self.OP_FINISH,None) MPI.Finalize() + self.finalized = True - def broadcast(self, object, tag = None): + def broadcast(self, object, pds_id = None): """ Send object to all worker nodes without splitting it up. @@ -317,8 +344,8 @@ def broadcast(self, object, tag = None): object: Python object An arbitrary object that should be available on all workers - tag: Int (Default: None) - the tag identifier of the parallelize. The master will overwrite + pds_id: Int (Default: None) + the pds_id identifier of the parallelize. The master will overwrite but the slaves will use it. Returns @@ -340,22 +367,17 @@ class PDSMPI(PDS): This is a wrapper for a Python parallel data set. """ - def __init__(self, python_list, tag): + def __init__(self, python_list, pds_id , backend_obj): self.python_list = python_list - self.tag = tag + self.pds_id = pds_id + self.backend_obj = backend_obj def __del__(self): """ Destructor to be called when a PDS falls out of scope and\or is being deleted. - Tells the slaves to delete their copy of the PDS. + Uses the backend to send a message to destroy the slaves' copy of the pds. 
""" - if MPI.Is_finalized()==False: - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - if rank ==0: - data_packet = {"op":BackendMPI.OP_DELETEPDS,BackendMPI.ATTR_TAG:self.tag} - _ = comm.bcast(data_packet, root=0) - + self.backend_obj.delete_remote_pds(self.pds_id) class BDSMPI(BDS): @@ -363,10 +385,10 @@ class BDSMPI(BDS): The reference class for broadcast data set (BDS). """ - def __init__(self, object, tag): + def __init__(self, object, pds_id): self.object = object - self.tag = tag + self.pds_id = pds_id def value(self): """ diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py index 4ccda8a4..38ac7dbd 100644 --- a/tests/backend_mpi_tests.py +++ b/tests/backend_mpi_tests.py @@ -1,35 +1,79 @@ + import unittest +from mpi4py import MPI from abcpy.backend_mpi import BackendMPI -from mpi4py import MPI +def setUpModule(): + ''' + If an exception is raised in a setUpModule then none of + the tests in the module will be run. + + This is useful because the slaves run in a while loop on initialization + only responding to the master's commands and will never execute anything else. + + On termination of master, the slaves call quit() that raises a SystemExit(). + Because of the behaviour of setUpModule, it will not run any unit tests + for the slave and we now only need to write unit-tests from the master's + point of view. + ''' + global rank,backend + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + backend = BackendMPI() class MPIBackendTests(unittest.TestCase): + def test_parallelize(self): + data = [0]*backend.size + pds = backend.parallelize(data) + pds_map = backend.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) + res = backend.collect(pds_map) - def setUp(self): - self.backend = BackendMPI() - + print(">>>",res) + for master_index in backend.master_node_ranks: + self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") - def map_test(self): + def test_map(self): data = [1,2,3,4,5] - pds = self.backend.parallelize(data) - pds_map = self.backend.map(lambda x:x**2,pds) - res = self.backend.collect(pds_map) + pds = backend.parallelize(data) + pds_map = backend.map(lambda x:x**2,pds) + res = backend.collect(pds_map) assert res==list(map(lambda x:x**2,data)) - def function_pickle_map_test(self): - + def test_function_pickle(self): def square(x): return x**2 class staticfunctest: @staticmethod - def cube(x): - return x**3 + def square(x): + return x**2 -if __name__ == '__main__': - print("Inside Main") - comm = MPI.COMM_WORLD - if comm.Get_rank()==0: - unittest.main() + class nonstaticfunctest: + def square(self,x): + return x**2 + + data = [1,2,3,4,5] + expected_result = [1,4,9,16,25] + pds = backend.parallelize(data) + + + pds_map1 = backend.map(square,pds) + pds_res1 = backend.collect(pds_map1) + self.assertTrue(pds_res1==expected_result,"Failed pickle test for general function") + + + pds_map2 = backend.map(lambda x:x**2,pds) + pds_res2 = backend.collect(pds_map2) + self.assertTrue(pds_res2==expected_result,"Failed pickle test for lambda function") + + + pds_map3 = backend.map(staticfunctest.square,pds) + pds_res3 = backend.collect(pds_map3) + self.assertTrue(pds_res3==expected_result,"Failed pickle test for static function") + + obj = nonstaticfunctest() + pds_map4 = backend.map(obj.square ,pds) + pds_res4 = backend.collect(pds_map4) + self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") \ No newline at end of file From ddeecb8aa89bdea39eb9d35b3bc130a611cded21 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens 
Date: Mon, 24 Apr 2017 12:48:48 +0200 Subject: [PATCH 18/50] A prototype implementation of broadcast (with some errors when destroying the backend). --- abcpy/backend_mpi.py | 44 ++++++++++++++++++++++++++------------ tests/backend_mpi_tests.py | 22 ++++++++++++++++++- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index ddd9dd5f..7011090c 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -41,6 +41,7 @@ def __init__(self,master_node_ranks = [0,]): #Initialize some private variables for pds_ids we need for communication #.. between Master and slaves self.__current_pds_id = 0 + self.__current_bds_id = 0 self.__rec_pds_id = None self.__rec_pds_id_result = None @@ -49,7 +50,7 @@ def __init__(self,master_node_ranks = [0,]): self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() - + self.bds_ids = {} self.is_master = (self.rank == 0) if self.size < 2: @@ -91,8 +92,8 @@ def slave_run(self): while True: data = self.comm.bcast(None, root=0) - op = data[0] + if op == self.OP_PARALLELIZE: pds_id = data[1] self.__rec_pds_id = pds_id @@ -122,6 +123,11 @@ def slave_run(self): self.collect(pds) + elif op == self.OP_BROADCAST: + bds_id = data[1] + value = data[2] + self.broadcast(value, id=bds_id) + elif op == self.OP_DELETEPDS: pds_id = data[1] @@ -168,6 +174,10 @@ def __command_slaves(self,command,data): #In collect we receive data as (pds_id) data_packet = (command,data[0]) + elif command == self.OP_BROADCAST: + #In collect we receive data as (pds_id) + data_packet = (command,data[0],data[1]) + elif command == self.OP_DELETEPDS: #In deletepds we receive data as (pds_id) data_packet = (command,data[0]) @@ -175,6 +185,7 @@ def __command_slaves(self,command,data): elif command == self.OP_FINISH: data_packet = (command,) + print(data_packet) _ = self.comm.bcast(data_packet, root=0) def __generate_new_pds_id(self): @@ -335,7 +346,7 @@ def __del__(self): self.finalized = True - def broadcast(self, object, pds_id = None): + def broadcast(self, value, id=None): """ Send object to all worker nodes without splitting it up. @@ -353,13 +364,18 @@ def broadcast(self, object, pds_id = None): BDS class (broadcast data set) A reference to the broadcasted object """ + + if self.is_master: + id = self.__current_bds_id + self.__current_bds_id += 1 + self.__command_slaves(self.OP_BROADCAST, (id, value,)) - raise NotImplementedError - - bcv = self.comm.bcast(object, root=0) - bds = BDSMPI(bcv) + self.bds_ids[id] = value - return bds + if self.is_master: + bds = BDSMPI(id) + bds.backend = self + return bds class PDSMPI(PDS): @@ -385,14 +401,14 @@ class BDSMPI(BDS): The reference class for broadcast data set (BDS). """ - def __init__(self, object, pds_id): - - self.object = object - self.pds_id = pds_id - + def __init__(self, id): + self.id = id + self.backend = None + def value(self): """ This method returns the actual object that the broadcast data set represents. 
""" + return self.backend.bds_ids[self.id] - return self.object + diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py index 38ac7dbd..ba929054 100644 --- a/tests/backend_mpi_tests.py +++ b/tests/backend_mpi_tests.py @@ -25,6 +25,7 @@ def setUpModule(): class MPIBackendTests(unittest.TestCase): def test_parallelize(self): + return data = [0]*backend.size pds = backend.parallelize(data) pds_map = backend.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) @@ -35,13 +36,32 @@ def test_parallelize(self): self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") def test_map(self): + return data = [1,2,3,4,5] pds = backend.parallelize(data) pds_map = backend.map(lambda x:x**2,pds) res = backend.collect(pds_map) assert res==list(map(lambda x:x**2,data)) + + def test_broadcast(self): + data = [1,2,3,4,5] + pds = backend.parallelize(data) + + bds = backend.broadcast(100) + + def test_map(x): + return x + bds.value() + + pds_map = backend.map(test_map, pds) + res = backend.collect(pds_map) + print(res) + res1 = backend.map(lambda x: x-50, pds_map) + print(backend.collect(res1)) + + def test_function_pickle(self): + return def square(x): return x**2 @@ -76,4 +96,4 @@ def square(self,x): obj = nonstaticfunctest() pds_map4 = backend.map(obj.square ,pds) pds_res4 = backend.collect(pds_map4) - self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") \ No newline at end of file + self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") From d54de370ace6ff7633d6738ffaf545013a3c7d08 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Mon, 24 Apr 2017 15:21:49 +0200 Subject: [PATCH 19/50] Modified broadcast test to show backend gets pickled --- tests/backend_mpi_tests.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py index ba929054..6ecacaaf 100644 --- a/tests/backend_mpi_tests.py +++ b/tests/backend_mpi_tests.py @@ -4,6 +4,13 @@ from abcpy.backend_mpi import BackendMPI +class remoteContext: + def __init__(self): + self.bds = backend.broadcast(1) + + def func(self,x): + print("Real Rank:",MPI.COMM_WORLD.Get_rank(),"self.bds's backend rank:",self.bds.backend.rank) + return self.bds.value()+x def setUpModule(): ''' @@ -47,18 +54,10 @@ def test_map(self): def test_broadcast(self): data = [1,2,3,4,5] pds = backend.parallelize(data) - - bds = backend.broadcast(100) - - def test_map(x): - return x + bds.value() - - pds_map = backend.map(test_map, pds) + rc = remoteContext() + pds_map = backend.map(rc.func, pds) res = backend.collect(pds_map) print(res) - res1 = backend.map(lambda x: x-50, pds_map) - print(backend.collect(res1)) - def test_function_pickle(self): return From 1b83f95a559dc985f761ec8b89a87edccfe37f41 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Mon, 24 Apr 2017 17:25:46 +0200 Subject: [PATCH 20/50] made backend a global variable... 
--- abcpy/backend_mpi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 7011090c..798820d3 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -107,6 +107,7 @@ def slave_run(self): #Use cloudpickle to convert back function string to a function func = cloudpickle.loads(function_packed) + func.__globals__['backend'] = self # Access an existing PDS pds = self.data_store[pds_id] @@ -371,10 +372,10 @@ def broadcast(self, value, id=None): self.__command_slaves(self.OP_BROADCAST, (id, value,)) self.bds_ids[id] = value - + globals()['backend'] = self + if self.is_master: bds = BDSMPI(id) - bds.backend = self return bds @@ -403,12 +404,11 @@ class BDSMPI(BDS): def __init__(self, id): self.id = id - self.backend = None def value(self): """ This method returns the actual object that the broadcast data set represents. """ - return self.backend.bds_ids[self.id] + return backend.bds_ids[self.id] From 449db6e7da96d9f4f86e52743f976f46eb4061a9 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 27 Apr 2017 14:08:41 +0200 Subject: [PATCH 21/50] Split BackendMPI into Master/Slave classes --- Makefile | 2 +- abcpy/backend_mpi.py | 403 +++++++++++++++++++++++++------------------ 2 files changed, 235 insertions(+), 170 deletions(-) diff --git a/Makefile b/Makefile index 39020acb..3f181778 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ unittest: unittest_mpi: - mpirun -np 4 python3 -m unittest discover -s tests -v -p "backend_tests.py" || (echo "Error in unit tests."; exit 1) + mpirun -np 4 python3 -m unittest discover -s tests -v -p "backend_mpi_tests.py" || (echo "Error in unit tests."; exit 1) $(MAKEDIRS): diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index ddd9dd5f..355af84a 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -5,137 +5,31 @@ import cloudpickle import sys -class BackendMPI(Backend): - """ - A parallelization backend for MPI. - """ - comm = None - size = None - rank = None - finalized = False +class BackendMPIMaster(Backend): + """Defines the behavior of the master process + + This class defines the behavior of the master process (The one + with rank==0) in MPI. + """ #Define some operation codes to make it more readable - OP_PARALLELIZE = 1 - OP_MAP = 2 - OP_COLLECT = 3 - OP_BROADCAST = 4 - OP_DELETEPDS = 5 - OP_DELETEBDS = 6 - OP_FINISH = 7 - + OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] + finalized = False - def __init__(self,master_node_ranks = [0,]): - """ - Initialize the backend identifying all the ranks. - - """ - - - # Define a list of processes on the master node which should *not* perform - # .. any computation - self.master_node_ranks = master_node_ranks - - #Initialize some private variables for pds_ids we need for communication - #.. between Master and slaves - self.__current_pds_id = 0 - self.__rec_pds_id = None - self.__rec_pds_id_result = None - + def __init__(self,master_node_ranks=[0,]): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() + self.master_node_ranks = master_node_ranks + #Initialize the current_pds_id + self.__current_pds_id = 0 - self.is_master = (self.rank == 0) - if self.size < 2: - raise ValueError('Please, use at least 2 ranks.') - - - - if (self.is_master): - print("Hello World, I am the master.") - else: - print("Hello World, I am worker number %s." 
% (self.rank)) - self.slave_run() - raise Exception("Slaves exitted main loop.") - - - def slave_run(self): - """ - This method is the infinite loop a slave enters directly from init. - It makes the slave wait for a command to perform from the master and - then calls the appropriate function. - - This method also takes care of the synchronization of data between the - master and the slaves by matching PDSs based on the pds_ids sent by the master - with the command. - - Commands received from the master are of the form of a tuple. - The first component of the tuple is always the operation to be performed - and the rest are conditional on the operation. - - (op,pds_id) where op == OP_PARALLELIZE for parallelize - (op,pds_id,pds_id_result,func) where op == OP_MAP for map. - (op,pds_id) where op == OP_COLLECT for a collect operation - (op,pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on slaves - (op,) where op==OP_FINISH for the slave to break out of the loop and terminate - """ - - # Initialized data store here because only slaves need to do it. - self.data_store = {} - - while True: - data = self.comm.bcast(None, root=0) - - op = data[0] - if op == self.OP_PARALLELIZE: - pds_id = data[1] - self.__rec_pds_id = pds_id - pds = self.parallelize([]) - self.data_store[pds.pds_id] = pds - - - elif op == self.OP_MAP: - pds_id,pds_id_result,function_packed = data[1:] - self.__rec_pds_id, self.__rec_pds_id_result = pds_id,pds_id_result - - #Use cloudpickle to convert back function string to a function - func = cloudpickle.loads(function_packed) - - # Access an existing PDS - pds = self.data_store[pds_id] - pds_res = self.map(func, pds) - - # Store the result in a newly gnerated PDS pds_id - self.data_store[pds_res.pds_id] = pds_res - - elif op == self.OP_COLLECT: - pds_id = data[1] - - # Access an existing PDS from data store - pds = self.data_store[pds_id] - - self.collect(pds) - - elif op == self.OP_DELETEPDS: - pds_id = data[1] - - del self.data_store[pds_id] - - elif op == self.OP_FINISH: - quit() - - def __get_received_pds_id(self): - """ - Function to retrieve the pds_id(s) we received from the master to associate - our slave's created PDS with the master's. - """ - return self.__rec_pds_id,self.__rec_pds_id_result def __command_slaves(self,command,data): """ @@ -152,8 +46,6 @@ def __command_slaves(self,command,data): in the data packet sent. """ - assert self.is_master,"Slaves are not allowed to call this function" - if command == self.OP_PARALLELIZE: #In parallelize we receive data as (pds_id) data_packet = (command , data[0]) @@ -192,6 +84,7 @@ def __generate_new_pds_id(self): return self.__current_pds_id + def parallelize(self, python_list): """ This method distributes the list on the available workers and returns a @@ -201,7 +94,6 @@ def parallelize(self, python_list): Each part is sent to a separate worker node using the MPI scatter. 
MASTER: python_list is the real data that is to be split up - SLAVE: python_list should be [] and is ignored by the scatter() Parameters ---------- @@ -214,14 +106,11 @@ def parallelize(self, python_list): A reference object that represents the parallelized list """ - if self.is_master: - # Tell the slaves to enter parallelize() - pds_id = self.__generate_new_pds_id() - self.__command_slaves(self.OP_PARALLELIZE,(pds_id,)) - else: - pds_id,pds_id_new = self.__get_received_pds_id() + # Tell the slaves to enter parallelize() + pds_id = self.__generate_new_pds_id() + self.__command_slaves(self.OP_PARALLELIZE,(pds_id,)) - #Initialize empty data lists for the processes on the master node + #Initialize empty data lists for the processes on the master node rdd_masters = [[] for i in range(len(self.master_node_ranks))] #Split the data only amongst the number of workers @@ -256,24 +145,20 @@ def map(self, func, pds): a new parallel data set that contains the result of the map """ - if self.is_master: - # Tell the slaves to enter the map() with the current pds_id & func. + # Tell the slaves to enter the map() with the current pds_id & func. + #Get pds_id of dataset we want to operate on + pds_id = pds.pds_id - #Get pds_id of dataset we want to operate on - pds_id = pds.pds_id - - #Generate a new pds_id to be used by the slaves for the resultant PDS - pds_id_new = self.__generate_new_pds_id() - - data = (pds_id,pds_id_new,func) - self.__command_slaves(self.OP_MAP,data) - - else: - pds_id,pds_id_new = self.__get_received_pds_id() + #Generate a new pds_id to be used by the slaves for the resultant PDS + pds_id_new = self.__generate_new_pds_id() + + data = (pds_id,pds_id_new,func) + self.__command_slaves(self.OP_MAP,data) rdd = list(map(func, pds.python_list)) pds_res = PDSMPI(rdd, pds_id_new, self) + return pds_res @@ -292,20 +177,22 @@ def collect(self, pds): all elements of pds as a list """ - if self.is_master: - # Tell the slaves to enter collect with the pds's pds_id - self.__command_slaves(self.OP_COLLECT,(pds.pds_id,)) + # Tell the slaves to enter collect with the pds's pds_id + self.__command_slaves(self.OP_COLLECT,(pds.pds_id,)) python_list = self.comm.gather(pds.python_list, root=0) - if self.is_master: - # When we gather, the results are a list of lists one - # .. per rank. Undo that by one level and still maintain multi - # .. dimensional output (which is why we cannot use np.flatten) - combined_result = [] - list(map(combined_result.extend, python_list)) - return combined_result + # When we gather, the results are a list of lists one + # .. per rank. Undo that by one level and still maintain multi + # .. dimensional output (which is why we cannot use np.flatten) + combined_result = [] + list(map(combined_result.extend, python_list)) + return combined_result + + + def broadcast(self): + pass def delete_remote_pds(self,pds_id): """ @@ -317,7 +204,7 @@ def delete_remote_pds(self,pds_id): pds_id: int A pds_id identifying the remote PDS on the slaves to delete. """ - if self.is_master and not self.finalized: + if not self.finalized: self.__command_slaves(self.OP_DELETEPDS,(pds_id,)) def __del__(self): @@ -327,39 +214,213 @@ def __del__(self): while loop they are in and exit gracefully and they themselves call finalize when they die. """ + #Tell the slaves they can exit gracefully. + self.__command_slaves(self.OP_FINISH,None) - if self.is_master: - self.__command_slaves(self.OP_FINISH,None) - + #Finalize the connection because the slaves should have finished. 
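        #(No MPI calls are legal in this process once MPI.Finalize() has run, which
        # is why the `finalized` flag set below guards the destructor-driven
        # delete messages.)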
MPI.Finalize() self.finalized = True - def broadcast(self, object, pds_id = None): +class BackendMPISlave(Backend): + """Defines the behavior of the slaves processes + + This class defines how the slaves should behave during operation. + Slaves are those processes(not nodes like Spark) that have rank!=0 + and whose ids are not present in the list of non workers. + """ + OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] + + def __init__(self): + self.comm = MPI.COMM_WORLD + self.size = self.comm.Get_size() + self.rank = self.comm.Get_rank() + + #Define the vars that will hold the pds ids received from master to operate on + self.__rec_pds_id = None + self.__rec_pds_id_result = None + + #Go into an infinite loop waiting for commands from the user. + self.slave_run() + + def slave_run(self): + """ + This method is the infinite loop a slave enters directly from init. + It makes the slave wait for a command to perform from the master and + then calls the appropriate function. + + This method also takes care of the synchronization of data between the + master and the slaves by matching PDSs based on the pds_ids sent by the master + with the command. + + Commands received from the master are of the form of a tuple. + The first component of the tuple is always the operation to be performed + and the rest are conditional on the operation. + + (op,pds_id) where op == OP_PARALLELIZE for parallelize + (op,pds_id,pds_id_result,func) where op == OP_MAP for map. + (op,pds_id) where op == OP_COLLECT for a collect operation + (op,pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on slaves + (op,) where op==OP_FINISH for the slave to break out of the loop and terminate + """ + + # Initialized data store here because only slaves need to do it. + self.data_store = {} + + while True: + data = self.comm.bcast(None, root=0) + + op = data[0] + if op == self.OP_PARALLELIZE: + pds_id = data[1] + self.__rec_pds_id = pds_id + pds = self.parallelize([]) + self.data_store[pds.pds_id] = pds + + + elif op == self.OP_MAP: + pds_id,pds_id_result,function_packed = data[1:] + self.__rec_pds_id, self.__rec_pds_id_result = pds_id,pds_id_result + + #Use cloudpickle to convert back function string to a function + func = cloudpickle.loads(function_packed) + + # Access an existing PDS + pds = self.data_store[pds_id] + pds_res = self.map(func, pds) + + # Store the result in a newly gnerated PDS pds_id + self.data_store[pds_res.pds_id] = pds_res + + elif op == self.OP_COLLECT: + pds_id = data[1] + + # Access an existing PDS from data store + pds = self.data_store[pds_id] + + self.collect(pds) + + elif op == self.OP_DELETEPDS: + pds_id = data[1] + + del self.data_store[pds_id] + + elif op == self.OP_FINISH: + quit() + + def __get_received_pds_id(self): + """ + Function to retrieve the pds_id(s) we received from the master to associate + our slave's created PDS with the master's. + """ + return self.__rec_pds_id,self.__rec_pds_id_result + + + def parallelize(self, python_list): + """ + This method distributes the list on the available workers and returns a + reference object. + + The list is split into number of workers many parts as a numpy array. + Each part is sent to a separate worker node using the MPI scatter. 
+ + SLAVE: python_list should be [] and is ignored by the scatter() + + Parameters + ---------- + list: Python list + the list that should get distributed on the worker nodes + + Returns + ------- + PDSMPI class (parallel data set) + A reference object that represents the parallelized list """ - Send object to all worker nodes without splitting it up. + + #Get the PDS id we should store this data in + pds_id,pds_id_new = self.__get_received_pds_id() + + data_chunk = self.comm.scatter(None, root=0) + + pds = PDSMPI(data_chunk, pds_id, self) + + return pds + + + def map(self, func, pds): + """ + A distributed implementation of map that works on parallel data sets (PDS). + + On every element of pds the function func is called. Parameters ---------- - object: Python object - An arbitrary object that should be available on all workers + func: Python func + A function that can be applied to every element of the pds + pds: PDS class + A parallel data set to which func should be applied + + Returns + ------- + PDSMPI class + a new parallel data set that contains the result of the map + """ - pds_id: Int (Default: None) - the pds_id identifier of the parallelize. The master will overwrite - but the slaves will use it. + #Get the PDS id we operate on and the new one to store the result in + pds_id,pds_id_new = self.__get_received_pds_id() + + rdd = list(map(func, pds.python_list)) + + pds_res = PDSMPI(rdd, pds_id_new, self) + + return pds_res + + + def collect(self, pds): + """ + Gather the pds from all the workers, send it to the master and return it as a standard Python list. + + Parameters + ---------- + pds: PDS class + a parallel data set Returns ------- - BDS class (broadcast data set) - A reference to the broadcasted object + Python list + all elements of pds as a list """ - raise NotImplementedError + #Send the data we have back to the master + _ = self.comm.gather(pds.python_list, root=0) + + def broadcast(self): + pass + + +class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else BackendMPISlave): + """A backend parallelized by using MPI + + The backend conditionally inherits either the BackendMPIMaster class + or the BackendMPISlave class depending on it's rank. This lets + BackendMPI have a uniform interface for the user but allows for a + logical split between functions performed by the master + and the slaves. + """ + + def __init__(self,master_node_ranks=[0,]): + self.comm = MPI.COMM_WORLD + self.size = self.comm.Get_size() + self.rank = self.comm.Get_rank() - bcv = self.comm.bcast(object, root=0) - bds = BDSMPI(bcv) + if self.size<2: + raise ValueError('Please, use at least 2 ranks.') - return bds + if self.rank==0: + super().__init__(master_node_ranks) + else: + super().__init__() + raise Exception("Slaves exitted main loop.") class PDSMPI(PDS): @@ -377,7 +438,11 @@ def __del__(self): Destructor to be called when a PDS falls out of scope and\or is being deleted. Uses the backend to send a message to destroy the slaves' copy of the pds. """ - self.backend_obj.delete_remote_pds(self.pds_id) + try: + self.backend_obj.delete_remote_pds(self.pds_id) + except AttributeError: + #Catch "delete_remote_pds not defined" for slaves and ignore. 
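            #(Slaves construct PDSMPI objects too, but BackendMPISlave never defines
            # delete_remote_pds, so on slaves the destructor always lands here.)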
+ pass class BDSMPI(BDS): From 74592f2ecd12d467e870d868bb6b753af1aefdbc Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 27 Apr 2017 14:16:43 +0200 Subject: [PATCH 22/50] Renamed data_store to pds_store --- abcpy/backend_mpi.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 355af84a..dad37aca 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -264,8 +264,8 @@ def slave_run(self): (op,) where op==OP_FINISH for the slave to break out of the loop and terminate """ - # Initialized data store here because only slaves need to do it. - self.data_store = {} + # Initialize PDS data store here because only slaves need to do it. + self.pds_store = {} while True: data = self.comm.bcast(None, root=0) @@ -275,7 +275,7 @@ def slave_run(self): pds_id = data[1] self.__rec_pds_id = pds_id pds = self.parallelize([]) - self.data_store[pds.pds_id] = pds + self.pds_store[pds.pds_id] = pds elif op == self.OP_MAP: @@ -286,24 +286,24 @@ def slave_run(self): func = cloudpickle.loads(function_packed) # Access an existing PDS - pds = self.data_store[pds_id] + pds = self.pds_store[pds_id] pds_res = self.map(func, pds) # Store the result in a newly gnerated PDS pds_id - self.data_store[pds_res.pds_id] = pds_res + self.pds_store[pds_res.pds_id] = pds_res elif op == self.OP_COLLECT: pds_id = data[1] # Access an existing PDS from data store - pds = self.data_store[pds_id] + pds = self.pds_store[pds_id] self.collect(pds) elif op == self.OP_DELETEPDS: pds_id = data[1] - del self.data_store[pds_id] + del self.pds_store[pds_id] elif op == self.OP_FINISH: quit() From 6beba50b0b0bad99f0a6d7ab4b32e96ef97cd154 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 27 Apr 2017 15:15:33 +0200 Subject: [PATCH 23/50] Added Broadcast functionality. Untested --- abcpy/backend_mpi.py | 117 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 106 insertions(+), 11 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index dad37aca..bd2cb496 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -27,9 +27,12 @@ def __init__(self,master_node_ranks=[0,]): self.master_node_ranks = master_node_ranks - #Initialize the current_pds_id + #Initialize the current_pds_id and bds_id self.__current_pds_id = 0 + self.__current_bds_id = 0 + #Initialize a BDS store for both master & slave. + self.bds_store = {} def __command_slaves(self,command,data): """ @@ -53,9 +56,12 @@ def __command_slaves(self,command,data): elif command == self.OP_MAP: #In map we receive data as (pds_id,pds_id_new,func) #Use cloudpickle to dump the function into a string. - function_packed = cloudpickle.dumps(data[2]) + function_packed = self.__sanitize_and_pack_func(data[2]) data_packet = (command,data[0],data[1],function_packed) + elif command == self.OP_BROADCAST: + data_packet = (command,data[0]) + elif command == self.OP_COLLECT: #In collect we receive data as (pds_id) data_packet = (command,data[0]) @@ -69,6 +75,35 @@ def __command_slaves(self,command,data): _ = self.comm.bcast(data_packet, root=0) + + def __sanitize_and_pack_func(self,func): + """ + Prevents the function from packing the backend by temporarily + setting it to another variable and then uses cloudpickle + to pack it into a string to be sent. 
+ + Parameters + ---------- + func: Python Function + The function we are supposed to pack while sending it along to the slaves + during the map function + + Returns + ------- + Returns a string of the function packed by cloudpickle + + """ + + #Set the backend to None to prevent it from being packed + globals()['backend'] = {} + + function_packed = cloudpickle.dumps(func) + + #Reset the backend to self after it's been packed + globals()['backend'] = self + + return function_packed + def __generate_new_pds_id(self): """ This method generates a new pds_id to associate a PDS with it's remote counterpart @@ -84,6 +119,21 @@ def __generate_new_pds_id(self): return self.__current_pds_id + def __generate_new_pds_id(self): + """ + This method generates a new bds_id to associate a BDS with it's remote counterpart + that slaves use to store & index data based on the bds_id they receive + + Returns + ------- + Returns a unique integer. + + """ + + self.__current_bds_id += 1 + return self.__current_bds_id + + def parallelize(self, python_list): """ @@ -191,8 +241,17 @@ def collect(self, pds): return combined_result - def broadcast(self): - pass + def broadcast(self,value): + # Tell the slaves to enter broadcast() + bds_id = self.__generate_new_bds_id() + self.__command_slaves(self.OP_BROADCAST,(bds_id,)) + + _ = self.comm.broadcast(value, root=0) + + bds = BDSMPI(value, bds_id, self) + return bds + + def delete_remote_pds(self,pds_id): """ @@ -207,6 +266,13 @@ def delete_remote_pds(self,pds_id): if not self.finalized: self.__command_slaves(self.OP_DELETEPDS,(pds_id,)) + def delete_remote_bds(self,bds_id): + """ + """ + if not self.finalized: + self.__command_slaves(self.OP_DELETEBDS,(bds_id,)) + + def __del__(self): """ Overriding the delete function to explicitly call MPI.finalize(). @@ -240,6 +306,9 @@ def __init__(self): self.__rec_pds_id = None self.__rec_pds_id_result = None + #Initialize a BDS store for both master & slave. + self.bds_store = {} + #Go into an infinite loop waiting for commands from the user. self.slave_run() @@ -284,6 +353,10 @@ def slave_run(self): #Use cloudpickle to convert back function string to a function func = cloudpickle.loads(function_packed) + #Set the function's backend to current class + #so it can access bds_store properly + func.backend = self + # Access an existing PDS pds = self.pds_store[pds_id] @@ -292,6 +365,10 @@ def slave_run(self): # Store the result in a newly gnerated PDS pds_id self.pds_store[pds_res.pds_id] = pds_res + elif op == self.OP_BROADCAST: + self.__bds_id = data[1] + self.broadcast(None) + elif op == self.OP_COLLECT: pds_id = data[1] @@ -394,8 +471,12 @@ def collect(self, pds): #Send the data we have back to the master _ = self.comm.gather(pds.python_list, root=0) - def broadcast(self): - pass + def broadcast(self,value): + """ + Value is ignored for the slaves. We get data from master + """ + value = self.comm.broadcast(None, root=0) + self.bds_store[self.__rec_bds_id] = value class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else BackendMPISlave): @@ -450,14 +531,28 @@ class BDSMPI(BDS): The reference class for broadcast data set (BDS). """ - def __init__(self, object, pds_id): - - self.object = object - self.pds_id = pds_id + def __init__(self, object, bds_id, backend_obj): + #The BDS data is no longer saved in the BDS object. 
+ #It will access & store the data only from the current backend + self.bds_id = bds_id + backend.bds_store[self.bds_id] = object + self.backend_obj = backend_obj def value(self): """ This method returns the actual object that the broadcast data set represents. """ + return backend.bds_store[self.bds_id] + + def __del__(self): + """ + Destructor to be called when a BDS falls out of scope and\or is being deleted. + Uses the backend to send a message to destroy the slaves' copy of the bds. + """ + del backend.bds_store[self.bds_id] + try: + self.backend_obj.delete_remote_bds(self.bds_id) + except AttributeError: + #Catch "delete_remote_pds not defined" for slaves and ignore. + pass - return self.object From cdc4587e603d80cf06fdf10137b4ae4495e93651 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Thu, 27 Apr 2017 17:43:36 +0200 Subject: [PATCH 24/50] Simplified Fortran code and added Makefile. --- .../extensions/models/gaussian_f90/Makefile | 8 ++ .../gaussian_f90/gaussian_model_simple.f90 | 49 ++++++++ .../pmcabc-gaussian_model_simple.py | 110 ++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 examples/extensions/models/gaussian_f90/Makefile create mode 100644 examples/extensions/models/gaussian_f90/gaussian_model_simple.f90 create mode 100644 examples/extensions/models/gaussian_f90/pmcabc-gaussian_model_simple.py diff --git a/examples/extensions/models/gaussian_f90/Makefile b/examples/extensions/models/gaussian_f90/Makefile new file mode 100644 index 00000000..763e8097 --- /dev/null +++ b/examples/extensions/models/gaussian_f90/Makefile @@ -0,0 +1,8 @@ +F2PY=f2py3 +EXT_SUFFIX := $(shell python3-config --extension-suffix) + +default: gaussian_model_simple$(EXT_SUFFIX) + +%$(EXT_SUFFIX): %.f90 + $(F2PY) -c -m $* $< + diff --git a/examples/extensions/models/gaussian_f90/gaussian_model_simple.f90 b/examples/extensions/models/gaussian_f90/gaussian_model_simple.f90 new file mode 100644 index 00000000..b9054dca --- /dev/null +++ b/examples/extensions/models/gaussian_f90/gaussian_model_simple.f90 @@ -0,0 +1,49 @@ +module gaussian_model +contains + subroutine gaussian(output, mu, sigma, k, seed) + integer, intent(in) :: k, seed + real(8), intent(in) :: mu, sigma + real(8), intent(out) :: output(k) + + integer :: i, n + real(8) :: r, theta + real(8), dimension(:), allocatable :: temp + integer(4), dimension(:), allocatable :: seed_arr + + ! get random seed array size and fill seed_arr with provided seed + call random_seed(size = n) + allocate(seed_arr(n)) + seed_arr = seed + call random_seed(put = seed_arr) + + ! create 2k random numbers uniformly from [0,1] + if(allocated(temp)) then + deallocate(temp) + end if + allocate(temp(k*2)) + call random_number(temp) + + ! 
Use Box-Muller transform to create normally distributed variables
+    do i = 1, k
+      r = (-2.0 * log(temp(2*i-1)))**0.5
+      theta = 2 * 3.1415926 * temp(2*i)
+      output(i) = mu + sigma * r * sin(theta)
+    end do
+  end subroutine gaussian
+end module gaussian_model
+
+program main
+  use gaussian_model
+  implicit none
+
+  integer, parameter :: k = 100
+  integer :: seed = 9, i
+  real(8) :: mu = 10.0, sigma = 2.0
+  real(8) :: output(k)
+
+  call gaussian(output, mu, sigma, k, seed)
+
+  do i = 1, k
+     write(*,*) output(i)
+  end do
+end program main
diff --git a/examples/extensions/models/gaussian_f90/pmcabc-gaussian_model_simple.py b/examples/extensions/models/gaussian_f90/pmcabc-gaussian_model_simple.py
new file mode 100644
index 00000000..0a2527fe
--- /dev/null
+++ b/examples/extensions/models/gaussian_f90/pmcabc-gaussian_model_simple.py
@@ -0,0 +1,110 @@
+import numpy as np
+
+from abcpy.models import Model
+from gaussian_model_simple import gaussian_model
+
+class Gaussian(Model):
+    def __init__(self, prior, seed=None):
+        self.prior = prior
+        self.sample_from_prior()
+        self.rng = np.random.RandomState(seed)
+
+
+    def set_parameters(self, theta):
+        theta = np.array(theta)
+        if theta.shape[0] > 2: return False
+        if theta[1] <= 0: return False
+
+        self.mu = theta[0]
+        self.sigma = theta[1]
+        return True
+
+    def get_parameters(self):
+        return np.array([self.mu, self.sigma])
+
+    def sample_from_prior(self):
+        sample = self.prior.sample(1).reshape(-1)
+        self.set_parameters(sample)
+
+    def simulate(self, k):
+        seed = self.rng.randint(np.iinfo(np.int32).max)
+        result = gaussian_model(self.mu, self.sigma, k, seed)
+        return list(result)
+
+
+def infer_parameters():
+    # define observation for true parameters mean=170, std=15
+    y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509]
+
+    # define prior
+    from abcpy.distributions import Uniform
+    prior = Uniform([150, 5],[200, 25])
+
+    # define the model
+    model = Gaussian(prior)
+
+    # define statistics
+    from abcpy.statistics import Identity
+    statistics_calculator = Identity(degree = 2, cross = False)
+
+    # define distance
+    from abcpy.distances import LogReg
+    distance_calculator = LogReg(statistics_calculator)
+
+    # define kernel
+    from abcpy.distributions import MultiStudentT
+    mean, cov, df = np.array([.0, .0]), np.eye(2), 3.
+ kernel = MultiStudentT(mean, cov, df) + + # define backend + from abcpy.backends import BackendSpark as Backend + from abcpy.backends import BackendDummy as Backend + backend = Backend() + + # define sampling scheme + from abcpy.inferences import PMCABC + sampler = PMCABC(model, distance_calculator, kernel, backend) + + # sample from scheme + T, n_sample, n_samples_per_param = 3, 100, 10 + eps_arr = np.array([.75]) + epsilon_percentile = 10 + journal = sampler.sample(y_obs, T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) + + return journal + + +def analyse_journal(journal): + # output parameters and weights + print(journal.parameters) + print(journal.weights) + + # do post analysis + print(journal.posterior_mean()) + print(journal.posterior_cov()) + print(journal.posterior_histogram()) + + # print configuration + print(journal.configuration) + + # save and load journal + journal.save("experiments.jnl") + + from abcpy.output import Journal + new_journal = Journal.fromFile('experiments.jnl') + + +# this code is for testing purposes only and not relevant to run the example +import unittest +class ExampleExtendModelGaussianCpp(unittest.TestCase): + def test_example(self): + journal = infer_parameters() + test_result = journal.posterior_mean()[0] + expected_result = 177.02 + self.assertLess(abs(test_result - expected_result), 1.) + + +if __name__ == "__main__": + journal = infer_parameters() + analyse_journal(journal) + From adfb559e497da4ca1b7ce46eb37e214b869581b4 Mon Sep 17 00:00:00 2001 From: Lorenzo Fabbri Date: Thu, 27 Apr 2017 17:46:21 +0200 Subject: [PATCH 25/50] Minor errors. Modified hellow_world.py. Add test case for broadcast. --- abcpy/backend_mpi.py | 26 +++++++++++++++--------- examples/backends/mpi/hello_world.py | 10 +++++----- tests/backend_mpi_tests.py | 30 +++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index bd2cb496..7f3a862c 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -6,7 +6,6 @@ import sys - class BackendMPIMaster(Backend): """Defines the behavior of the master process @@ -14,8 +13,8 @@ class BackendMPIMaster(Backend): with rank==0) in MPI. """ - #Define some operation codes to make it more readable + #Define some operation codes to make it more readable OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] finalized = False @@ -34,6 +33,7 @@ def __init__(self,master_node_ranks=[0,]): #Initialize a BDS store for both master & slave. 
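        #(A plain dict mapping each integer bds_id to its broadcast value; every rank
        # keeps its own copy, which is what lets value() stay a local lookup.)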
self.bds_store = {} + def __command_slaves(self,command,data): """ This method handles the sending of the command to the slaves @@ -104,6 +104,7 @@ def __sanitize_and_pack_func(self,func): return function_packed + def __generate_new_pds_id(self): """ This method generates a new pds_id to associate a PDS with it's remote counterpart @@ -119,7 +120,7 @@ def __generate_new_pds_id(self): return self.__current_pds_id - def __generate_new_pds_id(self): + def __generate_new_bds_id(self): """ This method generates a new bds_id to associate a BDS with it's remote counterpart that slaves use to store & index data based on the bds_id they receive @@ -134,7 +135,6 @@ def __generate_new_pds_id(self): return self.__current_bds_id - def parallelize(self, python_list): """ This method distributes the list on the available workers and returns a @@ -246,13 +246,12 @@ def broadcast(self,value): bds_id = self.__generate_new_bds_id() self.__command_slaves(self.OP_BROADCAST,(bds_id,)) - _ = self.comm.broadcast(value, root=0) + _ = self.comm.bcast(value, root=0) bds = BDSMPI(value, bds_id, self) return bds - def delete_remote_pds(self,pds_id): """ A public function for the PDS objects on the master to call when they go out of @@ -263,12 +262,15 @@ def delete_remote_pds(self,pds_id): pds_id: int A pds_id identifying the remote PDS on the slaves to delete. """ + if not self.finalized: self.__command_slaves(self.OP_DELETEPDS,(pds_id,)) + def delete_remote_bds(self,bds_id): """ """ + if not self.finalized: self.__command_slaves(self.OP_DELETEBDS,(bds_id,)) @@ -280,6 +282,7 @@ def __del__(self): while loop they are in and exit gracefully and they themselves call finalize when they die. """ + #Tell the slaves they can exit gracefully. self.__command_slaves(self.OP_FINISH,None) @@ -295,6 +298,7 @@ class BackendMPISlave(Backend): Slaves are those processes(not nodes like Spark) that have rank!=0 and whose ids are not present in the list of non workers. """ + OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] def __init__(self): @@ -312,6 +316,7 @@ def __init__(self): #Go into an infinite loop waiting for commands from the user. self.slave_run() + def slave_run(self): """ This method is the infinite loop a slave enters directly from init. @@ -385,11 +390,13 @@ def slave_run(self): elif op == self.OP_FINISH: quit() + def __get_received_pds_id(self): """ Function to retrieve the pds_id(s) we received from the master to associate our slave's created PDS with the master's. """ + return self.__rec_pds_id,self.__rec_pds_id_result @@ -471,12 +478,14 @@ def collect(self, pds): #Send the data we have back to the master _ = self.comm.gather(pds.python_list, root=0) + def broadcast(self,value): """ Value is ignored for the slaves. We get data from master """ - value = self.comm.broadcast(None, root=0) - self.bds_store[self.__rec_bds_id] = value + + value = self.comm.bcast(None, root=0) + self.bds_store[self.__bds_id] = value class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else BackendMPISlave): @@ -555,4 +564,3 @@ def __del__(self): except AttributeError: #Catch "delete_remote_pds not defined" for slaves and ignore. 
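            #(The attribute actually missing on slaves here is delete_remote_bds; like
            # delete_remote_pds, it exists only on BackendMPIMaster.)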
pass - diff --git a/examples/backends/mpi/hello_world.py b/examples/backends/mpi/hello_world.py index 067fcb55..8ff2e47c 100644 --- a/examples/backends/mpi/hello_world.py +++ b/examples/backends/mpi/hello_world.py @@ -1,14 +1,10 @@ from abcpy.backend_mpi import BackendMPI - - - if __name__ == "__main__": backend = BackendMPI() data = list(range(100)) - def square(x): return x**2 @@ -27,7 +23,11 @@ def cube(x): print("Result of the map is:",backend.collect(mapres_pds)) print("Original Data was:",backend.collect(datachunk_pds)) - mapres_pds = backend.map(staticfunctest.cube, datachunk_pds) print("Result of the map is:",backend.collect(mapres_pds)) + bcast_bds = backend.broadcast(data) + #print("Broadcast at Rank", backend.rank, "has", backend.bds_store[bcast_bds.bds_id]) + + for i in range(0, backend.size): + print("Broadcasted data at Rank", i, "has", backend.bds_store[bcast_bds.bds_id]) diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py index 38ac7dbd..2b0a058d 100644 --- a/tests/backend_mpi_tests.py +++ b/tests/backend_mpi_tests.py @@ -1,10 +1,8 @@ - import unittest from mpi4py import MPI from abcpy.backend_mpi import BackendMPI - def setUpModule(): ''' If an exception is raised in a setUpModule then none of @@ -24,6 +22,7 @@ def setUpModule(): backend = BackendMPI() class MPIBackendTests(unittest.TestCase): + def test_parallelize(self): data = [0]*backend.size pds = backend.parallelize(data) @@ -41,6 +40,30 @@ def test_map(self): res = backend.collect(pds_map) assert res==list(map(lambda x:x**2,data)) + # def test_broadcast(self): + # data = [1,2,3,4,5] + # pds = backend.parallelize(data) + # pds_map = backend.map(lambda x:x**2,pds) + # res = backend.collect(pds_map) + + # bds = backend.broadcast(res) + # assert bds.value()==list(map(lambda x:x**2,data)) + + def test_broadcast(self): + data = [1,2,3,4,5] + pds = backend.parallelize(data) + + bds = backend.broadcast(100) + + def test_map(x): + return x + bds.value() + + pds_map1 = backend.map(test_map, pds) + print(backend.collect(pds_map1)) + + pds_map2 = backend.map(lambda x: x-50, pds_map1) + print(backend.collect(pds_map2)) + def test_function_pickle(self): def square(x): return x**2 @@ -73,7 +96,8 @@ def square(self,x): pds_res3 = backend.collect(pds_map3) self.assertTrue(pds_res3==expected_result,"Failed pickle test for static function") + obj = nonstaticfunctest() pds_map4 = backend.map(obj.square ,pds) pds_res4 = backend.collect(pds_map4) - self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") \ No newline at end of file + self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") From c167d4c309118e6d0cfb7214631b5e366b9c84d3 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 27 Apr 2017 18:06:03 +0200 Subject: [PATCH 26/50] Broadcast implemented with testcase --- abcpy/backend_mpi.py | 11 ++++++++--- examples/backends/mpi/hello_world.py | 2 -- tests/backend_mpi_tests.py | 15 ++------------- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 7f3a862c..51792452 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -66,8 +66,8 @@ def __command_slaves(self,command,data): #In collect we receive data as (pds_id) data_packet = (command,data[0]) - elif command == self.OP_DELETEPDS: - #In deletepds we receive data as (pds_id) + elif command == self.OP_DELETEPDS or command == self.OP_DELETEBDS: + #In deletepds we receive data as (pds_id) or bds_id data_packet = 
(command,data[0]) elif command == self.OP_FINISH: @@ -360,7 +360,7 @@ def slave_run(self): func = cloudpickle.loads(function_packed) #Set the function's backend to current class #so it can access bds_store properly - func.backend = self + # func.backend = self # Access an existing PDS @@ -506,6 +506,11 @@ def __init__(self,master_node_ranks=[0,]): if self.size<2: raise ValueError('Please, use at least 2 ranks.') + + #Set the global backend + globals()['backend'] = self + + if self.rank==0: super().__init__(master_node_ranks) else: diff --git a/examples/backends/mpi/hello_world.py b/examples/backends/mpi/hello_world.py index 8ff2e47c..097a4a61 100644 --- a/examples/backends/mpi/hello_world.py +++ b/examples/backends/mpi/hello_world.py @@ -29,5 +29,3 @@ def cube(x): bcast_bds = backend.broadcast(data) #print("Broadcast at Rank", backend.rank, "has", backend.bds_store[bcast_bds.bds_id]) - for i in range(0, backend.size): - print("Broadcasted data at Rank", i, "has", backend.bds_store[bcast_bds.bds_id]) diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py index 2b0a058d..d6d72e64 100644 --- a/tests/backend_mpi_tests.py +++ b/tests/backend_mpi_tests.py @@ -29,7 +29,6 @@ def test_parallelize(self): pds_map = backend.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) res = backend.collect(pds_map) - print(">>>",res) for master_index in backend.master_node_ranks: self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") @@ -40,14 +39,6 @@ def test_map(self): res = backend.collect(pds_map) assert res==list(map(lambda x:x**2,data)) - # def test_broadcast(self): - # data = [1,2,3,4,5] - # pds = backend.parallelize(data) - # pds_map = backend.map(lambda x:x**2,pds) - # res = backend.collect(pds_map) - - # bds = backend.broadcast(res) - # assert bds.value()==list(map(lambda x:x**2,data)) def test_broadcast(self): data = [1,2,3,4,5] @@ -58,11 +49,9 @@ def test_broadcast(self): def test_map(x): return x + bds.value() - pds_map1 = backend.map(test_map, pds) - print(backend.collect(pds_map1)) + pds_m = backend.map(test_map, pds) + self.assertTrue(backend.collect(pds_m)==[101,102,103,104,105]) - pds_map2 = backend.map(lambda x: x-50, pds_map1) - print(backend.collect(pds_map2)) def test_function_pickle(self): def square(x): From 8533d1d7aa787fb233f6b70ab762071c3fc37e3b Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 27 Apr 2017 19:33:05 +0200 Subject: [PATCH 27/50] Added BDS deletion and test cases for PDS & BDS deletetion --- abcpy/backend_mpi.py | 23 ++++++++-- tests/backend_mpi_tests.py | 94 ++++++++++++++++++++++++++++---------- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 51792452..d8dedae6 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -272,6 +272,9 @@ def delete_remote_bds(self,bds_id): """ if not self.finalized: + #The master deallocates it's BDS data. Explicit because + #.. bds_store and BDSMPI object are disconnected. 
+ del backend.bds_store[bds_id] self.__command_slaves(self.OP_DELETEBDS,(bds_id,)) @@ -300,6 +303,7 @@ class BackendMPISlave(Backend): """ OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] + def __init__(self): self.comm = MPI.COMM_WORLD @@ -384,11 +388,16 @@ def slave_run(self): elif op == self.OP_DELETEPDS: pds_id = data[1] - del self.pds_store[pds_id] + elif op == self.OP_DELETEBDS: + bds_id = data[1] + del self.bds_store[bds_id] + elif op == self.OP_FINISH: quit() + else: + raise Exception("Slave recieved unknown command code") def __get_received_pds_id(self): @@ -539,6 +548,12 @@ def __del__(self): #Catch "delete_remote_pds not defined" for slaves and ignore. pass +class BackendMPITestHelper: + def check_pds(self,k): + return k in backend.pds_store.keys() + + def check_bds(self,k): + return k in backend.bds_store.keys() class BDSMPI(BDS): """ @@ -550,7 +565,7 @@ def __init__(self, object, bds_id, backend_obj): #It will access & store the data only from the current backend self.bds_id = bds_id backend.bds_store[self.bds_id] = object - self.backend_obj = backend_obj + # self.backend_obj = backend_obj def value(self): """ @@ -563,9 +578,9 @@ def __del__(self): Destructor to be called when a BDS falls out of scope and\or is being deleted. Uses the backend to send a message to destroy the slaves' copy of the bds. """ - del backend.bds_store[self.bds_id] + try: - self.backend_obj.delete_remote_bds(self.bds_id) + backend.delete_remote_bds(self.bds_id) except AttributeError: #Catch "delete_remote_pds not defined" for slaves and ignore. pass diff --git a/tests/backend_mpi_tests.py b/tests/backend_mpi_tests.py index d6d72e64..ed7884e4 100644 --- a/tests/backend_mpi_tests.py +++ b/tests/backend_mpi_tests.py @@ -1,6 +1,6 @@ import unittest from mpi4py import MPI -from abcpy.backend_mpi import BackendMPI +from abcpy.backend_mpi import BackendMPI,BackendMPITestHelper def setUpModule(): @@ -16,41 +16,87 @@ def setUpModule(): for the slave and we now only need to write unit-tests from the master's point of view. 
''' - global rank,backend + global rank,backend_mpi comm = MPI.COMM_WORLD rank = comm.Get_rank() - backend = BackendMPI() + backend_mpi = BackendMPI() class MPIBackendTests(unittest.TestCase): def test_parallelize(self): - data = [0]*backend.size - pds = backend.parallelize(data) - pds_map = backend.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) - res = backend.collect(pds_map) + data = [0]*backend_mpi.size + pds = backend_mpi.parallelize(data) + pds_map = backend_mpi.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) + res = backend_mpi.collect(pds_map) - for master_index in backend.master_node_ranks: + for master_index in backend_mpi.master_node_ranks: self.assertTrue(master_index not in res,"Node in master_node_ranks performed map.") def test_map(self): data = [1,2,3,4,5] - pds = backend.parallelize(data) - pds_map = backend.map(lambda x:x**2,pds) - res = backend.collect(pds_map) + pds = backend_mpi.parallelize(data) + pds_map = backend_mpi.map(lambda x:x**2,pds) + res = backend_mpi.collect(pds_map) assert res==list(map(lambda x:x**2,data)) def test_broadcast(self): data = [1,2,3,4,5] - pds = backend.parallelize(data) + pds = backend_mpi.parallelize(data) - bds = backend.broadcast(100) + bds = backend_mpi.broadcast(100) + + #Pollute the BDS values of the master to confirm slaves + # use their broadcasted value + for k,v in backend_mpi.bds_store.items(): + backend_mpi.bds_store[k] = 99999 def test_map(x): return x + bds.value() - pds_m = backend.map(test_map, pds) - self.assertTrue(backend.collect(pds_m)==[101,102,103,104,105]) + pds_m = backend_mpi.map(test_map, pds) + self.assertTrue(backend_mpi.collect(pds_m)==[101,102,103,104,105]) + + def test_pds_delete(self): + + def check_if_exists(x): + obj = BackendMPITestHelper() + return obj.check_pds(x) + + data = [1,2,3,4,5] + pds = backend_mpi.parallelize(data) + + #Check if the pds we just created exists in all the slaves(+master) + + id_check_pds = backend_mpi.parallelize([pds.pds_id]*5) + pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) + self.assertTrue(False not in backend_mpi.collect(pds_check_result),"PDS was not created") + + #Delete the PDS on master and try again + del pds + pds_check_result = backend_mpi.map(check_if_exists,id_check_pds) + + self.assertTrue(True not in backend_mpi.collect(pds_check_result),"PDS was not deleted") + + + def test_bds_delete(self): + + def check_if_exists(x): + obj = BackendMPITestHelper() + return obj.check_bds(x) + + data = [1,2,3,4,5] + bds = backend_mpi.broadcast(data) + + #Check if the pds we just created exists in all the slaves(+master) + id_check_bds = backend_mpi.parallelize([bds.bds_id]*5) + bds_check_result = backend_mpi.map(check_if_exists, id_check_bds) + self.assertTrue(False not in backend_mpi.collect(bds_check_result),"BDS was not created") + + #Delete the PDS on master and try again + del bds + bds_check_result = backend_mpi.map(check_if_exists,id_check_bds) + self.assertTrue(True not in backend_mpi.collect(bds_check_result),"BDS was not deleted") def test_function_pickle(self): @@ -68,25 +114,25 @@ def square(self,x): data = [1,2,3,4,5] expected_result = [1,4,9,16,25] - pds = backend.parallelize(data) + pds = backend_mpi.parallelize(data) - pds_map1 = backend.map(square,pds) - pds_res1 = backend.collect(pds_map1) + pds_map1 = backend_mpi.map(square,pds) + pds_res1 = backend_mpi.collect(pds_map1) self.assertTrue(pds_res1==expected_result,"Failed pickle test for general function") - pds_map2 = backend.map(lambda x:x**2,pds) - pds_res2 = backend.collect(pds_map2) + 
pds_map2 = backend_mpi.map(lambda x:x**2,pds) + pds_res2 = backend_mpi.collect(pds_map2) self.assertTrue(pds_res2==expected_result,"Failed pickle test for lambda function") - pds_map3 = backend.map(staticfunctest.square,pds) - pds_res3 = backend.collect(pds_map3) + pds_map3 = backend_mpi.map(staticfunctest.square,pds) + pds_res3 = backend_mpi.collect(pds_map3) self.assertTrue(pds_res3==expected_result,"Failed pickle test for static function") obj = nonstaticfunctest() - pds_map4 = backend.map(obj.square ,pds) - pds_res4 = backend.collect(pds_map4) + pds_map4 = backend_mpi.map(obj.square ,pds) + pds_res4 = backend_mpi.collect(pds_map4) self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function") From 0c9435e37211e9d2b8f4c29cb59c2aa6d6137fd7 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Fri, 28 Apr 2017 09:28:11 +0200 Subject: [PATCH 28/50] MPI example. --- examples/backends/mpi/pmcabc_gaussian.py | 82 ++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 examples/backends/mpi/pmcabc_gaussian.py diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py new file mode 100644 index 00000000..360952c6 --- /dev/null +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -0,0 +1,82 @@ +import numpy as np + +def infer_parameters(): + # define observation for true parameters mean=170, std=15 + y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] + + # define prior + from abcpy.distributions import Uniform + prior = Uniform([150, 5],[200, 25], seed=1) + + # define the model + from abcpy.models import Gaussian + model = Gaussian(prior, seed=1) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import LogReg + distance_calculator = LogReg(statistics_calculator) + + # define kernel + from abcpy.distributions import MultiStudentT + mean, cov, df = np.array([.0, .0]), np.eye(2), 3. 
+    kernel = MultiStudentT(mean, cov, df, seed=1)
+
+    # define backend
+    from abcpy.backend_mpi import BackendMPI as Backend
+    backend = Backend()
+
+    # define sampling scheme
+    from abcpy.inferences import PMCABC
+    sampler = PMCABC(model, distance_calculator, kernel, backend, seed=1)
+
+    # sample from scheme
+    T, n_sample, n_samples_per_param = 3, 250, 10
+    eps_arr = np.array([.75])
+    epsilon_percentile = 10
+    journal = sampler.sample(y_obs, T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)
+
+    return journal
+
+
+def analyse_journal(journal):
+    # output parameters and weights
+    print(journal.parameters)
+    print(journal.weights)
+
+    # do post analysis
+    print(journal.posterior_mean())
+    print(journal.posterior_cov())
+    print(journal.posterior_histogram())
+
+    # print configuration
+    print(journal.configuration)
+
+    # save and load journal
+    journal.save("experiments.jnl")
+
+    from abcpy.output import Journal
+    new_journal = Journal.fromFile('experiments.jnl')
+
+
+import unittest
+import findspark
+class ExampleGaussianSparkTest(unittest.TestCase):
+    def setUp(self):
+        findspark.init()
+
+    def test_example(self):
+        journal = infer_parameters()
+        test_result = journal.posterior_mean()[0]
+        expected_result = 176.0
+        self.assertLess(abs(test_result - expected_result), 2.)
+
+
+if __name__ == "__main__":
+    journal = infer_parameters()
+    analyse_journal(journal)
+
+

From 88e640bae491e117787aeffa25164e65a5e8e99b Mon Sep 17 00:00:00 2001
From: Lorenzo Fabbri
Date: Sun, 28 May 2017 18:01:25 +0200
Subject: [PATCH 29/50] Update list of master ranks for final simulations

---
 abcpy/backend_mpi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py
index d8dedae6..9ad90c74 100644
--- a/abcpy/backend_mpi.py
+++ b/abcpy/backend_mpi.py
@@ -18,7 +18,7 @@ class BackendMPIMaster(Backend):
     OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7]
     finalized = False

-    def __init__(self,master_node_ranks=[0,]):
+    def __init__(self,master_node_ranks = list(range(36))):

         self.comm = MPI.COMM_WORLD
         self.size = self.comm.Get_size()
@@ -507,7 +507,7 @@ class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else Backend
     and the slaves.
     """

-    def __init__(self,master_node_ranks=[0,]):
+    def __init__(self,master_node_ranks = list(range(36))):
         self.comm = MPI.COMM_WORLD
         self.size = self.comm.Get_size()
         self.rank = self.comm.Get_rank()

From da1a5a0239e67a76a35fdee43b8b96b66952f5bb Mon Sep 17 00:00:00 2001
From: Anthony Ebert
Date: Mon, 10 Jul 2017 14:50:21 +0200
Subject: [PATCH 30/50] R documentation

---
 doc/source/README.rst | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/doc/source/README.rst b/doc/source/README.rst
index 35f8d495..5e0c4775 100644
--- a/doc/source/README.rst
+++ b/doc/source/README.rst
@@ -247,6 +247,34 @@ same way (see `Getting Started`_) as we would do with shipped models. The
 complete example code can be found `here
 `_
 
+Use ABCpy with a model written in R
+===================================
+
+Statisticians often use the R language to build statistical models. R models can be incorporated into ABCpy with the `rpy2` Python package. We show how to use the `rpy2` package to connect with a model written in R.
+
+Continuing from the previous section, we use a simple Gaussian model as an example. The following R code is the contents of the R file `gaussian_model.R`.
+
+.. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.R
+   :language: R
+   :lines: 1 - 4
+
+More complex R models are incorporated in the same way. To include this function within the ABCpy script we include the following code at the beginning of our Python script.
+
+.. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.py
+   :language: python
+   :lines: 5 - 14
+
+This imports the R function `simple_gaussian` into the Python environment. We need to build our own model to incorporate this R function as in the previous section. The only difference is the `simulate` method of the class `Gaussian`.
+
+.. automethod:: abcpy.models.Model.simulate
+   :noindex:
+
+.. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.py
+   :language: python
+   :lines: 40 - 42
+
+The default output for R functions in Python is a float vector. This must be converted into a list for the purposes of ABCpy.
+
 .. Extending: Add your Distance
    ============================

From 43bb4bce4d2376d0b6f92c9825867d0735b21909 Mon Sep 17 00:00:00 2001
From: Anthony Ebert
Date: Mon, 10 Jul 2017 14:51:25 +0200
Subject: [PATCH 31/50] R example

---
 .../models/gaussian_R/gaussian_model.R        |   4 +
 .../models/gaussian_R/gaussian_model.py       | 126 ++++++++++++++++++
 .../extensions/models/gaussian_R/graph_ABC.py |  31 +++++
 3 files changed, 161 insertions(+)
 create mode 100644 examples/extensions/models/gaussian_R/gaussian_model.R
 create mode 100644 examples/extensions/models/gaussian_R/gaussian_model.py
 create mode 100644 examples/extensions/models/gaussian_R/graph_ABC.py

diff --git a/examples/extensions/models/gaussian_R/gaussian_model.R b/examples/extensions/models/gaussian_R/gaussian_model.R
new file mode 100644
index 00000000..7c67d69a
--- /dev/null
+++ b/examples/extensions/models/gaussian_R/gaussian_model.R
@@ -0,0 +1,4 @@
+simple_gaussian <- function(mu, sigma, k = 1){
+  output <- rnorm(k, mu, sigma)
+  return(output)
+}
\ No newline at end of file
diff --git a/examples/extensions/models/gaussian_R/gaussian_model.py b/examples/extensions/models/gaussian_R/gaussian_model.py
new file mode 100644
index 00000000..21c95f36
--- /dev/null
+++ b/examples/extensions/models/gaussian_R/gaussian_model.py
@@ -0,0 +1,126 @@
+import numpy as np
+
+from abcpy.models import Model
+
+import rpy2
+import rpy2.robjects as robjects
+import rpy2.robjects.numpy2ri
+rpy2.robjects.numpy2ri.activate()
+
+robjects.r('''
+    source('gaussian_model.R')
+''')
+
+r_simple_gaussian = robjects.globalenv['simple_gaussian']
+
+
+class Gaussian(Model):
+    def __init__(self, prior, seed=None):
+        self.prior = prior
+        self.sample_from_prior()
+        self.rng = np.random.RandomState(seed)
+
+    def set_parameters(self, theta):
+        theta = np.array(theta)
+
+        if theta.shape[0] > 2: return False
+        if theta[1] <= 0: return False
+
+        self.mu = theta[0]
+        self.sigma = theta[1]
+        return True
+
+    def get_parameters(self):
+        return np.array([self.mu, self.sigma])
+
+    def sample_from_prior(self):
+        sample = self.prior.sample(1).reshape(-1)
+        self.set_parameters(sample)
+
+    def simulate(self, k):
+        output = list(r_simple_gaussian(self.mu, self.sigma, k))
+        return output
+
+
+def infer_parameters():
+    # define observation for true parameters mean=170, std=15
+    y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343,
144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] + + # define prior + from abcpy.distributions import Uniform + prior = Uniform([150, 5],[200, 25]) + + # define the model + model = Gaussian(prior) + + # define statistics + from abcpy.statistics import Identity + statistics_calculator = Identity(degree = 2, cross = False) + + # define distance + from abcpy.distances import LogReg + distance_calculator = LogReg(statistics_calculator) + + # define kernel + from abcpy.distributions import MultiStudentT + mean, cov, df = np.array([.0, .0]), np.eye(2), 3. + kernel = MultiStudentT(mean, cov, df) + + # define backend + from abcpy.backends import BackendDummy as Backend + backend = Backend() + + # define sampling scheme + from abcpy.inferences import PMCABC + sampler = PMCABC(model, distance_calculator, kernel, backend) + + # sample from scheme + T, n_sample, n_samples_per_param = 3, 250, 10 + eps_arr = np.array([.75]) + epsilon_percentile = 10 + journal = sampler.sample(y_obs, T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) + + return journal + + +def analyse_journal(journal): + # output parameters and weights + print(journal.parameters) + print(journal.weights) + + # do post analysis + print(journal.posterior_mean()) + print(journal.posterior_cov()) + print(journal.posterior_histogram()) + + # print configuration + print(journal.configuration) + + # save and load journal + journal.save("experiments.jnl") + + from abcpy.output import Journal + new_journal = Journal.fromFile('experiments.jnl') + + +journal = infer_parameters() +mu = journal.get_parameters()[:,0].reshape(-1,1) +sigma = journal.get_parameters()[:,1].reshape(-1,1) + +import graph_ABC +plot_mu = graph_ABC.plot(mu, true_value = 170) +plot_sigma = graph_ABC.plot(sigma, true_value = 15) + +# this code is for testing purposes only and not relevant to run the example +import unittest +class ExampleExtendModelGaussianPython(unittest.TestCase): + def test_example(self): + journal = infer_parameters() + test_result = journal.posterior_mean()[0] + expected_result = 177.02 + self.assertLess(abs(test_result - expected_result), 2.) 
+ + +if __name__ == "__main__": + journal = infer_parameters() + analyse_journal(journal) diff --git a/examples/extensions/models/gaussian_R/graph_ABC.py b/examples/extensions/models/gaussian_R/graph_ABC.py new file mode 100644 index 00000000..122c6bbf --- /dev/null +++ b/examples/extensions/models/gaussian_R/graph_ABC.py @@ -0,0 +1,31 @@ + +import matplotlib.pyplot as plt +from scipy.stats import gaussian_kde +import numpy as np + +def plot(samples, path = None, true_value = 5, title = 'ABC posterior'): + Bayes_estimate = np.mean(samples, axis = 0) + theta = true_value + xmin, xmax = max(samples[:,0]), min(samples[:,0]) + positions = np.linspace(xmin, xmax, samples.shape[0]) + gaussian_kernel = gaussian_kde(samples[:,0].reshape(samples.shape[0],)) + values = gaussian_kernel(positions) + plt.figure() + plt.plot(positions,gaussian_kernel(positions)) + plt.plot([theta, theta],[min(values), max(values)+.1*(max(values)-min(values))]) + plt.plot([Bayes_estimate, Bayes_estimate],[min(values), max(values)+.1*(max(values)-min(values))]) + plt.ylim([min(values), max(values)+.1*(max(values)-min(values))]) + plt.xlabel(r'$\theta$') + plt.ylabel('density') + #plt.xlim([0,1]) + plt.rc('axes', labelsize=15) + plt.legend(loc='best', frameon=False, numpoints=1) + font = {'size' : 15} + plt.rc('font', **font) + plt.title(title) + if path is not None : + plt.savefig(path) + return plt + + + From 7f7d9b502446712ef5f369ee341978c9795ed9f2 Mon Sep 17 00:00:00 2001 From: Anthony Ebert Date: Mon, 10 Jul 2017 14:57:58 +0200 Subject: [PATCH 32/50] Remove superfluous line --- examples/extensions/models/gaussian_R/gaussian_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/extensions/models/gaussian_R/gaussian_model.py b/examples/extensions/models/gaussian_R/gaussian_model.py index 21c95f36..1c89c4c8 100644 --- a/examples/extensions/models/gaussian_R/gaussian_model.py +++ b/examples/extensions/models/gaussian_R/gaussian_model.py @@ -2,7 +2,7 @@ from abcpy.models import Model -import rpy2 + import rpy2.robjects as robjects import rpy2.robjects.numpy2ri rpy2.robjects.numpy2ri.activate() From 3187b7e6cfee340524a2c24fd954ef43ac7bf0b2 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 25 Jul 2017 14:09:06 +0200 Subject: [PATCH 33/50] Code cleanup and compliance --- abcpy/backend_mpi.py | 170 +++++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 77 deletions(-) diff --git a/abcpy/backend_mpi.py b/abcpy/backend_mpi.py index 9ad90c74..ec686f32 100644 --- a/abcpy/backend_mpi.py +++ b/abcpy/backend_mpi.py @@ -1,24 +1,22 @@ -from abc import ABCMeta, abstractmethod -from abcpy.backends import Backend,PDS,BDS -from mpi4py import MPI import numpy as np import cloudpickle -import sys +from mpi4py import MPI +from abcpy.backends import Backend, PDS, BDS class BackendMPIMaster(Backend): - """Defines the behavior of the master process - + """Defines the behavior of the master process + This class defines the behavior of the master process (The one with rank==0) in MPI. 
""" #Define some operation codes to make it more readable - OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] + OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7] finalized = False - def __init__(self,master_node_ranks = list(range(36))): + def __init__(self, master_node_ranks=list(range(36))): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() @@ -34,9 +32,9 @@ def __init__(self,master_node_ranks = list(range(36))): self.bds_store = {} - def __command_slaves(self,command,data): - """ - This method handles the sending of the command to the slaves + def __command_slaves(self, command, data): + """ + This method handles the sending of the command to the slaves telling them what operation to perform next. Parameters @@ -45,30 +43,30 @@ def __command_slaves(self,command,data): One of the operation codes defined in the class definition as OP_xxx which tell the slaves what operation they're performing. data: tuple - Any of the data required for the operation which needs to be bundled + Any of the data required for the operation which needs to be bundled in the data packet sent. """ if command == self.OP_PARALLELIZE: #In parallelize we receive data as (pds_id) - data_packet = (command , data[0]) + data_packet = (command, data[0]) elif command == self.OP_MAP: #In map we receive data as (pds_id,pds_id_new,func) #Use cloudpickle to dump the function into a string. function_packed = self.__sanitize_and_pack_func(data[2]) - data_packet = (command,data[0],data[1],function_packed) + data_packet = (command, data[0], data[1], function_packed) elif command == self.OP_BROADCAST: - data_packet = (command,data[0]) + data_packet = (command, data[0]) elif command == self.OP_COLLECT: #In collect we receive data as (pds_id) - data_packet = (command,data[0]) + data_packet = (command, data[0]) elif command == self.OP_DELETEPDS or command == self.OP_DELETEBDS: #In deletepds we receive data as (pds_id) or bds_id - data_packet = (command,data[0]) + data_packet = (command, data[0]) elif command == self.OP_FINISH: data_packet = (command,) @@ -76,10 +74,10 @@ def __command_slaves(self,command,data): _ = self.comm.bcast(data_packet, root=0) - def __sanitize_and_pack_func(self,func): + def __sanitize_and_pack_func(self, func): """ Prevents the function from packing the backend by temporarily - setting it to another variable and then uses cloudpickle + setting it to another variable and then uses cloudpickle to pack it into a string to be sent. Parameters @@ -95,12 +93,12 @@ def __sanitize_and_pack_func(self,func): """ #Set the backend to None to prevent it from being packed - globals()['backend'] = {} + globals()['backend'] = {} function_packed = cloudpickle.dumps(func) #Reset the backend to self after it's been packed - globals()['backend'] = self + globals()['backend'] = self return function_packed @@ -112,7 +110,7 @@ def __generate_new_pds_id(self): Returns ------- - Returns a unique integer. + Returns a unique integer id. """ @@ -127,7 +125,7 @@ def __generate_new_bds_id(self): Returns ------- - Returns a unique integer. + Returns a unique integer id. 
""" @@ -158,9 +156,9 @@ def parallelize(self, python_list): # Tell the slaves to enter parallelize() pds_id = self.__generate_new_pds_id() - self.__command_slaves(self.OP_PARALLELIZE,(pds_id,)) + self.__command_slaves(self.OP_PARALLELIZE, (pds_id,)) - #Initialize empty data lists for the processes on the master node + #Initialize empty data lists for the processes on the master node rdd_masters = [[] for i in range(len(self.master_node_ranks))] #Split the data only amongst the number of workers @@ -201,9 +199,9 @@ def map(self, func, pds): #Generate a new pds_id to be used by the slaves for the resultant PDS pds_id_new = self.__generate_new_pds_id() - - data = (pds_id,pds_id_new,func) - self.__command_slaves(self.OP_MAP,data) + + data = (pds_id, pds_id_new, func) + self.__command_slaves(self.OP_MAP, data) rdd = list(map(func, pds.python_list)) @@ -214,7 +212,8 @@ def map(self, func, pds): def collect(self, pds): """ - Gather the pds from all the workers, send it to the master and return it as a standard Python list. + Gather the pds from all the workers, + send it to the master and return it as a standard Python list. Parameters ---------- @@ -228,7 +227,7 @@ def collect(self, pds): """ # Tell the slaves to enter collect with the pds's pds_id - self.__command_slaves(self.OP_COLLECT,(pds.pds_id,)) + self.__command_slaves(self.OP_COLLECT, (pds.pds_id,)) python_list = self.comm.gather(pds.python_list, root=0) @@ -241,10 +240,10 @@ def collect(self, pds): return combined_result - def broadcast(self,value): + def broadcast(self, value): # Tell the slaves to enter broadcast() bds_id = self.__generate_new_bds_id() - self.__command_slaves(self.OP_BROADCAST,(bds_id,)) + self.__command_slaves(self.OP_BROADCAST, (bds_id,)) _ = self.comm.bcast(value, root=0) @@ -252,10 +251,10 @@ def broadcast(self,value): return bds - def delete_remote_pds(self,pds_id): + def delete_remote_pds(self, pds_id): """ - A public function for the PDS objects on the master to call when they go out of - scope or are deleted in order to ensure the same happens on the slaves. + A public function for the PDS objects on the master to call when they go out of + scope or are deleted in order to ensure the same happens on the slaves. Parameters ---------- @@ -264,18 +263,26 @@ def delete_remote_pds(self,pds_id): """ if not self.finalized: - self.__command_slaves(self.OP_DELETEPDS,(pds_id,)) + self.__command_slaves(self.OP_DELETEPDS, (pds_id,)) - def delete_remote_bds(self,bds_id): + def delete_remote_bds(self, bds_id): """ + Public function for the BDS objects on the master to call when they go + out of score or are deleted in order to ensure they are deleted + ont he slaves as well. + + Parameters + ---------- + bds_id: int + A bds_id identifying the remote BDS on the slaves to delete. """ if not self.finalized: - #The master deallocates it's BDS data. Explicit because + #The master deallocates it's BDS data. Explicit because #.. bds_store and BDSMPI object are disconnected. del backend.bds_store[bds_id] - self.__command_slaves(self.OP_DELETEBDS,(bds_id,)) + self.__command_slaves(self.OP_DELETEBDS, (bds_id,)) def __del__(self): @@ -287,7 +294,7 @@ def __del__(self): """ #Tell the slaves they can exit gracefully. - self.__command_slaves(self.OP_FINISH,None) + self.__command_slaves(self.OP_FINISH, None) #Finalize the connection because the slaves should have finished. MPI.Finalize() @@ -299,12 +306,12 @@ class BackendMPISlave(Backend): This class defines how the slaves should behave during operation. 
Slaves are those processes(not nodes like Spark) that have rank!=0 - and whose ids are not present in the list of non workers. + and whose ids are not present in the list of non workers. """ - OP_PARALLELIZE,OP_MAP,OP_COLLECT,OP_BROADCAST,OP_DELETEPDS,OP_DELETEBDS,OP_FINISH=[1,2,3,4,5,6,7] + OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7] + - def __init__(self): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() @@ -327,16 +334,16 @@ def slave_run(self): It makes the slave wait for a command to perform from the master and then calls the appropriate function. - This method also takes care of the synchronization of data between the - master and the slaves by matching PDSs based on the pds_ids sent by the master + This method also takes care of the synchronization of data between the + master and the slaves by matching PDSs based on the pds_ids sent by the master with the command. - Commands received from the master are of the form of a tuple. + Commands received from the master are of the form of a tuple. The first component of the tuple is always the operation to be performed and the rest are conditional on the operation. - (op,pds_id) where op == OP_PARALLELIZE for parallelize - (op,pds_id,pds_id_result,func) where op == OP_MAP for map. + (op,pds_id) where op == OP_PARALLELIZE for parallelize + (op,pds_id, pds_id_result,func) where op == OP_MAP for map. (op,pds_id) where op == OP_COLLECT for a collect operation (op,pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on slaves (op,) where op==OP_FINISH for the slave to break out of the loop and terminate @@ -357,8 +364,8 @@ def slave_run(self): elif op == self.OP_MAP: - pds_id,pds_id_result,function_packed = data[1:] - self.__rec_pds_id, self.__rec_pds_id_result = pds_id,pds_id_result + pds_id, pds_id_result, function_packed = data[1:] + self.__rec_pds_id, self.__rec_pds_id_result = pds_id, pds_id_result #Use cloudpickle to convert back function string to a function func = cloudpickle.loads(function_packed) @@ -406,7 +413,7 @@ def __get_received_pds_id(self): our slave's created PDS with the master's. """ - return self.__rec_pds_id,self.__rec_pds_id_result + return self.__rec_pds_id, self.__rec_pds_id_result def parallelize(self, python_list): @@ -431,7 +438,7 @@ def parallelize(self, python_list): """ #Get the PDS id we should store this data in - pds_id,pds_id_new = self.__get_received_pds_id() + pds_id, pds_id_new = self.__get_received_pds_id() data_chunk = self.comm.scatter(None, root=0) @@ -460,7 +467,7 @@ def map(self, func, pds): """ #Get the PDS id we operate on and the new one to store the result in - pds_id,pds_id_new = self.__get_received_pds_id() + pds_id, pds_id_new = self.__get_received_pds_id() rdd = list(map(func, pds.python_list)) @@ -471,7 +478,8 @@ def map(self, func, pds): def collect(self, pds): """ - Gather the pds from all the workers, send it to the master and return it as a standard Python list. + Gather the pds from all the workers, + send it to the master and return it as a standard Python list. Parameters ---------- @@ -488,39 +496,39 @@ def collect(self, pds): _ = self.comm.gather(pds.python_list, root=0) - def broadcast(self,value): + def broadcast(self, value): """ Value is ignored for the slaves. 
We get data from master """ - value = self.comm.bcast(None, root=0) self.bds_store[self.__bds_id] = value class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else BackendMPISlave): - """A backend parallelized by using MPI + """A backend parallelized by using MPI The backend conditionally inherits either the BackendMPIMaster class - or the BackendMPISlave class depending on it's rank. This lets - BackendMPI have a uniform interface for the user but allows for a - logical split between functions performed by the master + or the BackendMPISlave class depending on it's rank. This lets + BackendMPI have a uniform interface for the user but allows for a + logical split between functions performed by the master and the slaves. """ - def __init__(self,master_node_ranks = list(range(36))): + def __init__(self, master_node_ranks=list(range(36))): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() - if self.size<2: - raise ValueError('Please, use at least 2 ranks.') + if self.size < 2: + raise ValueError('A minimum of 2 ranks are required for the MPI backend') #Set the global backend - globals()['backend'] = self + globals()['backend'] = self - if self.rank==0: + #Call the appropriate constructors and pass the required data + if self.rank == 0: super().__init__(master_node_ranks) else: super().__init__() @@ -529,17 +537,17 @@ def __init__(self,master_node_ranks = list(range(36))): class PDSMPI(PDS): """ - This is a wrapper for a Python parallel data set. + This is an MPI wrapper for a Python parallel data set. """ - def __init__(self, python_list, pds_id , backend_obj): + def __init__(self, python_list, pds_id, backend_obj): self.python_list = python_list self.pds_id = pds_id self.backend_obj = backend_obj def __del__(self): """ - Destructor to be called when a PDS falls out of scope and\or is being deleted. + Destructor to be called when a PDS falls out of scope and/or is being deleted. Uses the backend to send a message to destroy the slaves' copy of the pds. """ try: @@ -548,16 +556,10 @@ def __del__(self): #Catch "delete_remote_pds not defined" for slaves and ignore. pass -class BackendMPITestHelper: - def check_pds(self,k): - return k in backend.pds_store.keys() - - def check_bds(self,k): - return k in backend.bds_store.keys() class BDSMPI(BDS): """ - The reference class for broadcast data set (BDS). + This is a wrapper for MPI's BDS class. """ def __init__(self, object, bds_id, backend_obj): @@ -575,12 +577,26 @@ def value(self): def __del__(self): """ - Destructor to be called when a BDS falls out of scope and\or is being deleted. + Destructor to be called when a BDS falls out of scope and/or is being deleted. Uses the backend to send a message to destroy the slaves' copy of the bds. """ - + try: backend.delete_remote_bds(self.bds_id) except AttributeError: #Catch "delete_remote_pds not defined" for slaves and ignore. pass + +class BackendMPITestHelper: + """ + Helper function for some of the test cases to be able to access and verify class members. + """ + def check_pds(self, k): + """Checks if a PDS exists in the pds data store. Used to verify deletion and creation + """ + return k in backend.pds_store.keys() + + def check_bds(self, k): + """Checks if a BDS exists in the BDS data store. 
Used to verify deletion and creation + """ + return k in backend.bds_store.keys() From 500db42d4fbec4ae108d0b405c387229a2a43510 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 25 Jul 2017 15:57:51 +0200 Subject: [PATCH 34/50] Fixed mpi backend example test --- examples/backends/mpi/hello_world.py | 31 ---------------------- examples/backends/mpi/pmcabc_gaussian.py | 33 +++++++++++++++++++----- 2 files changed, 27 insertions(+), 37 deletions(-) delete mode 100644 examples/backends/mpi/hello_world.py diff --git a/examples/backends/mpi/hello_world.py b/examples/backends/mpi/hello_world.py deleted file mode 100644 index 097a4a61..00000000 --- a/examples/backends/mpi/hello_world.py +++ /dev/null @@ -1,31 +0,0 @@ -from abcpy.backend_mpi import BackendMPI - -if __name__ == "__main__": - - backend = BackendMPI() - data = list(range(100)) - - def square(x): - return x**2 - - class staticfunctest: - @staticmethod - def cube(x): - return x**3 - - - datachunk_pds = backend.parallelize(data) - print("Worker with Rank", backend.rank, "has", datachunk_pds.python_list) - - mapres_pds = backend.map(square, datachunk_pds) - print ("Worker with Rank", backend.rank, "got map result", mapres_pds.python_list) - - print("Result of the map is:",backend.collect(mapres_pds)) - print("Original Data was:",backend.collect(datachunk_pds)) - - mapres_pds = backend.map(staticfunctest.cube, datachunk_pds) - print("Result of the map is:",backend.collect(mapres_pds)) - - bcast_bds = backend.broadcast(data) - #print("Broadcast at Rank", backend.rank, "has", backend.bds_store[bcast_bds.bds_id]) - diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index 360952c6..1b1dcd55 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -26,8 +26,10 @@ def infer_parameters(): kernel = MultiStudentT(mean, cov, df, seed=1) # define backend + global backend from abcpy.backend_mpi import BackendMPI as Backend - backend = Backend() + #Load and initialize backend only if it hasn't been set up already + backend = Backend() if backend is None else backend # define sampling scheme from abcpy.inferences import PMCABC @@ -63,11 +65,30 @@ def analyse_journal(journal): import unittest -import findspark -class ExampleGaussianSparkTest(unittest.TestCase): - def setUp(self): - findspark.init() - +from mpi4py import MPI + +def setUpModule(): + ''' + If an exception is raised in a setUpModule then none of + the tests in the module will be run. + + This is useful because the slaves run in a while loop on initialization + only responding to the master's commands and will never execute anything else. + + On termination of master, the slaves call quit() that raises a SystemExit(). + Because of the behaviour of setUpModule, it will not run any unit tests + for the slave and we now only need to write unit-tests from the master's + point of view. 
+ ''' + global rank,backend + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + + from abcpy.backend_mpi import BackendMPI as Backend + backend = Backend() + + +class ExampleGaussianMPITest(unittest.TestCase): def test_example(self): journal = infer_parameters() test_result = journal.posterior_mean()[0] From 088bbe6f115c671fb010135fcbdabe239fe26527 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 25 Jul 2017 16:00:31 +0200 Subject: [PATCH 35/50] Temporarily removed mpi related stuff from requirements.txt --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a50335ab..564dd47b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,4 @@ glmnet findspark sphinx==1.4.8 sphinx_rtd_theme -coverage -cloudpickle \ No newline at end of file +coverage \ No newline at end of file From 0821be486b8e410641cdd792e21de0fe0119c604 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 25 Jul 2017 16:09:53 +0200 Subject: [PATCH 36/50] Removed mpi test from make test. Has to be manually invoked --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 42435201..fe0a346c 100644 --- a/Makefile +++ b/Makefile @@ -15,14 +15,14 @@ clean: find . -name ".#*" -delete find . -name "#*#" -delete -test: unittest unittest_mpi exampletest doctest +test: unittest exampletest doctest unittest: python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in unit tests."; exit 1) unittest_mpi: - mpirun -np 4 python3 -m unittest discover -s tests -v -p "backend_mpi_tests.py" || (echo "Error in unit tests."; exit 1) + mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_mpi_tests.py" || (echo "Error in unit tests."; exit 1) $(MAKEDIRS): From 599be65d1ecc5ecb4392fa87cfbf1ea317f63d8d Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Tue, 25 Jul 2017 20:05:12 +0200 Subject: [PATCH 37/50] Changed backends module organization. Renamed mpi test script to avoid autoexecution on make test --- Makefile | 6 +- abcpy/backends/__init__.py | 3 + abcpy/{backends.py => backends/base.py} | 145 ------------------ abcpy/{backend_mpi.py => backends/mpi.py} | 0 abcpy/backends/spark.py | 145 ++++++++++++++++++ ...kend_mpi_tests.py => backend_tests_mpi.py} | 0 6 files changed, 151 insertions(+), 148 deletions(-) create mode 100644 abcpy/backends/__init__.py rename abcpy/{backends.py => backends/base.py} (61%) rename abcpy/{backend_mpi.py => backends/mpi.py} (100%) create mode 100644 abcpy/backends/spark.py rename tests/{backend_mpi_tests.py => backend_tests_mpi.py} (100%) diff --git a/Makefile b/Makefile index fe0a346c..87a3660f 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,10 @@ MAKEDIRS=$(shell find examples -name Makefile -exec dirname {} \;) whl_file = abcpy-${VERSION}-py3-none-any.whl .DEFAULT: help -.PHONY: help clean doc doctest exampletest package test uninstall unittest install reinstall $(MAKEDIRS) +.PHONY: help clean doc doctest exampletest package test uninstall unittest unittest_mpi install reinstall $(MAKEDIRS) help: - @echo Targets are: clean, doc, doctest, exampletest, package, uninstall, unittest, test + @echo Targets are: clean, doc, doctest, exampletest, package, uninstall, unittest, unittest_mpi , test clean: find . 
-name "*.pyc" -type f -delete @@ -22,7 +22,7 @@ unittest: unittest_mpi: - mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_mpi_tests.py" || (echo "Error in unit tests."; exit 1) + mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_tests_mpi.py" || (echo "Error in unit tests."; exit 1) $(MAKEDIRS): diff --git a/abcpy/backends/__init__.py b/abcpy/backends/__init__.py new file mode 100644 index 00000000..b4c0f823 --- /dev/null +++ b/abcpy/backends/__init__.py @@ -0,0 +1,3 @@ +from abcpy.backends.base import * +from abcpy.backends.mpi import BackendMPI +from abcpy.backends.spark import BackendSpark diff --git a/abcpy/backends.py b/abcpy/backends/base.py similarity index 61% rename from abcpy/backends.py rename to abcpy/backends/base.py index 461d60ed..c525e34b 100644 --- a/abcpy/backends.py +++ b/abcpy/backends/base.py @@ -226,148 +226,3 @@ def __init__(self, object): def value(self): return self.object - - - -class BackendSpark(Backend): - """ - A parallelization backend for Apache Spark. It is essetially a wrapper for - the required Spark functionality. - """ - - def __init__(self, sparkContext, parallelism=4): - """ - Initialize the backend with an existing and configured SparkContext. - - Parameters - ---------- - sparkContext: pyspark.SparkContext - an existing and fully configured PySpark context - parallelism: int - defines on how many workers a distributed dataset can be distributed - """ - self.sc = sparkContext - self.parallelism = parallelism - - - def parallelize(self, python_list): - """ - This is a wrapper of pyspark.SparkContext.parallelize(). - - Parameters - ---------- - list: Python list - list that is distributed on the workers - - Returns - ------- - PDSSpark class (parallel data set) - A reference object that represents the parallelized list - """ - - rdd = self.sc.parallelize(python_list, self.parallelism) - pds = PDSSpark(rdd) - return pds - - - def broadcast(self, object): - """ - This is a wrapper for pyspark.SparkContext.broadcast(). - - Parameters - ---------- - object: Python object - An abitrary object that should be available on all workers - Returns - ------- - BDSSpark class (broadcast data set) - A reference to the broadcasted object - """ - - bcv = self.sc.broadcast(object) - bds = BDSSpark(bcv) - return bds - - - def map(self, func, pds): - """ - This is a wrapper for pyspark.rdd.map() - - Parameters - ---------- - func: Python func - A function that can be applied to every element of the pds - pds: PDSSpark class - A parallel data set to which func should be applied - Returns - ------- - PDSSpark class - a new parallel data set that contains the result of the map - """ - - rdd = pds.rdd.map(func) - new_pds = PDSSpark(rdd) - return new_pds - - - def collect(self, pds): - """ - A wrapper for pyspark.rdd.collect() - - Parameters - ---------- - pds: PDSSpark class - a parallel data set - Returns - ------- - Python list - all elements of pds as a list - """ - - python_list = pds.rdd.collect() - return python_list - - - -class PDSSpark(PDS): - """ - This is a wrapper for Apache Spark RDDs. - """ - - def __init__(self, rdd): - """ - Returns - ------- - rdd: pyspark.rdd - initialize with an Spark RDD - """ - - self.rdd = rdd - - - -class BDSSpark(BDS): - """ - This is a wrapper for Apache Spark Broadcast variables. 
- """ - - def __init__(self, bcv): - """ - Parameters - ---------- - bcv: pyspark.broadcast.Broadcast - Initialize with a Spark broadcast variable - """ - - self.bcv = bcv - - - def value(self): - """ - Returns - ------- - object - returns the referenced object that was broadcasted. - """ - - return self.bcv.value diff --git a/abcpy/backend_mpi.py b/abcpy/backends/mpi.py similarity index 100% rename from abcpy/backend_mpi.py rename to abcpy/backends/mpi.py diff --git a/abcpy/backends/spark.py b/abcpy/backends/spark.py new file mode 100644 index 00000000..33d960a9 --- /dev/null +++ b/abcpy/backends/spark.py @@ -0,0 +1,145 @@ + +from abcpy.backends import Backend, PDS, BDS + +class BackendSpark(Backend): + """ + A parallelization backend for Apache Spark. It is essetially a wrapper for + the required Spark functionality. + """ + + def __init__(self, sparkContext, parallelism=4): + """ + Initialize the backend with an existing and configured SparkContext. + + Parameters + ---------- + sparkContext: pyspark.SparkContext + an existing and fully configured PySpark context + parallelism: int + defines on how many workers a distributed dataset can be distributed + """ + self.sc = sparkContext + self.parallelism = parallelism + + + def parallelize(self, python_list): + """ + This is a wrapper of pyspark.SparkContext.parallelize(). + + Parameters + ---------- + list: Python list + list that is distributed on the workers + + Returns + ------- + PDSSpark class (parallel data set) + A reference object that represents the parallelized list + """ + + rdd = self.sc.parallelize(python_list, self.parallelism) + pds = PDSSpark(rdd) + return pds + + + def broadcast(self, object): + """ + This is a wrapper for pyspark.SparkContext.broadcast(). + + Parameters + ---------- + object: Python object + An abitrary object that should be available on all workers + Returns + ------- + BDSSpark class (broadcast data set) + A reference to the broadcasted object + """ + + bcv = self.sc.broadcast(object) + bds = BDSSpark(bcv) + return bds + + + def map(self, func, pds): + """ + This is a wrapper for pyspark.rdd.map() + + Parameters + ---------- + func: Python func + A function that can be applied to every element of the pds + pds: PDSSpark class + A parallel data set to which func should be applied + Returns + ------- + PDSSpark class + a new parallel data set that contains the result of the map + """ + + rdd = pds.rdd.map(func) + new_pds = PDSSpark(rdd) + return new_pds + + + def collect(self, pds): + """ + A wrapper for pyspark.rdd.collect() + + Parameters + ---------- + pds: PDSSpark class + a parallel data set + Returns + ------- + Python list + all elements of pds as a list + """ + + python_list = pds.rdd.collect() + return python_list + + + +class PDSSpark(PDS): + """ + This is a wrapper for Apache Spark RDDs. + """ + + def __init__(self, rdd): + """ + Returns + ------- + rdd: pyspark.rdd + initialize with an Spark RDD + """ + + self.rdd = rdd + + + +class BDSSpark(BDS): + """ + This is a wrapper for Apache Spark Broadcast variables. + """ + + def __init__(self, bcv): + """ + Parameters + ---------- + bcv: pyspark.broadcast.Broadcast + Initialize with a Spark broadcast variable + """ + + self.bcv = bcv + + + def value(self): + """ + Returns + ------- + object + returns the referenced object that was broadcasted. 
+ """ + + return self.bcv.value diff --git a/tests/backend_mpi_tests.py b/tests/backend_tests_mpi.py similarity index 100% rename from tests/backend_mpi_tests.py rename to tests/backend_tests_mpi.py From 0d54bdee01cadb7bf868341db4819d028f884018 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Wed, 26 Jul 2017 11:25:01 +0200 Subject: [PATCH 38/50] Fixed master rank list to be just the first rank (default) --- abcpy/backends/mpi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py index ec686f32..52d04acc 100644 --- a/abcpy/backends/mpi.py +++ b/abcpy/backends/mpi.py @@ -16,7 +16,7 @@ class BackendMPIMaster(Backend): OP_PARALLELIZE, OP_MAP, OP_COLLECT, OP_BROADCAST, OP_DELETEPDS, OP_DELETEBDS, OP_FINISH = [1, 2, 3, 4, 5, 6, 7] finalized = False - def __init__(self, master_node_ranks=list(range(36))): + def __init__(self, master_node_ranks=[0]): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() @@ -514,7 +514,7 @@ class BackendMPI(BackendMPIMaster if MPI.COMM_WORLD.Get_rank() == 0 else Backend and the slaves. """ - def __init__(self, master_node_ranks=list(range(36))): + def __init__(self, master_node_ranks=[0]): self.comm = MPI.COMM_WORLD self.size = self.comm.Get_size() self.rank = self.comm.Get_rank() From c2e6e3e2240d14beb0bbba82873058c5dcbde231 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Wed, 26 Jul 2017 12:53:00 +0200 Subject: [PATCH 39/50] Changed mpi backend load path to follow new structure --- examples/backends/mpi/pmcabc_gaussian.py | 7 ++++--- tests/backend_tests_mpi.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index 1b1dcd55..e9ceb04f 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -1,5 +1,5 @@ import numpy as np - +backend = None def infer_parameters(): # define observation for true parameters mean=170, std=15 y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] @@ -27,7 +27,7 @@ def infer_parameters(): # define backend global backend - from abcpy.backend_mpi import BackendMPI as Backend + from abcpy.backends import BackendMPI as Backend #Load and initialize backend only if it hasn't been set up already backend = Backend() if backend is None else backend @@ -84,10 +84,11 @@ def setUpModule(): comm = MPI.COMM_WORLD rank = 
comm.Get_rank() - from abcpy.backend_mpi import BackendMPI as Backend + from abcpy.backends import BackendMPI as Backend backend = Backend() + class ExampleGaussianMPITest(unittest.TestCase): def test_example(self): journal = infer_parameters() diff --git a/tests/backend_tests_mpi.py b/tests/backend_tests_mpi.py index ed7884e4..18377f38 100644 --- a/tests/backend_tests_mpi.py +++ b/tests/backend_tests_mpi.py @@ -1,6 +1,6 @@ import unittest from mpi4py import MPI -from abcpy.backend_mpi import BackendMPI,BackendMPITestHelper +from abcpy.backends import BackendMPI,BackendMPITestHelper def setUpModule(): From 7d275d02feeb1464e699e3065b2ebf54ff738b20 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Wed, 26 Jul 2017 14:48:08 +0200 Subject: [PATCH 40/50] Modified init for dynamic import --- abcpy/backends/__init__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/abcpy/backends/__init__.py b/abcpy/backends/__init__.py index b4c0f823..93a9b88b 100644 --- a/abcpy/backends/__init__.py +++ b/abcpy/backends/__init__.py @@ -1,3 +1,14 @@ from abcpy.backends.base import * -from abcpy.backends.mpi import BackendMPI -from abcpy.backends.spark import BackendSpark + + +def BackendMPI(*args,**kwargs): + from abcpy.backends.mpi import BackendMPI + return BackendMPI(*args,**kwargs) + +def BackendMPITestHelper(*args,**kwargs): + from abcpy.backends.mpi import BackendMPITestHelper + return BackendMPITestHelper(*args,**kwargs) + +def BackendSpark(*args,**kwargs): + from abcpy.backends.spark import BackendSpark + return BackendSpark(*args,**kwargs) From 9a8bce16630e4a16db6b167fa09a57b284866549 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Thu, 3 Aug 2017 09:37:20 +0200 Subject: [PATCH 41/50] restructured MPI backend example --- examples/backends/mpi/pmcabc_gaussian.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index e9ceb04f..5bbd796b 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -1,5 +1,12 @@ import numpy as np backend = None + +def setup_backend(): + global backend + from abcpy.backends import BackendMPI as Backend + backend = Backend() + + def infer_parameters(): # define observation for true parameters mean=170, std=15 y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] @@ 
-25,11 +32,6 @@ def infer_parameters(): mean, cov, df = np.array([.0, .0]), np.eye(2), 3. kernel = MultiStudentT(mean, cov, df, seed=1) - # define backend - global backend - from abcpy.backends import BackendMPI as Backend - #Load and initialize backend only if it hasn't been set up already - backend = Backend() if backend is None else backend # define sampling scheme from abcpy.inferences import PMCABC @@ -80,14 +82,7 @@ def setUpModule(): for the slave and we now only need to write unit-tests from the master's point of view. ''' - global rank,backend - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - - from abcpy.backends import BackendMPI as Backend - backend = Backend() - - + setup_backend() class ExampleGaussianMPITest(unittest.TestCase): def test_example(self): @@ -98,6 +93,7 @@ def test_example(self): if __name__ == "__main__": + setup_backend() journal = infer_parameters() analyse_journal(journal) From 21c84c71aa18070dfd97e578a4ad96909abdccf4 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Thu, 3 Aug 2017 10:19:20 +0200 Subject: [PATCH 42/50] Extended automated testing in Makefile. --- Makefile | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 87a3660f..c2aed790 100644 --- a/Makefile +++ b/Makefile @@ -15,27 +15,34 @@ clean: find . -name ".#*" -delete find . -name "#*#" -delete -test: unittest exampletest doctest +$(MAKEDIRS): + make -C $@ -unittest: - python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in unit tests."; exit 1) +# testing +test: unittest unittest_mpi exampletest exampletest_mpi doctest + +unittest: + echo "Running standard unit tests.." + python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in standard unit tests."; exit 1) unittest_mpi: - mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_tests_mpi.py" || (echo "Error in unit tests."; exit 1) + echo "Running MPI backend unit tests.." + mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_tests_mpi.py" || (echo "Error in MPI unit tests."; exit 1) +exampletest: $(MAKEDIRS) + echo "Testing standard examples.." + python3 -m unittest discover -s examples -v -p "*.py" || (echo "Error in example tests."; exit 1) -$(MAKEDIRS): - make -C $@ +exampletest_mpi: + echo "Testing MPI backend examples.." + mpirun -np 2 python3 -m unittest -v examples/backends/mpi/pmcabc_gaussian.py || (echo "Error in MPI example tests."; exit 1) doctest: make -C doc html || (echo "Error in documentation generator."; exit 1) -exampletest: $(MAKEDIRS) - python3 -m unittest discover -s examples -v -p "*.py" || (echo "Error in example tests."; exit 1) - coveragetest: - command -v coverage >/dev/null 2>&1 || { echo >&2 "Python package 'coverage' has to be installed. Please, run 'pip3 install coverage'."; exit;} @- $(foreach TEST, $(UNITTESTS), \ echo === Testing code coverage: $(TEST); \ python3 -m unittest $(TEST); \ From 80572962ee7362e333ed8008ae8e8df7f252c2df Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 12 Jul 2017 11:04:46 +0200 Subject: [PATCH 43/50] Added documentation on how to use C++ code with ABCpy using the SWIG framework.
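Note: once the SWIG wrapper documented in this patch is built, the C++ model is imported and called like any other Python module. A minimal usage sketch (the module and function names come from the example files; the wrapped signature shown is illustrative, not verified):

::

    import numpy as np
    import gaussian_model_simple  # Python module generated by SWIG

    k, mu, sigma = 10, 170.0, 15.0
    # The wrapper returns a numpy array of simulated observations instead of
    # filling a caller-provided buffer as the raw C++ function does.
    samples = gaussian_model_simple.gaussian_model(k, mu, sigma)  # illustrative signature
    assert isinstance(samples, np.ndarray)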
--- doc/source/README.rst | 81 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/doc/source/README.rst b/doc/source/README.rst index 5e0c4775..2a70faf0 100644 --- a/doc/source/README.rst +++ b/doc/source/README.rst @@ -247,8 +247,86 @@ same way (see `Getting Started`_) as we would do with shipped models. The complete example code can be found `here `_ + +Use ABCpy with a model written in C++ +====================================== + +There are several frameworks that help you integrate your C++/C code into Python. We showcase examples for + +* `Swig `_ +* `Pybind `_ + +Using Swig +---------- + +Swig is a tool that creates a Python wrapper for our C++/C code using an +interface (file) that we have to specify. We can then import the wrapper and +in turn use our C++ code with ABCpy as if it were written in Python. + +We go through a complete example to illustrate how to use a simple Gaussian +model written in C++ with ABCpy. First, have a look at our C++ model: + +.. literalinclude:: ../../examples/extensions/models/gaussian_cpp/gaussian_model_simple.cpp + :language: c++ + :lines: 9 - 17 + +To use this code in Python, we need to specify exactly how to expose the C++ +function to Python. Therefore, we write a Swig interface file that looks as +follows: + +.. literalinclude:: ../../examples/extensions/models/gaussian_cpp/gaussian_model_simple.i + :language: c++ + +In the first line we define the module name we later have to import in our +ABCpy Python code. Then, in curly brackets, we specify which libraries we want +to include and which function we want to expose through the wrapper. + +Now comes the tricky part. The model class expects a method `simulate` that +forward-simulates our model and which returns an array of synthetic +observations. However, C++/C does not have the concept of returning an array; +instead, in C++/C we would provide a memory position (pointer) to which the +results are written. Swig has to translate between the two concepts. We actually use a +Swig interface definition from numpy called `import_array`. The line + +.. literalinclude:: ../../examples/extensions/models/gaussian_cpp/gaussian_model_simple.i + :language: c++ + :lines: 18 + +states that we want the two parameters `result` and `k` of the `gaussian_model` +C++ function to be interpreted as an array of length k that is returned. Have a +look at the Python code below and observe how the wrapped Python function takes only two +instead of four parameters and returns a numpy array. + +The first step to get everything running is to translate the Swig interface file +to wrapper code in C++ and Python. +:: + swig -python -c++ -o gaussian_model_simple_wrap.cpp gaussian_model_simple.i + +This creates two wrapper files `gaussian_model_simple_wrap.cpp` and +`gaussian_model_simple.py`. Now the C++ files can be compiled: +:: + g++ -fPIC -I /usr/include/python3.5m -c gaussian_model_simple.cpp -o gaussian_model_simple.o + g++ -fPIC -I /usr/include/python3.5m -c gaussian_model_simple_wrap.cpp -o gaussian_model_simple_wrap.o + g++ -shared gaussian_model_simple.o gaussian_model_simple_wrap.o -o _gaussian_model_simple.so + +Note that the include paths might need to be adapted to your system. Finally, we +can write a Python model which uses our C++ code: + +..
literalinclude:: ../../examples/extensions/models/gaussian_cpp/pmcabc-gaussian_model_simple.py + :language: python + :lines: 3 - 32 + +The important lines are where we import the wrapper code as a module (line 2) and call +the respective model function (line -2). + +The full code is available in `examples/extensions/models/gaussian_cpp/`. To +simplify compilation of SWIG and C++ code we created a Makefile. Note that you +might need to adapt some paths in the Makefile. + + + Use ABCpy with a model written in R -========================== +=================================== Statisticians often use the R language to build statistical models. R models can be incorporated within the ABCpy language with the `rpy2` Python package. We show how to use the `rpy2` package to connect with a model written in R. @@ -275,6 +353,7 @@ This imports the R function `simple_gaussian` into the python environment. We ne The default output for R functions in python is a float vector. This must be converted into a list for the purposes of ABCpy. + .. Extending: Add your Distance ============================ From 3187442ee18e7b765ed0a804b2c460779e0bc87e Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 12 Jul 2017 11:05:27 +0200 Subject: [PATCH 44/50] Added developer documentation on deployment & branching. --- doc/source/DEVELOP.rst | 26 ++++++++++++++++++++++ doc/source/index.rst | 6 ++++++ 2 files changed, 32 insertions(+) create mode 100644 doc/source/DEVELOP.rst diff --git a/doc/source/DEVELOP.rst b/doc/source/DEVELOP.rst new file mode 100644 index 00000000..bcdccfe3 --- /dev/null +++ b/doc/source/DEVELOP.rst @@ -0,0 +1,26 @@ +Branching Scheme +================ + +We use the branching strategy described in this `blog post `_. + + +Deploy a new Release +==================== + +This documentation is mainly intended for the core developers. The deployment of +new releases is automated using Travis CI. However, there are still a few manual +steps required in order to deploy a new release. Assume we want to deploy the +new version `M.m.b`: + +1. Create a release branch `release-M.m.b` +2. Adapt the `VERSION` file in the repo's root directory: `echo M.m.b > VERSION` +3. Merge all desired feature branches into the release branch +4. Create a pull/merge request: release branch -> master + +After a successful merge: + +5. Create tag vM.m.b (`git tag vM.m.b`) and push the tag (`git push --tags`) +6. Create a release in GitHub + +The new tag on master will signal Travis to deploy a new package to PyPI while +the GitHub release is just for user documentation. diff --git a/doc/source/index.rst b/doc/source/index.rst index d234c5cb..a80317e1 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -17,6 +17,12 @@ Welcome to ABCpy's documentation! README +.. toctree:: + :maxdepth: 2 + :caption: Developer Documentation + + DEVELOP + .. toctree:: :maxdepth: 2 :caption: Reference From ac7455ae5044695cfe1d6c1a8d06cfa81e800523 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 12 Jul 2017 11:34:42 +0200 Subject: [PATCH 45/50] Some minor fixes to the documentation.
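Note on the release steps documented in the previous patch: the `VERSION` file is what the packaging code reads when a release is built. A minimal sketch of the assumed pattern (the actual setup.py may differ in detail):

::

    # Sketch: how a VERSION file is typically consumed during packaging.
    from setuptools import setup

    with open('VERSION') as f:
        version = f.read().strip()  # e.g. '0.3' after `echo M.m.b > VERSION`

    setup(
        name='abcpy',
        version=version,  # should match the vM.m.b git tag
    )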
--- doc/source/README.rst | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/source/README.rst b/doc/source/README.rst index 2a70faf0..f419c237 100644 --- a/doc/source/README.rst +++ b/doc/source/README.rst @@ -251,7 +251,8 @@ Use ABCpy with a model written in C++ ====================================== -There are several frameworks that help you integrate your C++/C code into Python. We showcase examples for +There are several frameworks that help you integrate your C++/C code into +Python. We showcase examples for * `Swig `_ * `Pybind `_ @@ -323,26 +324,32 @@ The full code is available in `examples/extensions/models/gaussian_cpp/`. To simplify compilation of SWIG and C++ code we created a Makefile. Note that you might need to adapt some paths in the Makefile. - Use ABCpy with a model written in R =================================== -Statisticians often use the R language to build statistical models. R models can be incorporated within the ABCpy language with the `rpy2` Python package. We show how to use the `rpy2` package to connect with a model written in R. +Statisticians often use the R language to build statistical models. R models can +be incorporated within the ABCpy language with the `rpy2` Python package. We +show how to use the `rpy2` package to connect with a model written in R. -Continuing from the previous section we use a simple Gaussian model as an example. The following R code is the contents of the R file `gaussian_model.R`. +Continuing from the previous sections we use a simple Gaussian model as an +example. The following R code is the contents of the R file `gaussian_model.R`: .. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.R :language: R :lines: 1 - 4 -More complex R models are incorporated in the same way. To include this function within the ABCpy script we include the following code at the beginning of our python script. +More complex R models are incorporated in the same way. To include this function +within ABCpy we include the following code at the beginning of our Python +file: .. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.py :language: python :lines: 5 - 14 -This imports the R function `simple_gaussian` into the python environment. We need to build our own model to incorporate this R function as in the previous section. The only difference is the `simulate` method of the class `Gaussian'. +This imports the R function `simple_gaussian` into the Python environment. We +need to build our own model to incorporate this R function as in the previous +section. The only difference is the `simulate` method of the class `Gaussian`. .. automethod:: abcpy.models.Model.simulate :noindex: @@ -351,7 +358,8 @@ This imports the R function `simple_gaussian` into the python environment. We ne :language: python :lines: 40 - 42 -The default output for R functions in python is a float vector. This must be converted into a list for the purposes of ABCpy. +The default output for R functions in Python is a float vector. This must be +converted into a Python list for the purposes of ABCpy. .. From 802f66e635aaef2aa812f5c16ded65a5e876a095 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Wed, 19 Jul 2017 17:44:31 +0200 Subject: [PATCH 46/50] Added documentation on Amazon Web Services (AWS).
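Note: the rpy2 integration described in the R section of the previous patch reduces to a few lines. A sketch of the pattern (`gaussian_model.R` and `simple_gaussian` are the names used in the documentation; the argument list is assumed for illustration):

::

    import rpy2.robjects as robjects

    robjects.r('source("gaussian_model.R")')         # load the R model file
    simple_gaussian = robjects.r['simple_gaussian']  # look up the R function

    # R returns a float vector; ABCpy expects a Python list.
    samples = list(simple_gaussian(170.0, 15.0, 10))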
--- doc/source/README.rst | 81 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/doc/source/README.rst b/doc/source/README.rst index f419c237..d5a9ca99 100644 --- a/doc/source/README.rst +++ b/doc/source/README.rst @@ -181,6 +181,74 @@ the official `homepage `_. Further, keep in mind that the ABCpy library has to be properly installed on the cluster, such that it is available to the Python interpreters on the master and the worker nodes. + +Using Cluster Infrastructure +============================ + +When your model is computationally expensive and/or other factors require +compute infrastructure that goes beyond a single notebook or workstation, you can +easily run ABCpy on infrastructure for cluster or high-performance computing. + +Running on Amazon Web Services +------------------------------ + +We show with high-level steps how to get ABCpy running on Amazon Web Services +(AWS). Please note that this is not a complete guide to AWS, so we would like +to refer you to the respective documentation. The first step would be to set up an +AWS Elastic MapReduce (EMR) cluster which comes with the option of a +pre-configured Apache Spark. Then, we show how to run a simple inference example on +this cluster. + +Setting up the EMR Cluster +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When we set up an EMR cluster, we want to install ABCpy on every node of the +cluster. Therefore, we provide a bootstrap script that does this job for us. On +your local machine create a file named `emr_bootstrap.sh` with the following +content: + +:: + + #!/bin/sh + sudo yum -y install git + sudo pip-3.4 install ipython findspark abcpy + +In AWS go to Services, then S3 under the Storage Section. Create a new bucket +called `abcpy` and upload your bootstrap script `emr_bootstrap.sh`. + +To create a cluster, in AWS go to Services and then EMR under the Analytics +Section. Click 'Create Cluster', then choose 'Advanced Options'. In Step 1 +choose the emr-5.7.0 image and make sure only Spark is selected for your cluster +(the other software packages are not required). In Step 2 choose for example one +master node and 4 core nodes (16 vCPUs if you have 4-vCPU instances). In Step 3 +under the bootstrap action, choose custom, and select the script +`abcpy/emr_bootstrap.sh`. In the last step (Step 4), choose a key to access the +master node (we assume that you already set up keys). Start the cluster. + + +Running ABCpy on AWS +~~~~~~~~~~~~~~~~~~~~ + +Log in via SSH and run the following commands to get an ABCpy example +running with Python 3 support: + +:: + + sudo bash -c 'echo export PYSPARK_PYTHON=python34 >> /etc/spark/conf/spark-env.sh' + git clone https://github.com/eth-cscs/abcpy.git + +Then, to submit a job to the Spark cluster we run the following commands: + +:: + + cd abcpy/examples/backends/ + spark-submit --num-executors 16 pmcabc_gaussian.py + +Clearly the setup can be extended and optimized. For this and basic information +we refer you to the `AWS documentation on +EMR `_. + + Implementing a new Model ======================== @@ -248,8 +316,8 @@ complete example code can be found `here `_ -Use ABCpy with a model written in C++ -====================================== +Wrap a Model Written in C++ +--------------------------- There are several frameworks that help you integrate your C++/C code into Python.
We showcase examples for * `Pybind `_ Using Swig ----------- +~~~~~~~~~~ Swig is a tool that creates a Python wrapper for our C++/C code using an interface (file) that we have to specify. We can then import the wrapper and @@ -301,11 +369,13 @@ instead of four parameters and returns a numpy array. The first step to get everything running is to translate the Swig interface file to wrapper code in C++ and Python. :: + swig -python -c++ -o gaussian_model_simple_wrap.cpp gaussian_model_simple.i This creates two wrapper files `gaussian_model_simple_wrap.cpp` and `gaussian_model_simple.py`. Now the C++ files can be compiled: :: + g++ -fPIC -I /usr/include/python3.5m -c gaussian_model_simple.cpp -o gaussian_model_simple.o g++ -fPIC -I /usr/include/python3.5m -c gaussian_model_simple_wrap.cpp -o gaussian_model_simple_wrap.o g++ -shared gaussian_model_simple.o gaussian_model_simple_wrap.o -o _gaussian_model_simple.so @@ -323,10 +393,9 @@ the respective model function (line -2). The full code is available in `examples/extensions/models/gaussian_cpp/`. To simplify compilation of SWIG and C++ code we created a Makefile. Note that you might need to adapt some paths in the Makefile. - -Use ABCpy with a model written in R -=================================== +Wrap a Model Written in R +------------------------- Statisticians often use the R language to build statistical models. R models can be incorporated within the ABCpy language with the `rpy2` Python package. We From 4c894d6d1a7b6a03c2424d4ae83692b29d948bd5 Mon Sep 17 00:00:00 2001 From: Avinash Ummadisingu Date: Fri, 4 Aug 2017 13:14:49 +0200 Subject: [PATCH 47/50] Updated documentation for MPI Backend and modified backend examples to be similar --- doc/source/README.rst | 39 +++++++++++++++++-- .../backends/apache_spark/pmcabc_gaussian.py | 16 +++++--- examples/backends/mpi/pmcabc_gaussian.py | 2 +- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/doc/source/README.rst b/doc/source/README.rst index 97bd5c3a..e2c5e28e 100644 --- a/doc/source/README.rst +++ b/doc/source/README.rst @@ -153,7 +153,7 @@ backend have to be changed to .. literalinclude:: ../../examples/backends/apache_spark/pmcabc_gaussian.py :language: python - :lines: 29-32 + :lines: 6-9 :dedent: 4 In words, a Spark context has to be created and passed to the Spark @@ -166,14 +166,14 @@ The standard way to run the script on Spark is via the spark-submit command: :: - PYSPARK_PYTHON=python3 spark-submit gaussian.py + PYSPARK_PYTHON=python3 spark-submit pmcabc_gaussian.py Often Spark installations use Python 2 by default. To make Spark use the required Python 3 interpreter, the `PYSPARK_PYTHON` environment variable can be set. The adapted python code can be found in -`examples/backend/apache_spark/gaussian.py`. +`examples/backend/apache_spark/pmcabc_gaussian.py`. Note that in order to run jobs in parallel you need to have Apache Spark installed on the system in question. Details on the installation can be found on the official `homepage `_. Further, keep in mind that the ABCpy library has to be properly installed on the cluster, such that it is available to the Python interpreters on the master and the worker nodes. +Using the MPI Backend +======================= + +To run ABCpy in parallel using Open MPI, one only needs to use the provided +MPI backend. Using the same example as above, the statements for the +backend have to be changed to + +..
literalinclude:: ../../examples/backends/mpi/pmcabc_gaussian.py + :language: python + :lines: 6-7 + :dedent: 4 + +In words, one only needs to initialize an instance of the MPI backend. The number +of ranks to spawn are specified at runtime through the way the script is run. + +The standard way to run the script using Open MPI is directly via mpirun like below +or on a cluster through a job scheduler like Slurm: + +:: + + mpirun -np 4 python3 pmcabc_gaussian.py + + +The adapted python code can be found in +`examples/backend/mpi/pmcabc_gaussian.py`. + +Note that in order to run jobs in parallel you need to have Open MPI +installed on the system(s) in question with the requisite python bindings for MPI (mpi4py). +Details on the installation can be found on the official `Open MPI homepage `_ +and the `mpi4py homepage `_. Further, keep in mind that the ABCpy library +has to be properly installed on the cluster, such that it is available to the Python +interpreters on the master and the worker nodes. + Using Cluster Infrastructure ============================ diff --git a/examples/backends/apache_spark/pmcabc_gaussian.py b/examples/backends/apache_spark/pmcabc_gaussian.py index 819791f7..50995e9c 100644 --- a/examples/backends/apache_spark/pmcabc_gaussian.py +++ b/examples/backends/apache_spark/pmcabc_gaussian.py @@ -1,5 +1,14 @@ import numpy as np +def setup_backend(): + global backend + + import pyspark + sc = pyspark.SparkContext() + from abcpy.backends import BackendSpark as Backend + backend = Backend(sc, parallelism=4) + + def infer_parameters(): # define observation for true parameters mean=170, std=15 y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] @@ -25,11 +34,6 @@ def infer_parameters(): mean, cov, df = np.array([.0, .0]), np.eye(2), 3. 
kernel = MultiStudentT(mean, cov, df, seed=1) - # define backend - import pyspark - sc = pyspark.SparkContext() - from abcpy.backends import BackendSpark as Backend - backend = Backend(sc, parallelism=4) # define sampling scheme from abcpy.inferences import PMCABC @@ -71,6 +75,7 @@ def setUp(self): findspark.init() def test_example(self): + setup_backend() journal = infer_parameters() test_result = journal.posterior_mean()[0] expected_result = 176.0 @@ -78,6 +83,7 @@ def test_example(self): if __name__ == "__main__": + setup_backend() journal = infer_parameters() analyse_journal(journal) diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index 5bbd796b..8e142a75 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -1,8 +1,8 @@ import numpy as np -backend = None def setup_backend(): global backend + from abcpy.backends import BackendMPI as Backend backend = Backend() From a576b504bf50d04987d5c1efaf72396456c0b972 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Mon, 7 Aug 2017 13:25:35 +0200 Subject: [PATCH 48/50] Added MPI configuration for Travis CI. Restructuring requirements file. Small modifications to the backend documentation. --- .travis.yml | 4 +++ doc/source/README.rst | 64 +++++++++++++++++++++------------- requirements.txt | 3 +- requirements/backend-mpi.txt | 2 ++ requirements/backend-spark.txt | 1 + 5 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 requirements/backend-mpi.txt create mode 100644 requirements/backend-spark.txt diff --git a/.travis.yml b/.travis.yml index 199dc029..dc6c39cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,8 +10,12 @@ addons: - libpython3.4-dev - python3-numpy - swig + - libmpich-dev + - mpich install: - pip install -r requirements.txt +- pip install -r requirements/backend-mpi.txt +- pip install -r requirements/backend-spark.txt script: - make test deploy: diff --git a/doc/source/README.rst b/doc/source/README.rst index e2c5e28e..860d067a 100644 --- a/doc/source/README.rst +++ b/doc/source/README.rst @@ -142,10 +142,18 @@ And certainly, a journal can easily be saved to and loaded from disk: :language: python :lines: 60, 63 :dedent: 4 - - + + +Using Parallelization Backends +============================== + +Running ABC algorithms is often computationally expensive, thus ABCpy is built +with parallelization in mind. In order to run your inference schemes in parallel +on multiple nodes (computers) you can choose from the following backends. + + Using the Spark Backend -======================= +~~~~~~~~~~~~~~~~~~~~~~~ To run ABCpy in parallel using Apache Spark, one only needs to use the provided Spark backend. Considering the example from above, the statements for the @@ -174,28 +182,33 @@ set. The adapted python code can be found in `examples/backend/apache_spark/pmcabc_gaussian.py`. - + Note that in order to run jobs in parallel you need to have Apache Spark -installed on the system in question. Details on the installation can be found on -the official `homepage `_. Further, keep in mind that -the ABCpy library has to be properly installed on the cluster, such that it is -available to the Python interpreters on the master and the worker nodes. +installed on the system in question. The dependencies of the Spark backend can be +installed with `pip install -r requirements/backend-spark.txt`. +Details on the installation can be found on the official `homepage
Further, keep in mind that the ABCpy library has to +be properly installed on the cluster, such that it is available to the Python +interpreters on the master and the worker nodes. Using the MPI Backend -======================= +~~~~~~~~~~~~~~~~~~~~~ -To run ABCpy in parallel using Open MPI, one only needs to use the provided -MPI backend. Using the same example as above, the statements for the -backend have to be changed to +To run ABCpy in parallel using MPI, one only needs to use the provided MPI +backend. Using the same example as above, the statements for the backend have to +be changed to .. literalinclude:: ../../examples/backends/mpi/pmcabc_gaussian.py :language: python :lines: 6-7 :dedent: 4 -In words, one only needs to initialize an instance of the MPI backend. The number -of ranks to spawn are specified at runtime through the way the script is run. +In words, one only needs to initialize an instance of the MPI backend. The +number of ranks to spawn are specified at runtime through the way the script is +run. A minimum of two ranks is required, since rank 0 (master) is used to +orchestrade the calculation and all other ranks (workers) actually perform the +calculation. The standard way to run the script using Open MPI is directly via mpirun like below or on a cluster through a job scheduler like Slurm: @@ -205,14 +218,18 @@ or on a cluster through a job scheduler like Slurm: mpirun -np 4 python3 pmcabc_gaussian.py -The adapted python code can be found in +The adapted Python code can be found in `examples/backend/mpi/pmcabc_gaussian.py`. -Note that in order to run jobs in parallel you need to have Open MPI -installed on the system(s) in question with the requisite python bindings for MPI (mpi4py). -Details on the installation can be found on the official `Open MPI homepage `_ -and the `mpi4py homepage `_. Further, keep in mind that the ABCpy library -has to be properly installed on the cluster, such that it is available to the Python +Note that in order to run jobs in parallel you need to have MPI installed on the +system(s) in question with the requisite Python bindings for MPI (mpi4py). The +dependencies of the MPI backend can be install with +`pip install -r requirements/backend-mpi.txt`. + +Details on the installation can be found on the official `Open MPI homepage +`_ and the `mpi4py homepage +`_. Further, keep in mind that the ABCpy library has +to be properly installed on the cluster, such that it is available to the Python interpreters on the master and the worker nodes. Using Cluster Infrastructure @@ -223,7 +240,7 @@ compute infrastructure that goes beyond a single notebook or workstation you can easily run ABCpy on infrastructure for cluster or high-performance computing. Running on Amazon Web Services ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We show with high level steps how to get ABCpy running on Amazon Web Services (AWS). Please note, that this is not a complete guide to AWS, so we would like @@ -350,7 +367,7 @@ complete example code can be found `here Wrap a Model Written in C++ ---------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are several frameworks that help you integrating your C++/C code into Python. We showcase examples for @@ -442,8 +459,7 @@ example. The following R code is the contents of the R file `gaussian_model.R`: :lines: 1 - 4 More complex R models are incorporated in the same way. 
To include this function -within ABCpy we include the following code at the beginning of our Python -file: +within ABCpy we include the following code at the beginning of our Python file: .. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.py :language: python diff --git a/requirements.txt b/requirements.txt index 564dd47b..007b5307 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,6 @@ numpy scipy sklearn glmnet -findspark sphinx==1.4.8 sphinx_rtd_theme -coverage \ No newline at end of file +coverage diff --git a/requirements/backend-mpi.txt b/requirements/backend-mpi.txt new file mode 100644 index 00000000..47e460fa --- /dev/null +++ b/requirements/backend-mpi.txt @@ -0,0 +1,2 @@ +mpi4py +cloudpickle diff --git a/requirements/backend-spark.txt b/requirements/backend-spark.txt new file mode 100644 index 00000000..2e186911 --- /dev/null +++ b/requirements/backend-spark.txt @@ -0,0 +1 @@ +findspark From d2bc5a81fd3b5173b3e4fb0e042c81c58a17a617 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Mon, 7 Aug 2017 17:43:34 +0200 Subject: [PATCH 49/50] Adapted VERSION file. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index ee1372d3..be586341 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.2 +0.3 From 60b90d2e0bcb712a4451f351f34e65189dc8ef29 Mon Sep 17 00:00:00 2001 From: Marcel Schoengens Date: Mon, 7 Aug 2017 17:55:33 +0200 Subject: [PATCH 50/50] Updated module description. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 533b2e33..138d3680 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ version=version, description='A framework for approximate Bayesian computation (ABC) that speeds up inference by parallelizing computation on single computers or whole clusters.', - long_description='ABCpy is a highly modular, scientific library for approximate Bayesian computation (ABC) written in Python using the parallel computation framework Apache SPARK. The modularity helps domain scientists to easily apply ABC to their research without being ABC experts; using ABCpy they can easily run large parallel simulations without much knowledge about parallelization, even without much additional effort to parallelize their code. Further, ABCpy enables ABC experts to easily develop new inference schemes and evaluate them in a standardized environment, and to extend the library with new algorithms. These benefits come mainly from the modularity of ABCpy.', + long_description='ABCpy is a highly modular, scientific library for approximate Bayesian computation (ABC) written in Python. It is designed to run all included ABC algorithms in parallel, either using multiple cores of a single computer or using an Apache Spark or MPI enabled cluster. The modularity helps domain scientists to easily apply ABC to their research without being ABC experts; using ABCpy they can easily run large parallel simulations without much knowledge about parallelization, even without much additional effort to parallelize their code. Further, ABCpy enables ABC experts to easily develop new inference schemes and evaluate them in a standardized environment, and to extend the library with new algorithms. These benefits come mainly from the modularity of ABCpy.', # The project's main homepage. url='https://github.com/eth-cscs/abcpy',
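Closing note: taken together, patches 40, 47 and 48 mean that a user script selects a parallelization backend through one uniform import path. A sketch combining the two snippets shown in the examples above (the helper function itself is illustrative; `parallelism=4` is the value used in the Spark example):

::

    def setup_backend(kind='mpi'):
        if kind == 'mpi':
            # Requires at least two ranks: rank 0 orchestrates, the others compute.
            # Run with e.g.: mpirun -np 4 python3 script.py
            from abcpy.backends import BackendMPI as Backend
            return Backend()
        elif kind == 'spark':
            import pyspark
            sc = pyspark.SparkContext()
            from abcpy.backends import BackendSpark as Backend
            return Backend(sc, parallelism=4)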