diff --git a/.gitignore b/.gitignore
index 72364f99..5e1c3034 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,7 @@ ENV/
# Rope project settings
.ropeproject
+
+# vim backup files
+#
+**/.*.sw?
diff --git a/README.md b/README.md
index 9bb14090..ec75eca6 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# ABCpy [![Documentation Status](https://readthedocs.org/projects/abcpy/badge/?version=latest)](http://abcpy.readthedocs.io/en/latest/?badge=latest) [![Build Status](https://travis-ci.org/eth-cscs/abcpy.svg?branch=master)](https://travis-ci.org/eth-cscs/abcpy)
ABCpy is a scientific library written in Python for Bayesian uncertainty quantification in
-absence of likelihood function, which parallelizes existing approaximate Bayesian computation (ABC)
+absence of likelihood function, which parallelizes existing approximate Bayesian computation (ABC)
algorithms and other likelihood-free inference schemes. It presently includes:
* RejectionABC
@@ -26,52 +26,37 @@ scientists by providing
# Documentation
For more information, check out the
-* [Documentation](http://abcpy.readthedocs.io/en/v0.5.2)
-* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.2/examples) directory and
-* [Reference](http://abcpy.readthedocs.io/en/v0.5.2/abcpy.html)
+* [Documentation](http://abcpy.readthedocs.io/en/v0.5.3)
+* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.3/examples) directory and
+* [Reference](http://abcpy.readthedocs.io/en/v0.5.3/abcpy.html)
Further, we provide a [collection of models](https://github.com/eth-cscs/abcpy-models) for which ABCpy
has been applied successfully. This is a good place to look at more complicated inference setups.
# Author
-ABCpy was written by [Ritabrata Dutta, Università della svizzera italiana](https://search.usi.ch/en/people/c4342228614d041dca7e2f67cbb996c9/dutta-ritabrata)
-and [Marcel Schoengens, CSCS, ETH Zurich](schoengens@cscs.ch), and we're actively developing it. Please feel free
-to submit any bugs or feature requests. We'd also love to hear about your experiences with ABCpy in general. Drop us an email!
-
-We want to thank
-[Prof. Antonietta Mira, Università della svizzera italiana](https://search.usi.ch/en/people/f8960de6d60dd08a79b6c1eb20b7442b/Mira-Antonietta),
-and
-[Prof. Jukka-Pekka Onnela, Harvard University](https://www.hsph.harvard.edu/onnela-lab/)
-for helpful contributions and advice; Avinash Ummadisinghu and Nicole Widmern
-respectively for developing dynamic-MPI backend and making ABCpy suitbale for
-hierarchical models; and finally CSCS (Swiss National Super Computing Center)
-for their generous support.
+ABCpy was written by [Ritabrata Dutta, Warwick
+University](https://warwick.ac.uk/fac/sci/statistics/staff/academic-research/dutta/)
+and [Marcel Schoengens](mailto:schoengens@cscs.ch), CSCS, ETH Zurich, and we're
+actively developing it. Please feel free to submit any bugs or feature requests.
+We'd also love to hear about your experiences with ABCpy in general. Drop us an
+email!
+
+We want to thank [Prof. Antonietta Mira, Università della svizzera
+italiana](https://search.usi.ch/en/people/f8960de6d60dd08a79b6c1eb20b7442b/Mira-Antonietta),
+and [Prof. Jukka-Pekka Onnela, Harvard
+University](https://www.hsph.harvard.edu/onnela-lab/) for helpful contributions
+and advice; Avinash Ummadisinghu and Nicole Widmern respectively for developing
+dynamic-MPI backend and making ABCpy suitable for hierarchical models; and
+finally CSCS (Swiss National Super Computing Center) for their generous support.
## Citation
-There is a paper in the proceedings of the 2017 PASC conference. We would appreciate a citation.
-
-```
-@inproceedings{Dutta:2017:AUE:3093172.3093233,
- author = {Dutta, Ritabrata and Schoengens, Marcel and Onnela, Jukka-Pekka and Mira, Antonietta},
- title = {ABCpy: A User-Friendly, Extensible, and Parallel Library for Approximate Bayesian Computation},
- booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},
- series = {PASC '17},
- year = {2017},
- isbn = {978-1-4503-5062-4},
- location = {Lugano, Switzerland},
- pages = {8:1--8:9},
- articleno = {8},
- numpages = {9},
- url = {http://doi.acm.org/10.1145/3093172.3093233},
- doi = {10.1145/3093172.3093233},
- acmid = {3093233},
- publisher = {ACM},
- address = {New York, NY, USA},
- keywords = {ABC, Library, Parallel, Spark},
-}
-```
+There is a paper in the proceedings of the 2017 PASC conference. If you use
+ABCpy in your publication, we would appreciate a citation. You can use
+[this](https://github.com/eth-cscs/abcpy/blob/v0.5.3/doc/literature/DuttaS-ABCpy-PASC-2017.bib)
+BibTeX reference.
+
## Other References
diff --git a/VERSION b/VERSION
index cb0c939a..be14282b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.2
+0.5.3
diff --git a/abcpy/backends/mpi.py b/abcpy/backends/mpi.py
index f93f6e03..c792979c 100644
--- a/abcpy/backends/mpi.py
+++ b/abcpy/backends/mpi.py
@@ -1,10 +1,12 @@
-import numpy as np
-import cloudpickle
-import time
import pickle
+import time
+import cloudpickle
+import numpy as np
from mpi4py import MPI
-from abcpy.backends import Backend, PDS, BDS
+
+from abcpy.backends import BDS, PDS, Backend
+
class BackendMPIMaster(Backend):
"""Defines the behavior of the master process
@@ -268,9 +270,11 @@ def collect(self, pds):
all_data_indices,all_data_items = [],[]
for node_data in all_data:
- for item in node_data:
- all_data_indices+=[item[0]]
- all_data_items+=[item[1]]
+ for index, item in node_data:
+ if isinstance(item, Exception):
+ raise item
+ all_data_indices.append(index)
+ all_data_items.append(item)
#Sort the accumulated data according to the indices we tagged
#them with when distributing
@@ -490,7 +494,11 @@ def map(self, func):
#Accumulate the indices and *processed* chunks
for chunk in data_chunks:
data_index,data_item = chunk
- rdd+=[(data_index,func(data_item))]
+ try:
+ result = func(data_item)
+ except Exception as e:
+ result = e
+ rdd.append((data_index, result))
pds_res = PDSMPI(rdd, pds_id_new, self)
diff --git a/abcpy/inferences.py b/abcpy/inferences.py
index 5eb0110b..0f54a3fc 100644
--- a/abcpy/inferences.py
+++ b/abcpy/inferences.py
@@ -1,16 +1,19 @@
+import copy
+import logging
+import numpy as np
+
from abc import ABCMeta, abstractmethod, abstractproperty
+from scipy import optimize
-from abcpy.graphtools import GraphTools
-from abcpy.probabilisticmodels import *
from abcpy.acceptedparametersmanager import *
-from abcpy.perturbationkernel import DefaultKernel
-from abcpy.jointdistances import LinearCombination
+from abcpy.graphtools import GraphTools
from abcpy.jointapprox_lhd import ProductCombination
-import copy
-
-import numpy as np
+from abcpy.jointdistances import LinearCombination
from abcpy.output import Journal
-from scipy import optimize
+from abcpy.perturbationkernel import DefaultKernel
+from abcpy.probabilisticmodels import *
+from abcpy.utils import cached
+
class InferenceMethod(GraphTools, metaclass = ABCMeta):
"""
@@ -171,6 +174,7 @@ def __init__(self, root_models, distances, backend, seed=None):
self.distance = 
LinearCombination(root_models, distances)
self.backend = backend
self.rng = np.random.RandomState(seed)
+ self.logger = logging.getLogger(__name__)
# An object managing the bds objects
self.accepted_parameters_manager = AcceptedParametersManager(self.model)
@@ -259,6 +263,11 @@ def _sample_parameter(self, rng):
"""
distance = self.distance.dist_max()
+ if distance < self.epsilon and self.logger:
+ self.logger.warning("initial epsilon {:e} is larger than dist_max {:e}"
+ .format(float(self.epsilon), distance))
+
+
theta = np.array(self.get_parameters(self.model)).reshape(-1,)
counter = 0
while distance > self.epsilon:
@@ -269,8 +278,14 @@ def _sample_parameter(self, rng):
counter+=1
if(y_sim is not None):
distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim)
+ self.logger.debug("distance after {:4d} simulations: {:e}".format(
+ counter, distance))
else:
distance = self.distance.dist_max()
+ self.logger.debug(
+ "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}".
+ format(counter, distance, float(self.epsilon))
+ )
return (theta, counter)
@@ -312,7 +327,7 @@ class PMCABC(BaseDiscrepancy, InferenceMethod):
backend = None
- def __init__(self, root_models, distances, backend, kernel=None,seed=None):
+ def __init__(self, root_models, distances, backend, kernel=None, seed=None):
self.model = root_models
# We define the joint Linear combination distance using all the distances for each individual models
self.distance = LinearCombination(root_models, distances)
@@ -327,6 +342,7 @@ def __init__(self, root_models, distances, backend, kernel=None,seed=None):
self.kernel = kernel
self.backend = backend
self.rng = np.random.RandomState(seed)
+ self.logger = logging.getLogger(__name__)
self.accepted_parameters_manager = AcceptedParametersManager(self.model)
@@ -394,8 +410,9 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples
raise ValueError("The length of epsilon_init can only be equal to 1 or steps.")
# main PMCABC algorithm
- # print("INFO: Starting PMCABC iterations.")
- for aStep in range(0, steps):
+ self.logger.info("Starting PMCABC iterations")
+ for aStep in range(steps):
+ self.logger.debug("iteration {} of PMCABC algorithm".format(aStep))
if(aStep==0 and journal_file is not None):
accepted_parameters = journal.parameters[-1]
accepted_weights = journal.weights[-1]
@@ -409,23 +426,25 @@
self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters)
# 3: calculate covariance
- # print("INFO: Calculating covariance matrix.")
+ self.logger.info("Calculating covariance matrix")
new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
# Since each entry of new_cov_mats is a numpy array, we can multiply like this
accepted_cov_mats = [covFactor * new_cov_mat for new_cov_mat in new_cov_mats]
- # print("DEBUG: Iteration " + str(aStep) + " of PMCABC algorithm.")
seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
rng_pds = self.backend.parallelize(rng_arr)
# 0: update remotely required variables
# print("INFO: Broadcasting parameters.")
+ self.logger.info("Broadcasting parameters")
self.epsilon = epsilon_arr[aStep]
self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters, accepted_weights, accepted_cov_mats)
# 1: calculate resample parameters
print("INFO: Resampling parameters") + self.logger.info("Resamping parameters") + params_and_dists_and_ysim_and_counter_pds = self.backend.map(self._resample_parameter, rng_pds) params_and_dists_and_ysim_and_counter = self.backend.collect(params_and_dists_and_ysim_and_counter_pds) new_parameters, distances, counter = [list(t) for t in zip(*params_and_dists_and_ysim_and_counter)] @@ -438,6 +457,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples # Compute epsilon for next step # print("INFO: Calculating acceptance threshold (epsilon).") + self.logger.info("Calculating acceptances threshold") if aStep < steps - 1: if epsilon_arr[aStep + 1] == None: epsilon_arr[aStep + 1] = np.percentile(distances, epsilon_percentile) @@ -445,11 +465,10 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples epsilon_arr[aStep + 1] = np.max( [np.percentile(distances, epsilon_percentile), epsilon_arr[aStep + 1]]) # 2: calculate weights for new parameters - # print("INFO: Calculating weights.") - - + self.logger.info("Calculating weights") new_parameters_pds = self.backend.parallelize(new_parameters) + self.logger.info("Calculate weights") new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds) new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1, 1) sum_of_weights = 0.0 @@ -468,7 +487,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) # 3: calculate covariance - # print("INFO: Calculating covariance matrix.") + self.logger.info("Calculating covariance matrix") new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) # Since each entry of new_cov_mats is a numpy array, we can multiply like this new_cov_mats = [covFactor*new_cov_mat for new_cov_mat in new_cov_mats] @@ -478,7 +497,8 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples accepted_weights = new_weights accepted_cov_mats = new_cov_mats - # print("INFO: Saving configuration to output journal.") + self.logger.info("Save configuration to output journal") + if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) @@ -514,7 +534,14 @@ def _resample_parameter(self, rng): rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) distance = self.distance.dist_max() + + if distance < self.epsilon and self.logger: + self.logger.warn("initial epsilon {:e} is larger than dist_max {:e}" + .format(float(self.epsilon), distance)) + + theta = self.get_parameters() counter=0 + while distance > self.epsilon: #print( " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) @@ -538,9 +565,16 @@ def _resample_parameter(self, rng): if(y_sim is not None): distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(),y_sim) + self.logger.debug("distance after {:4d} simulations: {:e}".format( + counter, distance)) else: distance = self.distance.dist_max() + self.logger.debug( + "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". 
+ format(counter, distance, float(self.epsilon))
+ )
+
return (theta, distance, counter)
def _calculate_weight(self, theta):
@@ -558,6 +592,7 @@
float
the new weight for theta
"""
+ self.logger.debug("_calculate_weight")
if self.accepted_parameters_manager.kernel_parameters_bds is None:
return 1.0 / self.n_samples
else:
@@ -633,6 +668,7 @@ def __init__(self, root_models, likfuns, backend, kernel=None, seed=None):
self.kernel = kernel
self.backend = backend
self.rng = np.random.RandomState(seed)
+ self.logger = logging.getLogger(__name__)
# these are usually big tables, so we broadcast them to have them once
# per executor instead of once per task
@@ -720,8 +756,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1
self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters)
# 3: calculate covariance
- # print("INFO: Calculating covariance matrix.")
-
+ self.logger.info("Calculating covariance matrix")
new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
# Since each entry of new_cov_mats is a numpy array, we can multiply like this
@@ -731,8 +766,8 @@
self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats)
# main SMC algorithm
- # print("INFO: Starting PMC iterations.")
- for aStep in range(0, steps):
+ self.logger.info("Starting PMC iterations")
+ for aStep in range(steps):
if(aStep==0 and journal_file is not None):
accepted_parameters = journal.parameters[-1]
accepted_weights = journal.weights[-1]
@@ -748,7 +783,7 @@
self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters)
# 3: calculate covariance
- # print("INFO: Calculating covariance matrix.")
+ self.logger.info("Calculating covariance matrix")
new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
@@ -756,14 +791,14 @@
accepted_cov_mats = [covFactor * new_cov_mat for covFactor, new_cov_mat in zip(covFactors, new_cov_mats)]
- # print("DEBUG: Iteration " + str(aStep) + " of PMC algorithm.")
+ self.logger.info("Iteration {} of PMC algorithm".format(aStep))
# 0: update remotely required variables
- # print("INFO: Broadcasting parameters.")
+ self.logger.info("Broadcasting parameters")
self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights, accepted_cov_mats=accepted_cov_mats)
# 1: calculate resample parameters
- # print("INFO: Resample parameters.")
+ self.logger.info("Resample parameters")
index = self.rng.choice(accepted_parameters.shape[0], size=n_samples, p=accepted_weights.reshape(-1))
# Choose a new particle using the resampled particle (make the boundary proper)
# Initialize new_parameters
@@ -775,10 +810,10 @@
new_parameters[ind, :] = perturbation_output[1]
break
# 2: calculate approximate likelihood for new parameters
- # print("INFO: Calculate approximate likelihood.")
+ self.logger.info("Calculate approximate likelihood")
new_parameters_pds = self.backend.parallelize(new_parameters)
approx_likelihood_new_parameters_and_counter_pds = self.backend.map(self._approx_lik_calc,
new_parameters_pds) - # print("DEBUG: Collect approximate likelihood from pds.") + self.logger.debug("collect approximate likelihood from pds") approx_likelihood_new_parameters_and_counter = self.backend.collect(approx_likelihood_new_parameters_and_counter_pds) approx_likelihood_new_parameters, counter = [list(t) for t in zip(*approx_likelihood_new_parameters_and_counter)] @@ -788,7 +823,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.simulation_counter+=count # 3: calculate new weights for new parameters - # print("INFO: Calculating weights.") + self.logger.info("Calculating weights") new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds) new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1, 1) @@ -802,8 +837,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=new_weights) # 4: calculate covariance - # print("INFO: Calculating covariance matrix.") # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once, instead of collecting them at each kernel in each step + self.logger.info("Calculating covariance matrix") kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( @@ -812,8 +847,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) # 3: calculate covariance - # print("INFO: Calculating covariance matrix.") - + self.logger.info("Calculating covariance matrix") new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) # Since each entry of new_cov_mats is a numpy array, we can multiply like this @@ -826,7 +860,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_weights = new_weights accepted_cov_mat = new_cov_mats - # print("INFO: Saving configuration to output journal.") + self.logger.info("Saving configuration to output journal") + if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) @@ -891,6 +926,8 @@ def _calculate_weight(self, theta): The new weight for theta """ + self.logger.debug("_calculate_weight") + if self.accepted_parameters_manager.accepted_weights_bds is None: return 1.0 / self.n_samples else: @@ -959,6 +996,7 @@ def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed) + self.logger = logging.getLogger(__name__) # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task @@ -1056,7 +1094,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ broken_preemptively = False for aStep in range(0, steps): - print(aStep) + self.logger.debug("step {}".format(aStep)) if(aStep==0 and journal_file is not None): accepted_parameters=journal.parameters[-1] accepted_weights=journal.weights[-1] @@ -1094,12 +1132,12 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ data_pds = self.backend.parallelize(data_arr) # 0: update remotely required variables - # print("INFO: Broadcasting parameters.") + 
self.logger.info("Broadcasting parameters") self.epsilon = epsilon self._update_broadcasts(smooth_distances, all_distances) # 1: Calculate parameters - # print("INFO: Initial accepted parameter parameters") + self.logger.info("Initial accepted parameter parameters") params_and_dists_pds = self.backend.map(self._accept_parameter, data_pds) params_and_dists = self.backend.collect(params_and_dists_pds) new_parameters, new_distances, new_all_parameters, new_all_distances, index, acceptance, counter = [list(t) for t in @@ -1142,9 +1180,14 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ accept = accept + np.sum(acceptance) samples_until = samples_until + sample_array[aStep] acceptance_rate = accept / samples_until - print( - 'updates: ', np.sum(sample_array[1:aStep + 1]) / np.sum(sample_array[1:]) * 100, ' epsilon: ', epsilon, \ - 'u.mean: ', U, 'acceptance rate: ', acceptance_rate) + + msg = ("updates= {:.2f}, epsilon= {}, u.mean={:e}, acceptance rate: {:.2f}" + .format( + np.sum(sample_array[1:aStep + 1]) / np.sum(sample_array[1:]) * 100, + epsilon, U, acceptance_rate + ) + ) + self.logger.debug(msg) if acceptance_rate < ar_cutoff: broken_preemptively = True break @@ -1424,7 +1467,7 @@ def __init__(self, root_models, distances, backend, kernel=None,seed=None): # We define the joint Linear combination distance using all the distances for each individual models self.distance = LinearCombination(root_models, distances) - if (kernel is None): + if kernel is None: mapping, garbage_index = self._get_mapping() models = [] @@ -1437,6 +1480,7 @@ def __init__(self, root_models, distances, backend, kernel=None,seed=None): self.backend = backend self.rng = np.random.RandomState(seed) self.anneal_parameter = None + self.logger = logging.getLogger(__name__) # these are usually big tables, so we broadcast them to have them once @@ -1497,13 +1541,15 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for aStep in range(0, steps): - if(aStep==0 and journal_file is not None): + self.logger.info("Step {}".format(aStep)) + + if aStep==0 and journal_file is not None: accepted_parameters = journal.parameters[-1] accepted_weights = journal.weights[-1] accepted_cov_mats = journal.opt_values[-1] # main ABCsubsim algorithm - # print("INFO: Initialization of ABCsubsim") + self.logger.info("Initializatio of ABCsubsim") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(n_samples / temp_chain_length), dtype=np.uint32) rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) @@ -1513,12 +1559,13 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 rng_and_index_pds = self.backend.parallelize(rng_and_index_arr) # 0: update remotely required variables - # print("INFO: Broadcasting parameters.") + self.logger.info("Broadcasting parameters") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) # 1: Calculate parameters # print("INFO: Initial accepted parameter parameters") + self.logger.info("Initial accepted parameters") params_and_dists_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) params_and_dists = self.backend.collect(params_and_dists_pds) new_parameters, new_distances, counter = [list(t) for t in zip(*params_and_dists)] @@ -1581,8 +1628,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) - # 
print("INFO: Saving intermediate configuration to output journal.") if full_output == 1: + self.logger.info("Saving intermediate configuration to output journal") journal.add_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(accepted_cov_mats) @@ -1594,8 +1641,9 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # Show progress anneal_parameter_change_percentage = 100 * abs(anneal_parameter_old - anneal_parameter) / abs(anneal_parameter) - print('Steps: ', aStep, 'annealing parameter: ', anneal_parameter, 'change (%) in annealing parameter: ', - anneal_parameter_change_percentage) + msg = ("step: {}, annealing parameter: {:.4f}, change(%) in annealing parameter: {:.1f}" + .format(aStep, anneal_parameter, anneal_parameter_change_percentage)) + self.logger.info(msg) if anneal_parameter_change_percentage < ap_change_cutoff: break @@ -1784,12 +1832,12 @@ class RSMCABC(BaseDiscrepancy, InferenceMethod): backend = None - def __init__(self, root_models, distances, backend, kernel=None,seed=None): + def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.model = root_models # We define the joint Linear combination distance using all the distances for each individual models self.distance = LinearCombination(root_models, distances) - if (kernel is None): + if kernel is None: mapping, garbage_index = self._get_mapping() models = [] @@ -1800,8 +1848,9 @@ def __init__(self, root_models, distances, backend, kernel=None,seed=None): self.kernel = kernel self.backend = backend + self.logger = logging.getLogger(__name__) - self.R=None + self.R = None self.rng = np.random.RandomState(seed) # these are usually big tables, so we broadcast them to have them once @@ -1813,7 +1862,8 @@ def __init__(self, root_models, distances, backend, kernel=None,seed=None): def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.1, epsilon_init = 100, epsilon_final = 0.1, const = 0.01, covFactor = 2.0, full_output=0, journal_file = None): - """Samples from the posterior distribution of the model parameter given the observed + """ + Samples from the posterior distribution of the model parameter given the observed data observations. Parameters @@ -1845,6 +1895,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. 
""" + self.sample_from_prior(rng=self.rng) self.accepted_parameters_manager.broadcast(self.backend, observations) @@ -1867,9 +1918,10 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_dist = None # main RSMCABC algorithm - # print("INFO: Starting RSMCABC iterations.") for aStep in range(steps): - if(aStep==0 and journal_file is not None): + self.logger.info("RSMCABC iteration {}".format(aStep)) + + if aStep == 0 and journal_file is not None: accepted_parameters=journal.parameters[-1] self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) @@ -1914,6 +1966,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_cov_mats = [covFactor*cov_mat for cov_mat in accepted_cov_mats] if epsilon[-1] < epsilon_final: + self.logger("accepted epsilon {:e} < {:e}" + .format(epsilon[-1], epsilon_final)) break seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_replenish, dtype=np.uint32) @@ -1948,8 +2002,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_parameters = np.concatenate((accepted_parameters, new_parameters)) accepted_dist = np.concatenate((accepted_dist, new_dist)) - # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): + self.logger.info("Saving configuration to output journal.") journal.add_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(np.ones(shape=(len(accepted_parameters), 1)) * (1 / len(accepted_parameters))) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) @@ -1958,8 +2012,6 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 journal.number_of_simulations.append(self.simulation_counter) # 2: Compute acceptance probabilty and set R - # print(aStep) - # print(new_index) prob_acceptance = sum(new_index) / (R * n_replenish) if prob_acceptance == 1 or prob_acceptance == 0: R = 1 @@ -2088,12 +2140,12 @@ class APMCABC(BaseDiscrepancy, InferenceMethod): backend = None - def __init__(self, root_models, distances, backend, kernel = None,seed=None): + def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.model = root_models # We define the joint Linear combination distance using all the distances for each individual models self.distance = LinearCombination(root_models, distances) - if (kernel is None): + if kernel is None: mapping, garbage_index = self._get_mapping() models = [] @@ -2103,6 +2155,7 @@ def __init__(self, root_models, distances, backend, kernel = None,seed=None): self.kernel = kernel self.backend = backend + self.logger = logging.getLogger(__name__) self.epsilon= None self.rng = np.random.RandomState(seed) @@ -2172,6 +2225,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # main APMCABC algorithm # print("INFO: Starting APMCABC iterations.") for aStep in range(steps): + self.logger.info("APMCABC iteration {}".format(aStep)) if(aStep==0 and journal_file is not None): accepted_parameters=journal.parameters[-1] accepted_weights=journal.weights[-1] @@ -2195,7 +2249,6 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 alpha_accepted_weights=accepted_weights # 0: Drawing new new/perturbed samples using prior or MCMC Kernel - # print("DEBUG: Iteration " + str(aStep) + " of APMCABC algorithm.") if aStep > 0: 
n_additional_samples = n_samples - round(n_samples * alpha) else: @@ -2206,12 +2259,12 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 rng_pds = self.backend.parallelize(rng_arr) # update remotely required variables - # print("INFO: Broadcasting parameters.") + self.logger.info("Broadcasting parameters") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=alpha_accepted_parameters, accepted_weights=alpha_accepted_weights, accepted_cov_mats=accepted_cov_mats) self._update_broadcasts(alpha_accepted_dist) # calculate resample parameters - # print("INFO: Resampling parameters") + self.logger.info("Resampling parameters") params_and_dist_weights_pds = self.backend.map(self._accept_parameter, rng_pds) params_and_dist_weights = self.backend.collect(params_and_dist_weights_pds) new_parameters, new_dist, new_weights, counter = [list(t) for t in zip(*params_and_dist_weights)] @@ -2247,7 +2300,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 alpha_accepted_dist = accepted_dist[index_alpha] # 3: calculate covariance - # print("INFO: Calculating covariance matrix.") + self.logger.info("Calculating covariance matrix") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=alpha_accepted_parameters, accepted_weights=alpha_accepted_weights) kernel_parameters = [] @@ -2328,13 +2381,14 @@ def _accept_parameter(self, rng): perturbation_output = self.perturb(index[0], rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: break + y_sim = self.simulate(self.n_samples_per_param, rng=rng) counter+=1 dist = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) denominator = 0.0 - for i in range(0, len(self.accepted_parameters_manager.accepted_weights_bds.value())): + for i in range(len(self.accepted_parameters_manager.accepted_weights_bds.value())): pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], perturbation_output[1]) denominator += self.accepted_parameters_manager.accepted_weights_bds.value()[i, 0] * pdf_value weight = 1.0 * prior_prob / denominator @@ -2377,7 +2431,7 @@ class SMCABC(BaseDiscrepancy, InferenceMethod): backend = None - def __init__(self, root_models, distances, backend, kernel = None,seed=None): + def __init__(self, root_models, distances, backend, kernel = None, seed=None): self.model = root_models # We define the joint Linear combination distance using all the distances for each individual models self.distance = LinearCombination(root_models, distances) @@ -2392,6 +2446,7 @@ def __init__(self, root_models, distances, backend, kernel = None,seed=None): self.kernel = kernel self.backend = backend + self.logger = logging.getLogger(__name__) self.epsilon = None self.rng = np.random.RandomState(seed) @@ -2464,8 +2519,9 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 epsilon = [10000] # main SMC ABC algorithm - # print("INFO: Starting SMCABC iterations.") for aStep in range(0, steps): + self.logger.info("SMCABC iteration {}".format(aStep)) + if(aStep==0 and journal_file is not None): accepted_parameters=journal.parameters[-1] accepted_weights=journal.weights[-1] @@ -2493,6 +2549,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 0: Compute the Epsilon if accepted_y_sim != None: + self.logger.info("Compute 
epsilon, might take a while") # Compute epsilon for next step fun = lambda epsilon_var: self._compute_epsilon(epsilon_var, \ epsilon, observations, accepted_y_sim, accepted_weights, @@ -2503,7 +2560,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 epsilon.append(epsilon_new) # 1: calculate weights for new parameters - # print("INFO: Calculating weights.") + self.logger.info("Calculating weights") if accepted_y_sim != None: new_weights = np.zeros(shape=(n_samples), ) for ind1 in range(n_samples): @@ -2524,7 +2581,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 2: Resample if accepted_y_sim != None and pow(sum(pow(new_weights, 2)), -1) < resample: - print('Resampling') + self.logger.info("Resampling") # Weighted resampling: index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace=1, p=new_weights) accepted_parameters = accepted_parameters[index_resampled, :] @@ -2548,7 +2605,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_cov_mats = [covFactor * cov_mat for cov_mat in accepted_cov_mats] # 3: Drawing new perturbed samples using MCMC Kernel - # print("DEBUG: Iteration " + str(aStep) + " of SMCABC algorithm.") + self.logger.debug("drawing new pertubated samples using mcmc kernel") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32) rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) index_arr = np.arange(n_samples) @@ -2563,7 +2620,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self._update_broadcasts(accepted_y_sim) # calculate resample parameters - # print("INFO: Resampling parameters") + self.logger.info("Resampling parameters") params_and_ysim_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) params_and_ysim = self.backend.collect(params_and_ysim_pds) new_parameters, new_y_sim, counter = [list(t) for t in zip(*params_and_ysim)] @@ -2577,8 +2634,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_y_sim = new_y_sim - # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): + self.logger.info("Saving configuration to output journal") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) journal.add_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) @@ -2640,9 +2697,15 @@ def _compute_epsilon(self, epsilon_new, epsilon, observations, accepted_y_sim, a result = RHS - LHS return (result) + def _bisection(self, func, low, high, tol): + # cache computed values, as we call func below + # several times for the same argument: + func = cached(func) midpoint = (low + high) / 2.0 while (high - low) / 2.0 > tol: + self.logger.debug("bisection: distance = {:e} > tol = {:e}" + .format((high - low) / 2, tol)) if func(midpoint) == 0: return midpoint elif func(low) * func(midpoint) < 0: diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index bfb35776..ec0d5d23 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -1,8 +1,10 @@ from abc import ABCMeta, abstractmethod -from abcpy.probabilisticmodels import Continuous + import numpy as np -from scipy.stats import multivariate_normal from scipy.special import gamma +from scipy.stats import multivariate_normal + +from abcpy.probabilisticmodels 
class PerturbationKernel(metaclass = ABCMeta):
@@ -336,7 +338,7 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x):
cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float)
- return multivariate_normal(mean, cov).pdf(x)
+ return multivariate_normal(mean, cov, allow_singular=True).pdf(x)
class MultivariateStudentTKernel(PerturbationKernel, ContinuousKernel):
@@ -570,4 +572,3 @@ def __init__(self, models):
else:
super(DefaultKernel, self).__init__([continuous_kernel, discrete_kernel])
-
diff --git a/abcpy/utils.py b/abcpy/utils.py
new file mode 100644
index 00000000..800c121c
--- /dev/null
+++ b/abcpy/utils.py
@@ -0,0 +1,13 @@
+from functools import wraps
+
+
+def cached(func):
+ cache = {}
+
+ @wraps(func)
+ def wrapped(x):
+ if x not in cache:
+ cache[x] = func(x)
+ return cache[x]
+
+ return wrapped
diff --git a/doc/literature/DuttaS-ABCpy-PASC-2017.bib b/doc/literature/DuttaS-ABCpy-PASC-2017.bib
new file mode 100644
index 00000000..5434d905
--- /dev/null
+++ b/doc/literature/DuttaS-ABCpy-PASC-2017.bib
@@ -0,0 +1,18 @@
+@inproceedings{Dutta:2017:AUE:3093172.3093233,
+ author = {Dutta, Ritabrata and Schoengens, Marcel and Onnela, Jukka-Pekka and Mira, Antonietta},
+ title = {ABCpy: A User-Friendly, Extensible, and Parallel Library for Approximate Bayesian Computation},
+ booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},
+ series = {PASC '17},
+ year = {2017},
+ isbn = {978-1-4503-5062-4},
+ location = {Lugano, Switzerland},
+ pages = {8:1--8:9},
+ articleno = {8},
+ numpages = {9},
+ url = {http://doi.acm.org/10.1145/3093172.3093233},
+ doi = {10.1145/3093172.3093233},
+ acmid = {3093233},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {ABC, Library, Parallel, Spark},
+}
\ No newline at end of file
diff --git a/doc/source/DEVELOP.rst b/doc/source/DEVELOP.rst
index cb83bf89..03d7ead1 100644
--- a/doc/source/DEVELOP.rst
+++ b/doc/source/DEVELOP.rst
@@ -13,7 +13,7 @@ steps required in order to deploy a new release. Assume we want to deploy
the new version `M.m.b':
1. Create a release branch `release-M.m.b`
-2. Adapt `VERSION` file in the repos root directiory: `echo M.m.b > VERSION`
+2. Adapt `VERSION` file in the repos root directory: `echo M.m.b > VERSION`
3. Adapt `README.md` file: adapt links to correct version of `User Documentation` and `Reference`
4. Merge all desired feature branches into the release branch
5. Create a pull/ merge request: release branch -> master
diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst
index ceb0f5eb..74c5c9da 100644
--- a/doc/source/getting_started.rst
+++ b/doc/source/getting_started.rst
@@ -3,14 +3,14 @@
2. Getting Started
==================
-Here, we explain how to use ABCpy to quntify parameter uncertainty of a probabbilistic model given some observed
+Here, we explain how to use ABCpy to quantify parameter uncertainty of a probabilistic model given some observed
dataset.
If you are new to uncertainty quantification using Approximate Bayesian Computation (ABC), we recommend you to start with the `Parameters as Random Variables`_ section. Parameters as Random Variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -As an example, if we have measurements of the height of a group of grown up human and it is also known that a Gaussian +As an example, if we have measurements of the height of a group of grown up humans and it is also known that a Gaussian distribution is an appropriate probabilistic model for these kind of observations, then our observed dataset would be measurement of heights and the probabilistic model would be Gaussian. @@ -33,7 +33,7 @@ please check the implementing a new model section. To define a parameter of a model as a random variable, you start by assigning a *prior* distribution on them. We can utilize *prior* knowledge about these parameters as *prior* distribution. In the absence of prior knowledge, we still -need to provide *prior* information and a non-informative flat disribution on the parameter space can be used. The prior +need to provide *prior* information and a non-informative flat distribution on the parameter space can be used. The prior distribution on the random variables are assigned by a probabilistic model which can take other random variables or hyper parameters as input. @@ -92,9 +92,9 @@ discrete. For a more involved example, please consult `Complex Perturbation Kern :lines: 90-91 :dedent: 4 -Finally, we need to specify a backend that determins the parallelization framework to use. The example code here uses +Finally, we need to specify a backend that determines the parallelization framework to use. The example code here uses the dummy backend :py:class:`BackendDummy ` which does not parallelize the computation of the -inference schemes, but which this is handy for prototyping and testing. For more advanced parallelization backends +inference schemes, but which is handy for prototyping and testing. For more advanced parallelization backends available in ABCpy, please consult :ref:`Using Parallelization Backends ` section. .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py @@ -124,7 +124,7 @@ quantifying the uncertainty of the inferred parameter, which are stored in the j ` for further information on extracting results. Note that the model and the observations are given as a list. This is due to the fact that in ABCpy, it is possible to -have hierarchical models, building relationships between co-occuring groups of datasets. To learn more, see the +have hierarchical models, building relationships between co-occurring groups of datasets. To learn more, see the `Hierarchical Model`_ section. The full source can be found in `examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py`. To @@ -138,38 +138,47 @@ execute the code you only need to run Probabilistic Dependency between Random Variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Since release 0.5.0 of ABCpy, a probabilistic dependency structures (e.g., a Bayesian network) between random variables -can be modelled. Behind the scene, ABCpy will represent this dependency structure as a directed acyclic graph (DAG) such -that the inference can be done on the full graph. Further we can also define new random variables through operations -between existing random variables. To make this concept more approachable, we now exemplify an inference problem on a -probabilistic dependency structure. 
+Since release 0.5.0 of ABCpy, probabilistic dependency structures (e.g., a
+Bayesian network) between random variables can be modelled. Behind the scenes,
+ABCpy will represent this dependency structure as a directed acyclic graph (DAG)
+such that the inference can be done on the full graph. Further, we can also
+define new random variables through operations between existing random
+variables. To make this concept more approachable, we now exemplify an inference
+problem on a probabilistic dependency structure.
-
-Students of a school took an exam and received some grade. The observed grades of the students are:
+Students of a school took an exam and received some grade. The observed grades
+of the students are:
.. literalinclude:: ../../examples/hierarchicalmodels/pmcabc_inference_on_multiple_sets_of_obs.py
:language: python
:lines: 6
:dedent: 4
-which depend on several variables: if there were bias, the average size of the classes, as well as the number of teachers at the school.
-Here we assume the average size of a class and the number of the teachers at the school are normally distributed with
-some mean, depending on the budget of the school and variance $1$. We further assume that the budget of the school is
-uniformly distributed between 1 and 10 millions US dollars. Finally, we can assume that the grade without any bias would
-be a normally distributed parameter around an average grade. The dependency structure between these variables can be defined using the
-following Bayesian network:
+
+which depend on several variables: whether there was a bias, the average size of the
+classes, as well as the number of teachers at the school. Here we assume the
+average size of a class and the number of the teachers at the school are
+normally distributed with some mean, depending on the budget of the school and
+variance $1$. We further assume that the budget of the school is uniformly
+distributed between 1 and 10 million US dollars. Finally, we can assume that
+the grade without any bias would be a normally distributed parameter around an
+average grade. The dependency structure between these variables can be defined
+using the following Bayesian network:
.. image:: network.png
-We can define these random variables and the dependencies between them in ABCpy in the following way:
+We can define these random variables and the dependencies between them in ABCpy
+in the following way:
.. literalinclude:: ../../examples/hierarchicalmodels/pmcabc_inference_on_multiple_sets_of_obs.py
:language: python
:lines: 9-10, 13, 16, 19
:dedent: 4
-So, each student will receive some grade without additional effects which is normally distributed, but then the final
-grade recieved will be a function of grade without additional effects and the other random variables defined beforehand
-(e.g., `school_budget`, `class_size` and `no_teacher`). The model for the final grade of the students now can be
-written as [Figure~\ref{fig:dep_grade}]:
+
+So, each student will receive some grade without additional effects, which is
+normally distributed, but then the final grade received will be a function of
+grade without additional effects and the other random variables defined
+beforehand (e.g., `school_budget`, `class_size` and `no_teacher`). The model for
+the final grade of the students now can be written as:
..
literalinclude:: ../../examples/hierarchicalmodels/pmcabc_inference_on_multiple_sets_of_obs.py :language: python @@ -177,7 +186,7 @@ written as [Figure~\ref{fig:dep_grade}]: :dedent: 4 Notice here we created a new random variable `final_grade`, by subtracting the random variables `class_size` multiplied -by 0.001 and adding `no_teacher` multiplied by 0.02 from the random variable `grade_without_additional_effects. +by 0.001 and adding `no_teacher` multiplied by 0.02 from the random variable `grade_without_additional_effects`. In short, this illustrates that you can perform standard operations "+", "-", "*", "/" and "**" (the power operator in Python) on any two random variables, to get a new random variable. It is possible to perform these operations between two random variables additionally to the general data types of Python @@ -194,8 +203,8 @@ Hierarchical Model ~~~~~~~~~~~~~~~~~~ -ABCpy also supports inference when co-occuring datasets are available. To illustrate how this is implemented, we -will consider the example from `Probabilistic Dependency between Random Variables`_ section and extend it for co-occuring datasets, +ABCpy also supports inference when co-occurring datasets are available. To illustrate how this is implemented, we +will consider the example from `Probabilistic Dependency between Random Variables`_ section and extend it for co-occurring datasets, when we also have data for final scholarships given out by the school to the students in addition to the final grade of a student. .. image:: network1.png @@ -215,7 +224,7 @@ model (of the DAG) given our observations. To infer uncertainty of our parameters, we follow the same steps as in our previous examples: We choose summary statistics, distance, inference scheme, backend and kernel. We will skip the definitions that have not changed from the previous section. However, we would like to point out the difference in definition of the distance. Since we are now -considering two observed datasets, we need to define an distances on them separately. Here, we use the Euclidean +considering two observed datasets, we need to define a distance on each one of them separately. Here, we use the Euclidean distance for each observed data set and corresponding simulated dataset. You can use two different distances on two different observed datasets. @@ -264,7 +273,7 @@ It just needs to be provided with all the relevant kernels: This is all that needs to be changed. The rest of the implementation works the exact same as in the previous example. If you would like to implement your own perturbation kernel, please check :ref:`Implementing a new Perturbation Kernel -`. Please keep in mind that you can only perturb parameters. **You cannot use the access operator to +`. Please keep in mind that you can only perturb parameters. **You cannot use the access operator to perturb one component of a multi-dimensional random variable differently than another component of the same variable.** The source code to this section can be found in `examples/extensions/perturbationkernels/pmcabc_perturbation_kernels.py` @@ -296,10 +305,13 @@ two methods: * Synthetic likelihood approximation :py:class:`abcpy.approx_lhd.SynLiklihood`, and another method using * penalized logistic regression :py:class:`abcpy.approx_lhd.PenLogReg`. -Next we explain how we can use PMC algorithm using approximation of the likelihood functions. 
As we are now considering
-two observed datasets corresponding to two root models, we need to define an approximation of likelihood function for
-each of them separately. Here, we use the :py:class:`abcpy.approx_lhd.SynLiklihood` for each of the root models. It is
-also possible to use two different approximate likelihoods for two different root models.
+Next we explain how we can use the PMC algorithm with an approximation of the
+likelihood function. As we are now considering two observed datasets
+corresponding to two root models, we need to define an approximation of the
+likelihood function for each of them separately. Here, we use the
+:py:class:`abcpy.approx_lhd.SynLiklihood` for each of the root models. It is
+also possible to use two different approximate likelihoods for two different
+root models.
.. literalinclude:: ../../examples/approx_lhd/pmc_hierarchical_models.py
:language: python
@@ -311,10 +323,10 @@ We then parametrize the sampler and sample from the posterior distribution.
.. literalinclude:: ../../examples/approx_lhd/pmc_hierarchical_models.py
:language: python
:lines: 52-64
- :dedent: 4 
+ :dedent: 4
Observe that the lists given to the sampler and the sampling method now contain two entries. These correspond to the two
-different observed data sets respectively. Also notice now we provide two different distances corresponding to the two
+different observed data sets respectively. Also notice we now provide two different distances corresponding to the two
different root models and their observed datasets. Presently ABCpy combines the distances by a linear combination.
Further possibilities of combination will be made available in later versions of ABCpy.
@@ -339,7 +351,7 @@ statistics defined as follows:
:dedent: 4
Then we can learn the optimized summary statistics from the given list of summary statistics using the semi-automatic
-summary selection procedure as following:
+summary selection procedure as follows:
.. literalinclude:: ../../examples/summaryselection/pmcabc_gaussian_summary_selection.py
:language: python
@@ -381,13 +393,19 @@ Now we can choose the most suitable model for the observed dataset `y_obs`,
or compute posterior probability of each of the models given the observed dataset.
.. literalinclude:: ../../examples/modelselection/randomforest_modelselections.py
- :language: python
- :lines: 35-36
- :dedent: 4
-
-
-
+ :language: python
+ :lines: 35-36
+ :dedent: 4
+
+Logging
+~~~~~~~
+
+Sometimes, when running inference schemes, more verbose logging output is
+desired. This can be achieved by using Python's standard logging module and
+setting it to INFO level at the beginning of the file.
+
+.. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py
+ :language: python
+ :lines: 1, 8
+ :dedent: 0
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 7b65a1b5..8e42e00f 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -3,8 +3,8 @@
1. Installation
===============
-ABCpy requires Python3 and is not compatible with Python2. The simplest way to install ABCpy is via PyPI and the is
-recommended method to use.
+ABCpy requires Python3 and is not compatible with Python2. The simplest way to install ABCpy is via PyPI and we
+recommend using this method.
Installation from PyPI
~~~~~~~~~~~~~~~~~~~~~~
Simplest way to install
::
pip3 install abcpy
This clearly works also in a virtual environment.
-Installation from Souce
-~~~~~~~~~~~~~~~~~~~~~~~
+Installation from Source
+~~~~~~~~~~~~~~~~~~~~~~~~
If you prefer to work on the source, clone the repository
::
git clone https://github.com/eth-cscs/abcpy.git
Make sure all requirements are installed
cd abcpy
pip3 install -r requirements.txt
-To create a package and install it do
+To create a package and install it, do
::
make package
diff --git a/doc/source/parallelization.rst b/doc/source/parallelization.rst
index ebdc9926..9df905cd 100644
--- a/doc/source/parallelization.rst
+++ b/doc/source/parallelization.rst
@@ -6,7 +6,7 @@
Using Parallelization Backends
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Running ABC algorithms is often computationally expensive, thus ABCpy is build
+Running ABC algorithms is often computationally expensive, thus ABCpy is built
with parallelization in mind. In order to run your inference schemes in
parallel on multiple nodes (computers) you can choose from the following
backends.
@@ -52,7 +52,7 @@ be properly installed on the cluster, such that it is available to the Python
interpreters on the master and the worker nodes.
Using the MPI Backend
-~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~
To run ABCpy in parallel using MPI, one only needs to use the provided MPI
backend. Using the same example as above, the statements for the backend have to
@@ -66,7 +66,7 @@ be changed to
In words, one only needs to initialize an instance of the MPI backend. The
number of ranks to spawn are specified at runtime through the way the script is
run. A minimum of two ranks is required, since rank 0 (master) is used to
-orchestrade the calculation and all other ranks (workers) actually perform the
+orchestrate the calculation and all other ranks (workers) actually perform the
calculation. The standard way to run the script using Open MPI is directly via
mpirun like below
diff --git a/doc/source/user_customization.rst b/doc/source/user_customization.rst
index f5389c52..c785fd94 100644
--- a/doc/source/user_customization.rst
+++ b/doc/source/user_customization.rst
@@ -259,7 +259,7 @@ ABCpy Python code. Then, in curly brackets, we specify which libraries we want
to include and which function we want to expose through the wrapper.
Now comes the tricky part. The model class expects a method `forward_simulate` that
-forward-simulates our model and which returns an array of syntetic
+forward-simulates our model and which returns an array of synthetic
observations. However, C++/C does not know the concept of returning an array,
instead in C++/C we would provide a memory position (pointer) where to write
the results. Swig has to translate between the two concepts. We use actually an
@@ -327,9 +327,10 @@ within ABCpy we include the following code at the beginning of our Python file:
:lines: 6 - 14
:linenos:
-This imports the R function :code:`simple_gaussian` into the Python environment. We need to build our own model to
-incorporate this R function as in the previous section. The only difference is in the :code:`forward_simulate` method of
-the class :code:`Gaussian'.
+This imports the R function :code:`simple_gaussian` into the Python environment.
+We need to build our own model to incorporate this R function as in the previous
+section. The only difference is in the :code:`forward_simulate` method of the
+class :code:`Gaussian`.
.. literalinclude:: ../../examples/extensions/models/gaussian_R/gaussian_model.py
:language: python
@@ -418,7 +419,7 @@ distribution (which is already implemented within ABCpy). First, we need to defi
..
automethod:: abcpy.perturbationkernel.PerturbationKernel.__init__
:noindex:
-Thus, ABCpy expects that the arguments passed to the initializer is of type :py:class:`ProbibilisticModel
+Thus, ABCpy expects that the arguments passed to the initializer are of type :py:class:`ProbabilisticModel
`, which can be seen as the random variables that should be perturbed by this kernel. All these models
should be saved on the kernel for future reference.
@@ -488,7 +489,7 @@ converted to a numpy array. Then, the covariance matrix is retrieved from the ac
similar function call. Finally, the parameters are perturbed and returned.
Last but not least, each kernel requires a probability density or probability mass function depending on whether it is a
-Continous Kernel or a Discrete Kernel:
+Continuous Kernel or a Discrete Kernel:
.. automethod:: abcpy.perturbationkernel.PerturbationKernel.pdf
:noindex:
diff --git a/examples/extensions/models/gaussian_cpp/Makefile b/examples/extensions/models/gaussian_cpp/Makefile
index cfa190a9..cccf21ff 100644
--- a/examples/extensions/models/gaussian_cpp/Makefile
+++ b/examples/extensions/models/gaussian_cpp/Makefile
@@ -4,7 +4,10 @@ WGET=wget -q
CC=g++
CPPFLAGS=-fPIC
-INCLUDEPATH=/usr/include/python3.4m
+INCLUDEPATH=$(shell python3-config --includes)
+INCLUDEPATHNUMPY=$(shell python3 -c 'import numpy as np; print(np.get_include())')
+PYTHONLINKERSETTINGS=$(shell python3-config --ldflags | cut -d" " -f 1)
+PYTHONLIBS=$(shell python3-config --libs)
cpp_simple: _gaussian_model_simple.so gaussian_model_simple.py
@@ -18,10 +21,10 @@ clean:
$(SWIG) $(SWIGFLAGS) -o $@ $<
%.o: %.cpp
- $(CC) $(CPPFLAGS) -I $(INCLUDEPATH) -c $< -o $@
+ $(CC) $(CPPFLAGS) -I $(INCLUDEPATHNUMPY) $(INCLUDEPATH) -c $< -o $@
_%.so: %.o %_wrap.o
- $(CC) -shared $^ -o $@
+ $(CC) -shared $^ $(PYTHONLINKERSETTINGS) $(PYTHONLIBS) -o $@
%.i:
$(WGET) "https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i"
diff --git a/examples/extensions/models/gaussian_f90/Makefile b/examples/extensions/models/gaussian_f90/Makefile
index 763e8097..bbe6c450 100644
--- a/examples/extensions/models/gaussian_f90/Makefile
+++ b/examples/extensions/models/gaussian_f90/Makefile
@@ -1,8 +1,9 @@
-F2PY=f2py3
+F2PY=python -m numpy.f2py
EXT_SUFFIX := $(shell python3-config --extension-suffix)
default: gaussian_model_simple$(EXT_SUFFIX)
%$(EXT_SUFFIX): %.f90
+ echo $(F2PY)
$(F2PY) -c -m $* $<
diff --git a/examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py b/examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py
index 5aedf9b2..b186c9a9 100644
--- a/examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py
+++ b/examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py
@@ -1,9 +1,12 @@
+import logging
import numpy as np
from numbers import Number
from abcpy.probabilisticmodels import ProbabilisticModel, Continuous, InputConnector
+logging.basicConfig(level=logging.INFO)
+
class Gaussian(ProbabilisticModel, Continuous):
"""
This class is a re-implementation of the `abcpy.continousmodels.Normal` for documentation purposes.
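Since every inference scheme in this patch now obtains its logger via `logging.getLogger(__name__)`, verbosity can also be tuned per module from a user script with the standard library alone. The following is a minimal sketch of that idea; the logger name `abcpy.inferences` follows from the module path shown in this diff, and everything else is plain `logging`:

```python
import logging

# As in the patched example script above: INFO-level output everywhere.
logging.basicConfig(level=logging.INFO)

# A finer-grained alternative: keep other libraries quiet, but enable the
# per-simulation DEBUG messages emitted by the schemes in abcpy/inferences.py.
logging.getLogger().setLevel(logging.WARNING)
logging.getLogger("abcpy.inferences").setLevel(logging.DEBUG)
```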
diff --git a/requirements.txt b/requirements.txt
index 1b29ef64..e87f6dc1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-numpy==1.14
+numpy
scipy
sklearn
glmnet==2.0.0
-sphinx==1.4.8
+sphinx
sphinx_rtd_theme
coverage
diff --git a/tests/backend_tests_mpi.py b/tests/backend_tests_mpi.py
index 18377f38..2c11f116 100644
--- a/tests/backend_tests_mpi.py
+++ b/tests/backend_tests_mpi.py
@@ -1,6 +1,8 @@
import unittest
+
from mpi4py import MPI
-from abcpy.backends import BackendMPI,BackendMPITestHelper
+
+from abcpy.backends import BackendMPI, BackendMPITestHelper
def setUpModule():
@@ -136,3 +138,15 @@ def square(self,x):
pds_map4 = backend_mpi.map(obj.square ,pds)
pds_res4 = backend_mpi.collect(pds_map4)
self.assertTrue(pds_res4==expected_result,"Failed pickle test for non-static function")
+
+ def test_exception_handling(self):
+
+ def function_with_possible_zero_division(i):
+ return 1 / i
+
+ data = [1, 2, 0]
+ pds = backend_mpi.parallelize(data)
+
+ pds_map = backend_mpi.map(function_with_possible_zero_division, pds)
+ with self.assertRaises(ZeroDivisionError):
+ backend_mpi.collect(pds_map)
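The new test above exercises the changed `map`/`collect` behavior in `abcpy/backends/mpi.py`: the worker-side `map` now catches any exception raised by the mapped function and returns it as the chunk's result, and the master re-raises it during `collect`. A sketch of how that surfaces in user code, assuming the script is launched under MPI with at least two ranks (rank 0 orchestrates, the others compute); the script name in the comment is only an example:

```python
# Launch with e.g.: mpirun -np 3 python3 zero_division_demo.py
from abcpy.backends import BackendMPI

backend = BackendMPI()

def reciprocal(i):
    return 1 / i  # raises ZeroDivisionError for i == 0

pds = backend.parallelize([1, 2, 0])
pds_map = backend.map(reciprocal, pds)  # the worker stores the exception as a result

try:
    backend.collect(pds_map)  # the master re-raises the first collected exception
except ZeroDivisionError as e:
    print("worker failure propagated to the master:", e)
```

The point of the change is that a failure inside a mapped function now travels back to the master with its original type instead of aborting the worker, which is exactly what the `assertRaises(ZeroDivisionError)` in the test checks.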
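The other small utility introduced by this patch, `abcpy.utils.cached`, memoizes single-argument functions; the `_bisection` helper in the SMCABC section of `abcpy/inferences.py` wraps its objective with it because bisection re-evaluates `func` at the same endpoints on every iteration. A small sketch of the effect — the quadratic `f` here is an arbitrary stand-in for the expensive epsilon objective:

```python
from abcpy.utils import cached

calls = 0

@cached
def f(x):
    global calls
    calls += 1          # count real evaluations
    return x * x - 2    # stand-in for the epsilon objective

# Bisection-style code touches the same endpoints repeatedly; with the
# cache, each distinct (hashable) argument is computed exactly once.
for _ in range(3):
    f(1.0); f(2.0); f(1.5)

print(calls)  # prints 3
```

Note the cache keys on the single argument, so the decorator only suits one-argument functions with hashable inputs, which matches the `_bisection` use case.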