diff --git a/.travis.yml b/.travis.yml
index ef6e87f8..de8cf030 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,13 +1,15 @@
-dist: trusty
+dist: xenial
 language: python
 python:
-- '3.4'
+- '3.7'
 addons:
   apt:
+    sources:
+    - deadsnakes
     packages:
     - gfortran
     - libboost-random-dev
-    - libpython3.4-dev
+    - python3.7-dev
     - python3-numpy
     - swig
     - libmpich-dev
@@ -16,6 +18,7 @@ install:
 - pip install -r requirements.txt
 - pip install -r requirements/backend-mpi.txt
 - pip install -r requirements/backend-spark.txt
+- pip install -r requirements/optional-requirements.txt
 script:
 - make test
 before_deploy:
diff --git a/README.md b/README.md
index 73d0a215..bd184ca1 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,8 @@ algorithms and other likelihood-free inference schemes. It presently includes:
 * ABCsubsim (ABC using subset simulation)
 * PMC (Population Monte Carlo) using approximations of likelihood functions
 * Random Forest Model Selection Scheme
-* Semi-automatic summary selection
+* Semi-automatic summary selection (with neural networks)
+* Summary selection using distance learning (with neural networks)
 
 ABCpy addresses the needs of domain scientists and data
 scientists by providing
@@ -26,10 +27,9 @@ scientists by providing
 # Documentation
 For more information, check out the
 
-* [Documentation](http://abcpy.readthedocs.io/en/v0.5.6) 
-* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.6/examples) directory and
-* [Reference](http://abcpy.readthedocs.io/en/v0.5.6/abcpy.html)
-
+* [Documentation](http://abcpy.readthedocs.io/en/v0.5.7) 
+* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.7/examples) directory and
+* [Reference](http://abcpy.readthedocs.io/en/v0.5.7/abcpy.html)
 
 Further, we provide a
 [collection of models](https://github.com/eth-cscs/abcpy-models) for which ABCpy
@@ -64,6 +64,10 @@ BibTex reference.
 
 Publications in which ABCpy was applied:
 
+* L. Pacchiardi, P. Künzli, M. Schöngens, B. Chopard, R. Dutta, "Distance-Learning for Approximate Bayesian
+  Computation to Model a Volcanic Eruption", 2020, Sankhya B, ISSN 0976-8394, 
+  [DOI: 10.1007/s13571-019-00208-8](https://doi.org/10.1007/s13571-019-00208-8).
+
 * R. Dutta, J. P.  Onnela, A. Mira, "Bayesian Inference of Spreading Processes
   on Networks", 2018, Proc. R. Soc. A, 474(2215), 20180129.
 
diff --git a/VERSION b/VERSION
index b49b2533..d3532a10 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.6
+0.5.7
diff --git a/examples/summaryselection/__init__.py b/abcpy/NN_utilities/__init__.py
similarity index 100%
rename from examples/summaryselection/__init__.py
rename to abcpy/NN_utilities/__init__.py
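
For reference, a minimal sketch of how the new `abcpy/NN_utilities` package introduced below backs the distance-learning summary selection advertised in the README (a sketch only, assuming `torch` is installed; the data here are random placeholders, and only functions defined in this patch are used):

```python
import numpy as np

from abcpy.NN_utilities.algorithms import contrastive_training
from abcpy.NN_utilities.networks import createDefaultNN
from abcpy.NN_utilities.utilities import compute_similarity_matrix

# placeholder data: 200 simulations with 10 features each, generated by 2-dimensional parameters
simulations = np.random.randn(200, 10).astype("float32")
parameters = np.random.randn(200, 2)

# two simulations count as "similar" if their generating parameters fall within the 10% quantile
# of the pairwise Euclidean distances between parameter values
similarity = compute_similarity_matrix(parameters, quantile=0.1)

# train an embedding network; Euclidean distances between embeddings then act as the learned distance
embedding_net = contrastive_training(simulations, similarity,
                                     embedding_net=createDefaultNN(10, 2)(),
                                     cuda=False, n_epochs=5)
```
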
diff --git a/abcpy/NN_utilities/algorithms.py b/abcpy/NN_utilities/algorithms.py
new file mode 100644
index 00000000..0df4305c
--- /dev/null
+++ b/abcpy/NN_utilities/algorithms.py
@@ -0,0 +1,174 @@
+try:
+    import torch
+    import torch.nn as nn
+    import torch.optim as optim
+    from torch.optim import lr_scheduler
+    from torch.utils.data import Dataset
+    from abcpy.NN_utilities.datasets import Similarities, SiameseSimilarities, TripletSimilarities, \
+        ParameterSimulationPairs
+    from abcpy.NN_utilities.losses import ContrastiveLoss, TripletLoss
+    from abcpy.NN_utilities.networks import SiameseNet, TripletNet
+    from abcpy.NN_utilities.trainer import fit
+except ImportError:
+    has_torch = False
+else:
+    has_torch = True
+
+
+def contrastive_training(samples, similarity_set, embedding_net, cuda, batch_size=16, n_epochs=200,
+                         positive_weight=None, load_all_data_GPU=False, margin=1., lr=None, optimizer=None,
+                         scheduler=None, start_epoch=0, verbose=False, optimizer_kwargs={}, scheduler_kwargs={},
+                         loader_kwargs={}):
+    """ Implements the contrastive distance-learning training of a neural network; it needs to be
+    provided with a set of samples and the corresponding similarity matrix."""
+
+    # If the dataset is small enough, we can speed up training by loading it all on the GPU at the beginning
+    # (load_all_data_GPU=True). This may crash if the dataset is too large. Note that in some cases using only the
+    # CPU may still be quicker.
+
+    # Do all the setups
+
+    # need to use the Similarities and SiameseSimilarities datasets
+
+    similarities_dataset = Similarities(samples, similarity_set, "cuda" if cuda and load_all_data_GPU else "cpu")
+    pairs_dataset = SiameseSimilarities(similarities_dataset, positive_weight=positive_weight)
+
+    if cuda:
+        if load_all_data_GPU:
+            loader_kwargs_2 = {'num_workers': 0, 'pin_memory': False}
+        else:
+            loader_kwargs_2 = {'num_workers': 1, 'pin_memory': True}
+    else:
+        loader_kwargs_2 = {}
+
+    loader_kwargs.update(loader_kwargs_2)
+
+    pairs_train_loader = torch.utils.data.DataLoader(pairs_dataset, batch_size=batch_size, shuffle=True,
+                                                     **loader_kwargs)
+
+    model_contrastive = SiameseNet(embedding_net)
+
+    if cuda:
+        model_contrastive.cuda()
+    loss_fn = ContrastiveLoss(margin)
+
+    if lr is None:
+        lr = 1e-3
+
+    if optimizer is None:  # default value
+        optimizer = optim.Adam(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
+    else:
+        optimizer = optimizer(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
+
+    if scheduler is None:  # default value, i.e. a dummy scheduler
+        scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1, last_epoch=-1)
+    else:
+        scheduler = scheduler(optimizer, **scheduler_kwargs)
+
+    # now train:
+    fit(pairs_train_loader, model_contrastive, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=start_epoch)
+
+    return embedding_net
+
+
+def triplet_training(samples, similarity_set, embedding_net, cuda, batch_size=16, n_epochs=400,
+                     load_all_data_GPU=False, margin=1., lr=None, optimizer=None, scheduler=None, start_epoch=0,
+                     verbose=False, optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
+    """ Implements the triplet distance-learning training of a neural network; it needs to be
+    provided with a set of samples and the corresponding similarity matrix."""
+
+    # If the dataset is small enough, we can speed up training by loading it all on the GPU at the beginning
+    # (load_all_data_GPU=True). This may crash if the dataset is too large. Note that in some cases using only the
+    # CPU may still be quicker.
+    # Do all the setups
+
+    # need to use the Similarities and TripletSimilarities datasets
+
+    similarities_dataset = Similarities(samples, similarity_set, "cuda" if cuda and load_all_data_GPU else "cpu")
+    triplets_dataset = TripletSimilarities(similarities_dataset)
+
+    if cuda:
+        if load_all_data_GPU:
+            loader_kwargs_2 = {'num_workers': 0, 'pin_memory': False}
+        else:
+            loader_kwargs_2 = {'num_workers': 1, 'pin_memory': True}
+    else:
+        loader_kwargs_2 = {}
+
+    loader_kwargs.update(loader_kwargs_2)
+
+    triplets_train_loader = torch.utils.data.DataLoader(triplets_dataset, batch_size=batch_size, shuffle=True,
+                                                        **loader_kwargs)
+
+    model_triplet = TripletNet(embedding_net)
+
+    if cuda:
+        model_triplet.cuda()
+    loss_fn = TripletLoss(margin)
+
+    if lr is None:
+        lr = 1e-3
+
+    if optimizer is None:  # default value
+        optimizer = optim.Adam(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
+    else:
+        optimizer = optimizer(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
+
+    if scheduler is None:  # default value, i.e. a dummy scheduler
+        scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1, last_epoch=-1)
+    else:
+        scheduler = scheduler(optimizer, **scheduler_kwargs)
+
+    # now train:
+    fit(triplets_train_loader, model_triplet, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=start_epoch)
+
+    return embedding_net
+
+
+def FP_nn_training(samples, target, embedding_net, cuda, batch_size=1, n_epochs=50, load_all_data_GPU=False,
+                   lr=1e-3, optimizer=None, scheduler=None, start_epoch=0, verbose=False, optimizer_kwargs={},
+                   scheduler_kwargs={}, loader_kwargs={}):
+    """ Implements the training of a neural network based on regressing the values of the parameters on the
+    corresponding simulation outcomes; it is effectively a training with a mean squared error loss. It needs to be
+    provided with a set of samples and the corresponding parameters that generated the samples. Note that in this case
+    the network has to have the same output size as the number of parameters, as the learned summary statistic will
+    have the same dimension as the parameters."""
+
+    # If the dataset is small enough, we can speed up training by loading it all on the GPU at the beginning
+    # (load_all_data_GPU=True). This may crash if the dataset is too large. Note that in some cases using only the
+    # CPU may still be quicker.
+
+    # Do all the setups
+
+    dataset_FP_nn = ParameterSimulationPairs(samples, target, "cuda" if cuda and load_all_data_GPU else "cpu")
+
+    if cuda:
+        if load_all_data_GPU:
+            loader_kwargs_2 = {'num_workers': 0, 'pin_memory': False}
+        else:
+            loader_kwargs_2 = {'num_workers': 1, 'pin_memory': True}
+    else:
+        loader_kwargs_2 = {}
+
+    loader_kwargs.update(loader_kwargs_2)
+
+    data_loader_FP_nn = torch.utils.data.DataLoader(dataset_FP_nn, batch_size=batch_size, shuffle=True, **loader_kwargs)
+
+    if cuda:
+        embedding_net.cuda()
+    loss_fn = nn.MSELoss(reduction="mean")
+
+    if optimizer is None:  # default value
+        optimizer = optim.Adam(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
+    else:
+        optimizer = optimizer(embedding_net.parameters(), lr=lr, **optimizer_kwargs)
+
+    if scheduler is None:  # default value, i.e. a dummy scheduler
+        scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1, last_epoch=-1)
+    else:
+        scheduler = scheduler(optimizer, **scheduler_kwargs)
+
+    # now train:
+    fit(data_loader_FP_nn, embedding_net, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=start_epoch)
+
+    return embedding_net
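
A minimal usage sketch of `FP_nn_training` above, which implements the regression-based (semi-automatic) summary learning (a sketch only, assuming `torch` is installed; the data are random placeholders and `createDefaultNN` comes from `abcpy/NN_utilities/networks.py` below):

```python
import numpy as np

from abcpy.NN_utilities.algorithms import FP_nn_training
from abcpy.NN_utilities.networks import createDefaultNN

# placeholder data: 100 simulations with 5 features each, generated by 2-dimensional parameters
samples = np.random.randn(100, 5).astype("float32")
parameters = np.random.randn(100, 2).astype("float32")

# the network output size must equal the number of parameters (see the docstring above)
embedding_net = createDefaultNN(input_size=5, output_size=2)()  # trailing () instantiates the returned class

embedding_net = FP_nn_training(samples, parameters, embedding_net, cuda=False,
                               batch_size=16, n_epochs=5)
```
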
diff --git a/abcpy/NN_utilities/datasets.py b/abcpy/NN_utilities/datasets.py
new file mode 100644
index 00000000..d0a9bfd3
--- /dev/null
+++ b/abcpy/NN_utilities/datasets.py
@@ -0,0 +1,164 @@
+import warnings
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+
+# DATASETS DEFINITION FOR DISTANCE LEARNING:
+
+class Similarities(Dataset):
+    """A dataset class that considers a set of samples and pairwise similarities defined between them.
+    Note that, for our application of computing distances, we are not interested in a train/test split. """
+
+    def __init__(self, samples, similarity_matrix, device):
+        """
+        Parameters:
+
+        samples: n_samples x n_features
+        similarity_matrix: n_samples x n_samples
+        """
+        if isinstance(samples, np.ndarray):
+            self.samples = torch.from_numpy(samples.astype("float32")).to(device)
+        else:
+            self.samples = samples.to(device)
+        if isinstance(similarity_matrix, np.ndarray):
+            self.similarity_matrix = torch.from_numpy(similarity_matrix.astype("int")).to(device)
+        else:
+            self.similarity_matrix = similarity_matrix.to(device)
+
+    def __getitem__(self, index):
+        """Return the required sample along with the similarities of the sample with all the others."""
+        return self.samples[index], self.similarity_matrix[index]
+
+    def __len__(self):
+        return self.samples.shape[0]
+
+
+class SiameseSimilarities(Dataset):
+    """
+    This class defines a dataset returning pairs of similar and dissimilar examples. It has to be instantiated with a
+    dataset of the class Similarities.
+    """
+
+    def __init__(self, similarities_dataset, positive_weight=None):
+
+        """If positive_weight=None, then for each sample we pick another random element to form a pair.
+        If positive_weight is a number (in [0,1]), we will pick positive samples with that probability
+        (if there are some)."""
+        self.dataset = similarities_dataset
+        self.positive_weight = positive_weight
+        self.samples = similarities_dataset.samples
+        self.similarity_matrix = similarities_dataset.similarity_matrix
+
+    def __getitem__(self, index):
+        """If self.positive_weight is None, or if the sample denoted by index has no similar elements, choose another
+        random sample to build the pair. If instead self.positive_weight is a number, choose a similar element with
+        that probability.
+        """
+        if self.positive_weight is None or (torch.sum(self.similarity_matrix[index]) < 2):
+            # sample a new index different from the present one
+            siamese_index = index
+            while siamese_index == index:
+                siamese_index = np.random.choice(range(self.samples.shape[0]))
+
+            target = self.similarity_matrix[index, siamese_index]
+
+        else:
+            # pick positive target with probability self.positive_weight
+            target = int(np.random.uniform() < self.positive_weight)
+            if target:
+                # sample a new index different from the present one
+                siamese_index = index
+                while siamese_index == index:
+                    siamese_index = np.random.choice(np.where(self.similarity_matrix[index].cpu())[0])
+            else:
+                # sample a new index different from the present one. This would not be necessary in theory,
+                # as a sample is always similar to itself.
+                # Leave this check anyway, to avoid problems in case the dataset is not perfectly defined.
+                siamese_index = index
+                while siamese_index == index:
+                    siamese_index = np.random.choice(np.where(self.similarity_matrix[index].cpu() == False)[0])
+
+        return (self.samples[index], self.samples[siamese_index]), target
+
+    def __len__(self):
+        return self.samples.shape[0]
+
+
+class TripletSimilarities(Dataset):
+    """
+    This class defines a dataset returning triplets of anchor, positive and negative examples. 
+    It has to be instantiated with a dataset of the class Similarities.
+    """
+
+    def __init__(self, similarities_dataset, ):
+        self.dataset = similarities_dataset
+        self.samples = similarities_dataset.samples
+        self.similarity_matrix = similarities_dataset.similarity_matrix
+
+    def __getitem__(self, index):
+        # sample a new index different from the present one
+        if torch.sum(self.similarity_matrix[index]) < 2:
+            # then we pick a new sample that has at least one similar example
+            warnings.warn("Sample {} in the dataset has no similar samples. \nIncrease the quantile defining the"
+                          " similarity matrix to avoid such problems.\nExecution will continue using another sample "
+                          "as the anchor instead.".format(index), RuntimeWarning)
+            new_anchor = index
+            while new_anchor == index:
+                new_anchor = np.random.randint(0, self.dataset.__len__())
+                # if this other sample does not have a similar one as well -> sample another one.
+                if torch.sum(self.similarity_matrix[new_anchor]) < 2:
+                    new_anchor = index
+            index = new_anchor
+
+        positive_index = index
+        while positive_index == index:
+            # this loops indefinitely if some sample has no other similar samples!
+            positive_index = np.random.choice(np.where(self.similarity_matrix[index].cpu())[0])
+
+        # sample a new index different from the present one. This would not be necessary in theory,
+        # as a sample is always similar to itself.
+        # Leave this check anyway, to avoid problems in case the dataset is not perfectly defined.
+        negative_index = index
+        while negative_index == index:
+            negative_index = np.random.choice(np.where(self.similarity_matrix[index].cpu() == False)[0])
+
+        return (self.samples[index], self.samples[positive_index], self.samples[negative_index]), []
+
+    def __len__(self):
+        return self.samples.shape[0]
+
+
+# DATASET DEFINITION FOR SUFFICIENT STATS LEARNING:
+
+class ParameterSimulationPairs(Dataset):
+    """A dataset class consisting of parameter-simulation pairs: the data contains the simulations, with shape
+    (n_samples, n_features), and the targets contain the ground truth of the parameters, with shape (n_samples, 2).
+    Note that n_features could also have more than one dimension here. """
+
+    def __init__(self, simulations, parameters, device):
+        """
+        Parameters:
+
+        simulations: (n_samples,  n_features)
+        parameters: (n_samples, 2)
+        """
+        if simulations.shape[0] != parameters.shape[0]:
+            raise RuntimeError("The number of simulations must be the same as the number of parameters.")
+
+        if isinstance(simulations, np.ndarray):
+            self.simulations = torch.from_numpy(simulations.astype("float32")).to(device)
+        else:
+            self.simulations = simulations.to(device)
+        if isinstance(parameters, np.ndarray):
+            self.parameters = torch.from_numpy(parameters.astype("float32")).to(device)
+        else:
+            self.parameters = parameters.to(device)
+
+    def __getitem__(self, index):
+        """Return the required sample along with the ground truth parameter."""
+        return self.simulations[index], self.parameters[index]
+
+    def __len__(self):
+        return self.parameters.shape[0]
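
To show how these dataset classes compose, a small sketch (assuming `torch` is installed; the similarity matrix here is a toy construction for illustration, not one produced by `compute_similarity_matrix`):

```python
import numpy as np

from abcpy.NN_utilities.datasets import Similarities, SiameseSimilarities, TripletSimilarities

samples = np.random.randn(20, 3).astype("float32")
# toy similarity: two samples are "similar" when their first features have the same sign
similarity = (np.sign(samples[:, :1]) == np.sign(samples[:, :1].T))

base = Similarities(samples, similarity, device="cpu")

(x1, x2), label = SiameseSimilarities(base, positive_weight=0.5)[0]  # a pair plus a 0/1 similarity label
(anchor, positive, negative), _ = TripletSimilarities(base)[0]       # an anchor/positive/negative triplet
```
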
diff --git a/abcpy/NN_utilities/losses.py b/abcpy/NN_utilities/losses.py
new file mode 100644
index 00000000..4dc28cb2
--- /dev/null
+++ b/abcpy/NN_utilities/losses.py
@@ -0,0 +1,39 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ContrastiveLoss(nn.Module):
+    """
+    Contrastive loss
+    Takes embeddings of two samples and a target label == 1 if samples are from the same class and label == 0 otherwise.
+
+    Code from https://github.com/adambielski/siamese-triplet"""
+
+    def __init__(self, margin):
+        super(ContrastiveLoss, self).__init__()
+        self.margin = margin
+        self.eps = 1e-9
+
+    def forward(self, output1, output2, target, size_average=True):
+        distances = (output2 - output1).pow(2).sum(1)  # squared distances
+        losses = 0.5 * (target.float() * distances +
+                        (1 + -1 * target).float() * F.relu(self.margin - (distances + self.eps).sqrt()).pow(2))
+        return losses.mean() if size_average else losses.sum()
+
+
+class TripletLoss(nn.Module):
+    """
+    Triplet loss
+    Takes embeddings of an anchor sample, a positive sample and a negative sample.
+
+    Code from https://github.com/adambielski/siamese-triplet"""
+
+    def __init__(self, margin):
+        super(TripletLoss, self).__init__()
+        self.margin = margin
+
+    def forward(self, anchor, positive, negative, size_average=True):
+        distance_positive = (anchor - positive).pow(2).sum(1)  # .pow(.5)
+        distance_negative = (anchor - negative).pow(2).sum(1)  # .pow(.5)
+        losses = F.relu(distance_positive - distance_negative + self.margin)
+        return losses.mean() if size_average else losses.sum()
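
A quick numerical sanity check of the two losses on random embeddings (a sketch assuming `torch` is installed; the comments restate the formulas implemented above, with d the Euclidean distance between embeddings):

```python
import torch

from abcpy.NN_utilities.losses import ContrastiveLoss, TripletLoss

emb1, emb2, emb3 = (torch.randn(8, 2) for _ in range(3))
labels = torch.randint(0, 2, (8,))  # 1 = similar pair, 0 = dissimilar pair

# mean over the batch of 0.5 * [y * d(emb1, emb2)^2 + (1 - y) * relu(margin - d(emb1, emb2))^2]
print(ContrastiveLoss(margin=1.)(emb1, emb2, labels))

# mean over the batch of relu(d(anchor, positive)^2 - d(anchor, negative)^2 + margin)
print(TripletLoss(margin=1.)(emb1, emb2, emb3))
```
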
diff --git a/abcpy/NN_utilities/networks.py b/abcpy/NN_utilities/networks.py
new file mode 100644
index 00000000..72e80168
--- /dev/null
+++ b/abcpy/NN_utilities/networks.py
@@ -0,0 +1,91 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SiameseNet(nn.Module):
+    """ This is used in the contrastive distance learning. It is a network wrapping a standard neural network and
+    feeding two samples through it at once.
+
+    From https://github.com/adambielski/siamese-triplet"""
+
+    def __init__(self, embedding_net):
+        super(SiameseNet, self).__init__()
+        self.embedding_net = embedding_net
+
+    def forward(self, x1, x2):
+        output1 = self.embedding_net(x1)
+        output2 = self.embedding_net(x2)
+        return output1, output2
+
+    def get_embedding(self, x):
+        return self.embedding_net(x)
+
+
+class TripletNet(nn.Module):
+    """ This is used in the triplet distance learning. It is a network wrapping a standard neural network and
+    feeding three samples through it at once.
+
+    From https://github.com/adambielski/siamese-triplet"""
+
+    def __init__(self, embedding_net):
+        super(TripletNet, self).__init__()
+        self.embedding_net = embedding_net
+
+    def forward(self, x1, x2, x3):
+        output1 = self.embedding_net(x1)
+        output2 = self.embedding_net(x2)
+        output3 = self.embedding_net(x3)
+        return output1, output2, output3
+
+    def get_embedding(self, x):
+        return self.embedding_net(x)
+
+
+def createDefaultNN(input_size, output_size, hidden_sizes=None):
+    """Function returning a fully connected neural network class with a given input and output size, and optionally
+    given hidden layer sizes (if these are not given, they are determined from the input and output size with a fixed
+    expression).
+
+    In order to instantiate the network, you need to write createDefaultNN(input_size, output_size)(), as the function
+    returns a class, and the trailing () is needed to instantiate an object."""
+    class DefaultNN(nn.Module):
+        """Neural network class with sizes determined by the upper level variables."""
+        def __init__(self):
+            super(DefaultNN, self).__init__()
+            # put some fully connected layers:
+
+            if hidden_sizes is not None and len(hidden_sizes) == 0:
+                # it is effectively a linear network
+                self.fc_in = nn.Linear(input_size, output_size)
+
+            else:
+                if hidden_sizes is None:
+                    # then set some default values for the hidden layer sizes; is this parametrization reasonable?
+                    hidden_sizes_list = [int(input_size * 1.5), int(input_size * 0.75 + output_size * 3),
+                                         int(output_size * 5)]
+
+                else:
+                    hidden_sizes_list = hidden_sizes
+
+                self.fc_in = nn.Linear(input_size, hidden_sizes_list[0])
+
+                # define now the hidden layers; use nn.ModuleList (rather than a plain Python list) so that they are
+                # registered as submodules and their parameters are seen by .parameters() and state_dict()
+                self.fc_hidden = nn.ModuleList()
+                for i in range(len(hidden_sizes_list) - 1):
+                    self.fc_hidden.append(nn.Linear(hidden_sizes_list[i], hidden_sizes_list[i + 1]))
+                self.fc_out = nn.Linear(hidden_sizes_list[-1], output_size)
+
+        def forward(self, x):
+            if not hasattr(self, "fc_hidden"):
+                # hidden_sizes was provided with length 0, so the network is a single linear layer
+                return self.fc_in(x)
+
+            x = F.relu(self.fc_in(x))
+            for i in range(len(self.fc_hidden)):
+                x = F.relu(self.fc_hidden[i](x))
+
+            x = self.fc_out(x)
+
+            return x
+
+    return DefaultNN
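
For reference, a small sketch of what `createDefaultNN` builds (assuming `torch` is installed): with `input_size=10` and `output_size=2`, the default expression above yields hidden layers of sizes [15, 13, 10].

```python
from abcpy.NN_utilities.networks import createDefaultNN

# default architecture: 10 -> 15 -> 13 -> 10 -> 2, with ReLU activations between layers
net = createDefaultNN(input_size=10, output_size=2)()  # note the trailing (): the function returns a class
print(net)

# an empty list of hidden sizes gives a purely linear map 10 -> 2
linear_net = createDefaultNN(input_size=10, output_size=2, hidden_sizes=[])()
```
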
diff --git a/abcpy/NN_utilities/trainer.py b/abcpy/NN_utilities/trainer.py
new file mode 100644
index 00000000..dbcf9533
--- /dev/null
+++ b/abcpy/NN_utilities/trainer.py
@@ -0,0 +1,72 @@
+from tqdm import tqdm
+import logging
+
+
+def fit(train_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, start_epoch=0):
+    """
+    Basic function to train a neural network given a train_loader, a loss function and an optimizer.
+
+    Loaders, model, loss function and metrics should work together for a given task,
+    i.e. the model should be able to process the data output of the loaders, and the loss function should process
+    the target output of the loaders together with the outputs of the model.
+
+    Examples: Classification: batch loader, classification model, NLL loss, accuracy metric
+    Siamese network: Siamese loader, siamese model, contrastive loss
+
+    Adapted from https://github.com/adambielski/siamese-triplet
+    """
+
+    logger = logging.getLogger("NN Trainer")
+
+    for epoch in range(0, start_epoch):
+        scheduler.step()
+
+    for epoch in tqdm(range(start_epoch, n_epochs)):
+        scheduler.step()
+
+        # Train stage
+        train_loss = train_epoch(train_loader, model, loss_fn, optimizer, cuda)
+
+        logger.debug('Epoch: {}/{}. Train set: Average loss: {:.4f}'.format(epoch + 1, n_epochs, train_loss))
+
+
+def train_epoch(train_loader, model, loss_fn, optimizer, cuda):
+    """Function implementing the training in one epoch.
+
+    Adapted from https://github.com/adambielski/siamese-triplet
+    """
+    model.train()
+    losses = []
+    total_loss = 0
+
+    for batch_idx, (data, target) in enumerate(train_loader):
+        target = target if len(target) > 0 else None
+        if not type(data) in (tuple, list):
+            data = (data,)
+        if cuda:
+            data = tuple(d.cuda() for d in data)
+            if target is not None:
+                target = target.cuda()
+
+        optimizer.zero_grad()
+        outputs = model(*data)
+
+        if type(outputs) not in (tuple, list):
+            outputs = (outputs,)
+
+        loss_inputs = outputs
+        if target is not None:
+            target = (target,)
+            loss_inputs += target
+
+        loss_outputs = loss_fn(*loss_inputs)
+        loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs
+        losses.append(loss.item())
+        total_loss += loss.item()
+        loss.backward()
+        optimizer.step()
+
+        losses = []
+
+    total_loss /= (batch_idx + 1)
+    return total_loss
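
`fit` is agnostic to the loader/model/loss combination, as the docstring above explains; here is a minimal, self-contained sketch driving it with a plain regression setup (assuming `torch` and `tqdm` are installed; all names and sizes are placeholders):

```python
import torch
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, TensorDataset

from abcpy.NN_utilities.trainer import fit

x, y = torch.randn(64, 5), torch.randn(64, 2)
loader = DataLoader(TensorDataset(x, y), batch_size=16, shuffle=True)

model = nn.Linear(5, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=1)  # dummy scheduler, as in algorithms.py

fit(loader, model, nn.MSELoss(), optimizer, scheduler, n_epochs=5, cuda=False)
```
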
diff --git a/abcpy/NN_utilities/utilities.py b/abcpy/NN_utilities/utilities.py
new file mode 100644
index 00000000..92fb6171
--- /dev/null
+++ b/abcpy/NN_utilities/utilities.py
@@ -0,0 +1,55 @@
+try:
+    import torch
+except ImportError:
+    has_torch = False
+else:
+    has_torch = True
+
+import numpy as np
+import logging
+
+
+def dist2(x, y):
+    """Compute the square of the Euclidean distance between two arrays of the same length."""
+    return np.dot(x - y, x - y)
+
+
+def compute_similarity_matrix(target, quantile=0.1, return_pairwise_distances=False):
+    """Compute the similarity matrix between some values, using a given quantile of the pairwise Euclidean distances
+    as the similarity threshold.
+
+    If return_pairwise_distances is True, it also returns the matrix of pairwise distances."""
+
+    logger = logging.getLogger("Compute_similarity_matrix")
+
+    n_samples = target.shape[0]
+
+    pairwise_distances = np.zeros([n_samples] * 2)
+
+    for i in range(n_samples):
+        for j in range(n_samples):
+            pairwise_distances[i, j] = dist2(target[i], target[j])
+
+    q = np.quantile(pairwise_distances[~np.eye(n_samples, dtype=bool)].reshape(-1), quantile)
+
+    similarity_set = pairwise_distances < q
+
+    logger.info("Fraction of similar pairs (excluding self-similarity): {}".format(
+        (np.sum(similarity_set) - n_samples) / n_samples ** 2))
+
+    if (np.sum(similarity_set) - n_samples) / n_samples ** 2 == 0:
+        raise RuntimeError("The chosen quantile is too small, as there are no similar samples according to the "
+                           "corresponding threshold.\nPlease increase the quantile.")
+
+    return (similarity_set, pairwise_distances) if return_pairwise_distances else similarity_set
+
+
+def save_net(path, net):
+    """Function to save the Pytorch state_dict of a network to a file."""
+    torch.save(net.state_dict(), path)
+
+
+def load_net(path, network_class):
+    """Function to load a network from a Pytorch state_dict, given the corresponding network_class."""
+    net = network_class()
+    net.load_state_dict(torch.load(path))
+    return net.eval()  # set the network to eval mode; needed with batch normalization and dropout layers.
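
A usage sketch for the helpers above (assuming `torch` is installed; the file name and sizes are arbitrary):

```python
import numpy as np

from abcpy.NN_utilities.networks import createDefaultNN
from abcpy.NN_utilities.utilities import compute_similarity_matrix, save_net, load_net

parameters = np.random.randn(50, 2)
similarity, distances = compute_similarity_matrix(parameters, quantile=0.2,
                                                  return_pairwise_distances=True)
print(similarity.shape, distances.shape)  # both (50, 50); `similarity` is a boolean matrix

# round-trip a network through a state_dict file
net_class = createDefaultNN(input_size=5, output_size=2)
save_net("embedding_net.pth", net_class())
net = load_net("embedding_net.pth", net_class)  # returned in eval mode
```
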
diff --git a/abcpy/approx_lhd.py b/abcpy/approx_lhd.py
index a14a54df..2b6b800b 100644
--- a/abcpy/approx_lhd.py
+++ b/abcpy/approx_lhd.py
@@ -48,7 +48,7 @@ def likelihood(y_obs, y_sim):
         raise NotImplemented
 
 
-class SynLiklihood(Approx_likelihood):
+class SynLikelihood(Approx_likelihood):
     """This class implements the approximate likelihood function which computes the approximate
     likelihood using the synthetic likelihood approach described in Wood [1].
     For synthetic likelihood approximation, we compute the robust precision matrix using Ledoit and Wolf's [2]
@@ -67,7 +67,7 @@ def __init__(self, statistics_calc):
 
 
     def likelihood(self, y_obs, y_sim):
-        # print("DEBUG: SynLiklihood.likelihood().")
+        # print("DEBUG: SynLikelihood.likelihood().")
         if not isinstance(y_obs, list):
             # print("type(y_obs) : ", type(y_obs), " , type(y_sim) : ", type(y_sim))
             # print("y_obs : ", y_obs)
diff --git a/abcpy/inferences.py b/abcpy/inferences.py
index 2cefa7cd..535e84ce 100644
--- a/abcpy/inferences.py
+++ b/abcpy/inferences.py
@@ -1,7 +1,7 @@
 import copy
 import logging
 import numpy as np
-
+import time
 import sys
 
 from abc import ABCMeta, abstractmethod, abstractproperty
@@ -1161,12 +1161,15 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_
             # 1: Calculate  parameters
             self.logger.info("Initial accepted parameters")
             params_and_dists_pds = self.backend.map(self._accept_parameter, data_pds)
+            self.logger.debug("Map step of parallelism is finished")
             params_and_dists = self.backend.collect(params_and_dists_pds)
+            self.logger.debug("Collect step of parallelism is finished")
             new_parameters, new_distances, new_all_parameters, new_all_distances, index, acceptance, counter = [list(t) for t in
                                                                                                        zip(
                                                                                                            *params_and_dists)]
 
             # Keeping counter of number of simulations
+            self.logger.debug("Counting number of simulations")
             for count in counter:
                 self.simulation_counter+=count
 
@@ -1183,6 +1186,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_
                 all_distances = new_all_distances
 
             # Initialize/Update the accepted parameters and their corresponding distances
+            self.logger.info("Initialize/Update the accepted parameters and their corresponding distances")
             if accepted_parameters is None:
                 accepted_parameters = new_parameters
             else:
@@ -1192,10 +1196,12 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_
             distances[index[acceptance == 1]] = new_distances[acceptance == 1]
 
             # 2: Smoothing of the distances
+            self.logger.info("Smoothing of the distance")
             smooth_distances[index[acceptance == 1]] = self._smoother_distance(distances[index[acceptance == 1]],
                                                                                all_distances)
 
             # 3: Initialize/Update U, epsilon and covariance of perturbation kernel
+            self.logger.info("Initialize/Update U, epsilon and covariance of perturbation kernel")
             if aStep == 0:
                 U = self._average_redefined_distance(self._smoother_distance(all_distances, all_distances), epsilon)
             else:
@@ -1214,10 +1220,10 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_
                             epsilon, U, acceptance_rate
                             )
                         )
-                self.logger.debug(msg)
+                self.logger.info(msg)
                 if acceptance_rate < ar_cutoff:
                     broken_preemptively = True
-                    self.logger.debug("Stopping as acceptance rate is lower than cutoff")
+                    self.logger.info("Stopping as acceptance rate is lower than cutoff")
                     break
 
             # 5: Resampling if number of accepted particles greater than resample
@@ -1235,20 +1241,23 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_
                 epsilon = self._schedule(U, v)
 
                 ## Print effective sampling size
-                print('Resampling: Effective sampling size: ', 1 / sum(pow(weight / sum(weight), 2)))
+                self.logger.info('Resampling: Effective sampling size: '+str(1 / sum(pow(weight / sum(weight), 2))))
                 accept = 0
                 samples_until = 0
 
                 ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix
                 # Broadcast Accepted parameters and add to journal
+                self.logger.info("Broadcast Accepted parameters and add to journal")
                 self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights=accepted_weights, accepted_parameters=accepted_parameters)
                 # Compute Accepetd Kernel parameters and broadcast them
+                self.logger.debug("Compute Accepted Kernel parameters and broadcast them")
                 kernel_parameters = []
                 for kernel in self.kernel.kernels:
                     kernel_parameters.append(
                         self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models))
                 self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters)
                 # Compute Kernel Covariance Matrix and broadcast it
+                self.logger.debug("Compute Kernel Covariance Matrix and broadcast it")
                 new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
                 accepted_cov_mats = []
                 for new_cov_mat in new_cov_mats:
@@ -1261,7 +1270,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_
 
                 if (full_output == 1 and aStep<= steps-1):
                     ## Saving intermediate configuration to output journal.
-                    print('Saving after resampling')
+                    self.logger.info('Saving after resampling')
                     journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
                     journal.add_weights(copy.deepcopy(accepted_weights))
                     journal.add_distances(copy.deepcopy(distances))
@@ -1419,7 +1428,9 @@ def _accept_parameter(self, data, npc=None):
                 self.sample_from_prior(rng=rng)
                 new_theta = self.get_parameters()
                 all_parameters.append(new_theta)
+                t0 = time.time()
                 y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc)
+                self.logger.debug("Simulation took " + str(time.time() - t0) + " sec")
                 counter+=1
                 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim)
                 all_distances.append(distance)
@@ -1436,13 +1447,15 @@ def _accept_parameter(self, data, npc=None):
                 if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0:
                     new_theta = perturbation_output[1]
                     break
-
+            t0 = time.time()
             y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc)
+            self.logger.debug("Simulation took " + str(time.time() - t0) + " sec")
             counter+=1
             distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim)
             smooth_distance = self._smoother_distance([distance], self.all_distances_bds.value())
 
             ## Calculate acceptance probability:
+            self.logger.debug("Calculate acceptance probability")
             ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model,
                 self.accepted_parameters_manager.accepted_parameters_bds.value()[index])
             ratio_likelihood_prob = np.exp((self.smooth_distances_bds.value()[index] - smooth_distance) / self.epsilon)
@@ -2970,4 +2983,4 @@ def _accept_parameter_r_hit_kernel(self, rng_and_index, npc=None):
                 self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index])
                 y_sim = self.accepted_y_sim_bds.value()[index]
         distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim)
-        return (self.get_parameters(), y_sim, distance, counter)
\ No newline at end of file
+        return (self.get_parameters(), y_sim, distance, counter)
diff --git a/abcpy/output.py b/abcpy/output.py
index 41bb9775..9a77ee67 100644
--- a/abcpy/output.py
+++ b/abcpy/output.py
@@ -1,6 +1,9 @@
-import numpy as np
 import pickle
+import warnings
 
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.stats import gaussian_kde
 
 
 class Journal:
@@ -14,13 +17,13 @@ class Journal:
         a nxpxt matrix
     weights : numpy.array
         a nxt matrix
-    opt_value: numpy.array
+    opt_value : numpy.array
         nxp matrix containing for each parameter the evaluated objective function for every time step
-    configuration: Python dictionary
+    configuration : Python dictionary
         dictionary containing the schemes configuration parameters
 
     """
-    
+
     def __init__(self, type):
         """
         Initializes a new output journal of given type.
@@ -31,7 +34,7 @@ def __init__(self, type):
             type=0 only logs final parametersa and weight (production use);
             type=1 logs all generated information (reproducibily use).
         """
-        
+
         self.accepted_parameters = []
         self.names_and_parameters = []
         self.weights = []
@@ -39,14 +42,12 @@ def __init__(self, type):
         self.opt_values = []
         self.configuration = {}
 
-
-
         if type not in [0, 1]:
             raise ValueError("Parameter type has not valid value.")
         else:
             self._type = type
 
-        self.number_of_simulations =[]
+        self.number_of_simulations = []
 
     @classmethod
     def fromFile(cls, filename):
@@ -72,7 +73,7 @@ def fromFile(cls, filename):
         >>> jnl = Journal.fromFile('example_output.jnl')
 
         """
-        
+
         with open(filename, 'rb') as input:
             journal = pickle.load(input)
         return journal
@@ -86,7 +87,7 @@ def add_user_parameters(self, names_and_params):
         names_and_params: list
             Each entry is a tupel, where the first entry is the name of the probabilistic model, and the second entry is the parameters associated with this model.
         """
-        if(self._type == 0):
+        if self._type == 0:
             self.names_and_parameters = [dict(names_and_params)]
         else:
             self.names_and_parameters.append(dict(names_and_params))
@@ -280,7 +281,9 @@ def posterior_mean(self, iteration=None):
 
         return_value = []
         for keyind in params.keys():
-            return_value.append((keyind, np.average(np.array(params[keyind]).squeeze(), weights = weights.reshape(len(weights),), axis = 0)))
+            return_value.append((keyind,
+                                 np.average(np.array(params[keyind]).squeeze(), weights=weights.reshape(len(weights), ),
+                                            axis=0)))
 
         return dict(return_value)
 
@@ -308,18 +311,18 @@ def posterior_cov(self, iteration=None):
         for keyind in params.keys():
             joined_params.append(np.array(params[keyind]).squeeze(axis=1))
 
-        return np.cov(np.transpose(np.hstack(joined_params)), aweights = weights.reshape(len(weights),)), params.keys()
+        return np.cov(np.transpose(np.hstack(joined_params)), aweights=weights.reshape(len(weights), )), params.keys()
 
-    def posterior_histogram(self, iteration=None, n_bins = 10):
+    def posterior_histogram(self, iteration=None, n_bins=10):
         """
         Computes a weighted histogram of multivariate posterior samples
-        andreturns histogram H and A list of p arrays describing the bin 
+        and returns histogram H and a list of p arrays describing the bin
         edges for each dimension.
         
         Returns
         -------
         python list 
-            containing two elements (H = np.ndarray, edges = list of p arraya)
+            containing two elements (H = np.ndarray, edges = list of p arrays)
         """
         if iteration is None:
             endp = len(self.names_and_parameters) - 1
@@ -333,5 +336,356 @@ def posterior_histogram(self, iteration=None, n_bins = 10):
         for keyind in params.keys():
             joined_params.append(np.array(params[keyind]).squeeze(axis=1))
 
-        H, edges = np.histogramdd(np.hstack(joined_params), bins = n_bins, weights = weights.reshape(len(weights),))
-        return [H, edges]
\ No newline at end of file
+        H, edges = np.histogramdd(np.hstack(joined_params), bins=n_bins, weights=weights.reshape(len(weights), ))
+        return [H, edges]
+
+    def plot_posterior_distr(self, parameters_to_show=None, ranges_parameters=None, iteration=None, show_samples=None,
+                             single_marginals_only=False, double_marginals_only=False, write_posterior_mean=True,
+                             true_parameter_values=None, contour_levels=14, show_density_values=True, bw_method=None,
+                             path_to_save=None):
+        """
+        Produces a visualization of the posterior distribution of the parameters of the model.
+
+        A Gaussian kernel density estimate (KDE) is used to approximate the density starting from the sampled
+        parameters. Specifically, it produces a scatterplot matrix, where the diagonal contains single parameter
+        marginals, while the off diagonal elements contain the contourplot for the paired marginals for each possible
+        pair of parameters.
+
+        This visualization is not satisfactory for parameters that take on discrete values, especially when the number
+        of values they can assume is small.
+
+        Parameters
+        ----------
+        parameters_to_show : list, optional
+            a list of the parameters for which you want to plot the posterior distribution. For each parameter, you need
+            to provide the name string as it was defined in the model.
+            If `None`, then all parameters will be displayed.
+        ranges_parameters : Python dictionary, optional
+            a dictionary in which you can optionally provide the plotting range for the parameters that you chose to
+            display. You can use this even if `parameters_to_show=None`. The dictionary key is the name of the parameter,
+            and the range needs to be an array-like of the form [lower_limit, upper_limit]. For instance:
+            {"theta" : [0,2]} specifies that you want to plot the posterior distribution for the parameter "theta" in
+            the range [0,2].
+        iteration : int, optional
+            specify the iteration for which to plot the posterior distribution, in the case of a sequential algorithm.
+            If `None`, then the last iteration will be used.
+        show_samples : boolean, optional
+            specifies if you want to show the posterior samples superimposed on the contour plots of the posterior
+            distribution. If `None`, the default behaviour is the following: if the posterior samples are associated
+            with importance weights, then the samples are not shown (in fact, the KDE for the posterior distribution
+            takes the weights into account, and showing the samples may be misleading). Otherwise, if the posterior
+            samples are not associated with weights, they are displayed by default.
+        single_marginals_only : boolean, optional
+            if `True`, the method does not show the paired marginals but only the single parameter marginals;
+            otherwise, it shows the paired marginals as well. Default to `False`.
+        double_marginals_only : boolean, optional
+            if `True`, the method shows the contour plot for the marginal posterior for each possible pair of parameters
+            in the parameters that have to be shown (all parameters of the model if `parameters_to_show` is None).
+            Default to `False`.
+        write_posterior_mean : boolean, optional
+            Whether to write or not the posterior mean on the single marginal plots. Default to `True`.
+        true_parameter_values: array-like, optional
+            you can provide here the true values of the parameters, if known, and they will be displayed in the
+            posterior plot. It has to be an array-like of the same length as `parameters_to_show` (if that is provided),
+            otherwise of length equal to the number of parameters in the model, with entries corresponding to the
+            true value of each parameter (in case `parameters_to_show` is not provided, the order of the parameters is
+            the same order the model `forward_simulate` step uses).
+        contour_levels: integer, optional
+            The number of levels to be used in the contour plots. Default to 14.
+        show_density_values: boolean, optional
+            If `True`, the method displays the value of the density at each contour level in the contour plot. Default
+            to `True`.
+        bw_method: str, scalar or callable, optional
+            The parameter of the `scipy.stats.gaussian_kde` defining the method used to calculate the bandwidth in the
+            Gaussian kernel density estimator. Please refer to the documentation therein for details. Default to `None`.
+        path_to_save : string, optional
+            if provided, save the figure in png format in the specified path.
+
+        Returns
+        -------
+        list
+            a list containing the matplotlib "fig, axes" objects defining the plot. Can be useful for further
+            modifications.
+        """
+
+        # if you do not pass any name, then we plot all parameters.
+        # you can get the list of parameters from the journal file as:
+        if parameters_to_show is None:
+            parameters_to_show = list(self.names_and_parameters[0].keys())
+        else:
+            for param_name in parameters_to_show:
+                if param_name not in self.names_and_parameters[0].keys():
+                    raise KeyError("Parameter '{}' is not a parameter of the model.".format(param_name))
+
+        if single_marginals_only and double_marginals_only:
+            raise RuntimeError("You cannot request single-marginal-only and double-marginal-only plots at the same "
+                               "time. At least one of `single_marginals_only` and `double_marginals_only` has to be "
+                               "False.")
+
+        if len(parameters_to_show) == 1 and double_marginals_only:
+            raise RuntimeError("It is not possible to plot a bivariate marginal if only one parameter is in the "
+                               "`parameters_to_show` list")
+
+        if true_parameter_values is not None:
+            if len(true_parameter_values) != len(parameters_to_show):
+                raise RuntimeError("You need to provide values for all the parameters to be shown.")
+
+        meanpost = np.array([self.posterior_mean()[x] for x in parameters_to_show])
+
+        post_samples_dict = {name: np.concatenate(self.get_parameters(iteration)[name]) for name in parameters_to_show}
+
+        weights = np.concatenate(self.get_weights(iteration))
+        all_weights_are_1 = all([weights[i] == 1 for i in range(len(weights))])
+
+        if show_samples is None:
+            # by default, we display samples if the weights are all 1. Otherwise, we choose not to display them by
+            # default as they may be misleading
+            if all_weights_are_1:
+                show_samples = True
+            else:
+                show_samples = False
+
+        elif not all_weights_are_1 and show_samples is True:
+            # in this case, the user required to show the sample points but importance weights are present. Then warn
+            # the user about this
+            warnings.warn(
+                "You requested to show the sample points; however, note that the algorithm generated posterior samples "
+                "associated with weights, and the kernel density estimate used to produce the density plots considers "
+                "them. Therefore, the resulting plot may be misleading.", RuntimeWarning)
+
+        data = np.hstack(list(post_samples_dict.values()))
+        datat = data.transpose()
+
+        # now set up the range of parameters. If the range for a given parameter is not given, use the min and max
+        # value of posterior samples.
+
+        if ranges_parameters is None:
+            ranges_parameters = {}
+
+        # check if the user provided range for some parameters that are not requested to be displayed:
+        if not all([key in post_samples_dict for key in ranges_parameters.keys()]):
+            warnings.warn("You provided a range for a parameter that was not requested to be displayed (or it may be "
+                          "that you misspelled something). This will be ignored.", RuntimeWarning)
+
+        for name in parameters_to_show:
+            if name in ranges_parameters:
+                # then check the format is correct
+                if isinstance(ranges_parameters[name], list):
+                    if not (len(ranges_parameters[name]) == 2 and isinstance(ranges_parameters[name][0], (int, float))):
+                        raise TypeError(
+                            "The range for the parameter {} should be an array-like with two numbers.".format(name))
+                elif isinstance(ranges_parameters[name], np.ndarray):
+                    if not (ranges_parameters[name].shape == (2,) or ranges_parameters[name].shape == (2, 1)):
+                        raise TypeError(
+                            "The range for the parameter {} should be an array-like with two numbers.".format(name))
+            else:
+                # add as new range the min and max values. We also add 5% of the whole range on both sides for
+                # better visualization
+                difference = np.max(post_samples_dict[name]) - np.min(post_samples_dict[name])
+                ranges_parameters[name] = [np.min(post_samples_dict[name]) - 0.05 * difference,
+                                           np.max(post_samples_dict[name]) + 0.05 * difference]
+
+        def write_post_mean_function(ax, post_mean, name):
+            ax.text(0.15, 0.06, r"Post. mean = {:.2f}".format(post_mean), size=14.5 * 2 / len(meanpost),
+                    transform=ax.transAxes)
+
+        def scatterplot_matrix(data, meanpost, names, single_marginals_only=False, **kwargs):
+            """
+            Plots a scatterplot matrix of subplots. Each row of "data" is plotted
+            against the other rows, resulting in an nrows by nrows grid of subplots with the
+            diagonal subplots labeled with "parameters_to_show". Additional keyword arguments are
+            passed on to matplotlib's "plot" command. Returns the matplotlib figure
+            object containing the subplot grid.
+            """
+            numvars, numdata = data.shape
+            if single_marginals_only:
+                fig, axes = plt.subplots(nrows=1, ncols=numvars, figsize=(4 * len(names), 4))
+            else:
+                fig, axes = plt.subplots(nrows=numvars, ncols=numvars, figsize=(8, 8))
+                fig.subplots_adjust(hspace=0.08, wspace=0.08)
+
+            # if we have to plot 1 single parameter value, envelop the ax in an array, so that it gives no troubles:
+            if len(names) == 1:
+                if not single_marginals_only:
+                    axes = np.array([[axes]])
+                else:
+                    axes = np.array([axes])
+
+            if not single_marginals_only:
+                if len(names) > 1:
+                    for ax in axes.flat:
+                        # Hide all ticks and labels
+                        ax.xaxis.set_visible(False)
+                        ax.yaxis.set_visible(False)
+
+                        # Set up ticks only on one side for the "edge" subplots...
+                        if ax.is_first_col():
+                            ax.yaxis.set_ticks_position('left')
+                        if ax.is_last_col():
+                            ax.yaxis.set_ticks_position('right')
+                        if ax.is_first_row():
+                            ax.xaxis.set_ticks_position('top')
+                        if ax.is_last_row():
+                            ax.xaxis.set_ticks_position('bottom')
+
+                # off diagonal plots:
+                for x in range(numvars):
+                    for y in range(numvars):
+                        if x is not y:
+                            # this plots the posterior samples
+                            if show_samples:
+                                axes[x, y].plot(data[y], data[x], '.k', markersize='1')
+
+                            xmin, xmax = ranges_parameters[names[y]]
+                            ymin, ymax = ranges_parameters[names[x]]
+
+                            # then you need to perform the KDE and plot the contour of posterior
+                            X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
+                            positions = np.vstack([X.ravel(), Y.ravel()])
+                            values = np.vstack([data[y].T, data[x].T])
+                            kernel = gaussian_kde(values, weights=weights, bw_method=bw_method)
+                            Z = np.reshape(kernel(positions).T, X.shape)
+                            # axes[x, y].plot(meanpost[y], meanpost[x], 'r+', markersize='10')
+                            axes[x, y].plot([xmin, xmax], [meanpost[x], meanpost[x]], "red", markersize='20',
+                                            linestyle='solid')
+                            axes[x, y].plot([meanpost[y], meanpost[y]], [ymin, ymax], "red", markersize='20',
+                                            linestyle='solid')
+                            if true_parameter_values is not None:
+                                axes[x, y].plot([xmin, xmax], [true_parameter_values[x], true_parameter_values[x]],
+                                                "green",
+                                                markersize='20', linestyle='dashed')
+                                axes[x, y].plot([true_parameter_values[y], true_parameter_values[y]], [ymin, ymax],
+                                                "green",
+                                                markersize='20', linestyle='dashed')
+
+                            CS = axes[x, y].contour(X, Y, Z, contour_levels, linestyles='solid')
+                            if show_density_values:
+                                axes[x, y].clabel(CS, fontsize=9, inline=1)
+                            axes[x, y].set_xlim([xmin, xmax])
+                            axes[x, y].set_ylim([ymin, ymax])
+
+            # diagonal plots
+
+            if not single_marginals_only:
+                diagonal_axes = np.array([axes[i, i] for i in range(len(axes))])
+            else:
+                diagonal_axes = axes
+
+            for i, label in enumerate(names):
+                xmin, xmax = ranges_parameters[names[i]]
+                positions = np.linspace(xmin, xmax, 100)
+                gaussian_kernel = gaussian_kde(data[i], weights=weights, bw_method=bw_method)
+                diagonal_axes[i].plot(positions, gaussian_kernel(positions), color='k', linestyle='solid', lw="1",
+                                      alpha=1, label="Density")
+                values = gaussian_kernel(positions)
+                # axes[i, i].plot([positions[np.argmax(values)], positions[np.argmax(values)]], [0, np.max(values)])
+                diagonal_axes[i].plot([meanpost[i], meanpost[i]], [0, 1.1 * np.max(values)], "red", alpha=1,
+                                      label="Posterior mean")
+                if true_parameter_values is not None:
+                    diagonal_axes[i].plot([true_parameter_values[i], true_parameter_values[i]],
+                                          [0, 1.1 * np.max(values)], "green",
+                                          alpha=1, label="True value")
+                if write_posterior_mean:
+                    write_post_mean_function(diagonal_axes[i], meanpost[i], label)
+                diagonal_axes[i].set_xlim([xmin, xmax])
+                diagonal_axes[i].set_ylim([0, 1.1 * np.max(values)])
+
+            # labels and ticks:
+            if not single_marginals_only:
+                for j, label in enumerate(names):
+                    axes[0, j].set_title(label, size=17)
+
+                    if len(names) > 1:
+                        axes[j, 0].set_ylabel(label)
+                        axes[-1, j].set_xlabel(label)
+                    else:
+                        axes[j, 0].set_ylabel("Density")
+
+                    axes[j, 0].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+                    axes[j, 0].yaxis.set_visible(True)
+
+                    axes[-1, j].ticklabel_format(style='sci', axis='x')  # , scilimits=(0, 0))
+                    axes[-1, j].xaxis.set_visible(True)
+                    axes[j, -1].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+                    axes[j, -1].yaxis.set_visible(True)
+
+            else:
+                for j, label in enumerate(names):
+                    axes[j].set_title(label, size=17)
+                axes[0].set_ylabel("Density")
+
+            return fig, axes
+
+        def double_marginals_plot(data, meanpost, names, **kwargs):
+            """
+            Plots contour plots of all possible pairs of samples. Additional keyword arguments are
+            passed on to matplotlib's "plot" command. Returns the matplotlib figure
+            object containing the subplot grid.
+            """
+            numvars, numdata = data.shape
+            numplots = numvars * (numvars - 1) // 2
+            fig, axes = plt.subplots(nrows=1, ncols=numplots, figsize=(4 * numplots, 4))
+
+            # if there is only one pair of parameters, there is a single axis; wrap it in a list so that the
+            # indexing below works:
+            if numplots == 1:
+                axes = [axes]
+
+            ax_counter = 0
+
+            for x in range(numvars):
+                for y in range(0, x):
+                    # this plots the posterior samples
+                    if show_samples:
+                        axes[ax_counter].plot(data[y], data[x], '.k', markersize='1')
+
+                    xmin, xmax = ranges_parameters[names[y]]
+                    ymin, ymax = ranges_parameters[names[x]]
+
+                    # then you need to perform the KDE and plot the contour of posterior
+                    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
+                    positions = np.vstack([X.ravel(), Y.ravel()])
+                    values = np.vstack([data[y].T, data[x].T])
+                    kernel = gaussian_kde(values, weights=weights, bw_method=bw_method)
+                    Z = np.reshape(kernel(positions).T, X.shape)
+                    # axes[x, y].plot(meanpost[y], meanpost[x], 'r+', markersize='10')
+                    axes[ax_counter].plot([xmin, xmax], [meanpost[x], meanpost[x]], "red", markersize='20',
+                                          linestyle='solid')
+                    axes[ax_counter].plot([meanpost[y], meanpost[y]], [ymin, ymax], "red", markersize='20',
+                                          linestyle='solid')
+                    if true_parameter_values is not None:
+                        axes[ax_counter].plot([xmin, xmax], [true_parameter_values[x], true_parameter_values[x]],
+                                              "green",
+                                              markersize='20', linestyle='dashed')
+                        axes[ax_counter].plot([true_parameter_values[y], true_parameter_values[y]], [ymin, ymax],
+                                              "green",
+                                              markersize='20', linestyle='dashed')
+
+                    CS = axes[ax_counter].contour(X, Y, Z, contour_levels, linestyles='solid')
+                    if show_density_values:
+                        axes[ax_counter].clabel(CS, fontsize=9, inline=1)
+                    axes[ax_counter].set_xlim([xmin, xmax])
+                    axes[ax_counter].set_ylim([ymin, ymax])
+
+                    axes[ax_counter].set_ylabel(names[x])
+                    axes[ax_counter].set_xlabel(names[y])
+
+                    axes[ax_counter].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+                    axes[ax_counter].yaxis.set_visible(True)
+                    axes[ax_counter].ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+                    axes[ax_counter].xaxis.set_visible(True)
+
+                    ax_counter += 1
+
+            return fig, axes
+
+        if not double_marginals_only:
+            fig, axes = scatterplot_matrix(datat, meanpost, parameters_to_show,
+                                           single_marginals_only=single_marginals_only)
+        else:
+            fig, axes = double_marginals_plot(datat, meanpost, parameters_to_show)
+
+        if path_to_save is not None:
+            plt.savefig(path_to_save, bbox_inches="tight")
+
+        return fig, axes
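# A minimal, self-contained sketch of the weighted-KDE contour step used in the pairwise marginal plots above
# (illustrative only; the sample arrays and weights below are random placeholders):
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

rng = np.random.RandomState(0)
samples_x = rng.normal(0, 1, size=500)  # posterior samples of one parameter
samples_y = rng.normal(1, 2, size=500)  # posterior samples of another parameter
weights = None                          # or an array of importance weights of the same length

xmin, xmax = samples_x.min(), samples_x.max()
ymin, ymax = samples_y.min(), samples_y.max()
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]  # regular evaluation grid
kernel = gaussian_kde(np.vstack([samples_x, samples_y]), weights=weights)
Z = kernel(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)

fig, ax = plt.subplots()
ax.plot(samples_x, samples_y, '.k', markersize=1)  # scatter of the posterior samples
ax.contour(X, Y, Z, 14, linestyles='solid')        # contours of the KDE estimate
plt.show()
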
diff --git a/abcpy/statistics.py b/abcpy/statistics.py
index a49ec07b..74ea8ce8 100644
--- a/abcpy/statistics.py
+++ b/abcpy/statistics.py
@@ -1,52 +1,67 @@
 from abc import ABCMeta, abstractmethod
 import numpy as np
 
-class Statistics(metaclass = ABCMeta):
+try:
+    import torch
+except ImportError:
+    has_torch = False
+else:
+    has_torch = True
+    from abcpy.NN_utilities.utilities import load_net
+    from abcpy.NN_utilities.networks import createDefaultNN
+
+
+class Statistics(metaclass=ABCMeta):
     """This abstract base class defines how to calculate statistics from dataset.
 
     The base class also implements a polynomial expansion with cross-product
     terms that can be used to get desired polynomial expansion of the calculated statistics.
-    
-            
+
+
     """
-    
+
     @abstractmethod
-    def __init__(self, degree = 2, cross = True):
+    def __init__(self, degree=1, cross=False, previous_statistics=None):
         """Constructor that must be overwritten by the sub-class.
 
-        The constructor of a sub-class must accept arguments for the polynomial 
-        expansion after extraction of the summary statistics, one has to define 
+        The constructor of a sub-class must accept arguments for the polynomial
+        expansion after extraction of the summary statistics, one has to define
         the degree of polynomial expansion and cross, indicating whether cross-prodcut
-        terms are included. 
+        terms are included.
 
         Parameters
         ----------
         degree: integer, optional
             Of polynomial expansion. The default value is 2 meaning second order polynomial expansion.
         cross: boolean, optional
-           Defines whether to include the cross-product terms. The default value is TRUE, meaning the cross product term is included.
-        """       
+            Defines whether to include the cross-product terms. The default value is False, meaning the cross-product
+            terms are not included.
+        previous_statistics: Statistics class, optional
+            It allows pipelining of Statistics. Specifically, if the final statistic to be used is determined by the
+            composition of two Statistics, you can pass the first here; then, whenever the final statistic is needed, it
+            is sufficient to call the `statistics` method of the second one, and that will automatically apply both
+            transformations.
+        """
 
         raise NotImplementedError
-        
-        
+
     @abstractmethod
     def statistics(self, data: object) -> object:
-        """To be overwritten by any sub-class: should extract statistics from the 
-        data set data. It is assumed that data is a  list of n same type 
+        """To be overwritten by any sub-class: should extract statistics from the
+        data set data. It is assumed that data is a  list of n same type
         elements(eg., The data can be a list containing n timeseries, n graphs or n np.ndarray).
-        
+
         Parameters
         ----------
         data: python list
-            Contains n data sets.
+            Contains n data sets with length p.
         Returns
         -------
         numpy.ndarray
             nxp matrix where for each of the n data points p statistics are calculated.
-            
+
         """
-        
+
         raise NotImplementedError
 
     def _polynomial_expansion(self, summary_statistics):
@@ -56,55 +71,299 @@ def _polynomial_expansion(self, summary_statistics):
         Parameters
         ----------
         summary_statistics: numpy.ndarray
-            nxp matrix where n is number of data points in the datasets data set and p number os 
+            nxp matrix where n is the number of data points in the data set and p the number of
             summary statistics calculated.
         Returns
         -------
         numpy.ndarray
-            nx(p+degree*p+cross*nchoosek(p,2)) matrix where for each of the n pointss with 
+            nx(p+degree*p+cross*nchoosek(p,2)) matrix where for each of the n points with
             p statistics, degree*p polynomial expansion term and cross*nchoosek(p,2) many
-            cross-product terms are calculated.      
-               
+            cross-product terms are calculated.
+
         """
-        
+
         # Check summary_statistics is a np.ndarry
-        if not isinstance(summary_statistics, (np.ndarray)):
-            raise TypeError('Summary statisticss is not of allowed types')
+        if not isinstance(summary_statistics, np.ndarray):
+            raise TypeError('Summary statistics is not of allowed types')
         # Include the polynomial expansion
         result = summary_statistics
-        for ind in range(2,self.degree+1):
-            result = np.column_stack((result,np.power(summary_statistics,ind)))
+        for ind in range(2, self.degree + 1):
+            result = np.column_stack((result, np.power(summary_statistics, ind)))
 
         # Include the cross-product term
-        if self.cross == True and summary_statistics.shape[1]>1:          
+        if self.cross and summary_statistics.shape[1] > 1:
             # Convert to a matrix
-            for ind1 in range(0,summary_statistics.shape[1]):
-                for ind2 in range(ind1+1,summary_statistics.shape[1]):
-                    result = np.column_stack((result,summary_statistics[:,ind1]*summary_statistics[:,ind2]))
+            for ind1 in range(0, summary_statistics.shape[1]):
+                for ind2 in range(ind1 + 1, summary_statistics.shape[1]):
+                    result = np.column_stack((result, summary_statistics[:, ind1] * summary_statistics[:, ind2]))
         return result
 
+    def _check_and_transform_input(self, data):
+        """
+        """
+        if isinstance(data, list):
+            if np.array(data).shape == (len(data),):
+                if len(data) == 1:
+                    data = np.array(data).reshape(1, 1)
+                data = np.array(data).reshape(len(data), 1)
+            else:
+                data = np.concatenate(data).reshape(len(data), -1)
+        else:
+            raise TypeError('Input data should be of type list, but found type {}'.format(type(data)))
+
+        return data
 
 
 class Identity(Statistics):
     """
-    This class implements identity statistics returning a nxp matrix when the data set 
-    contains n numpy.ndarray of length p.
+    This class implements identity statistics, i.e. it does not apply any transformation to the data before the
+    optional polynomial expansion step. If the data set contains n numpy.ndarray of length p, it therefore returns an
+    nx(p+degree*p+cross*nchoosek(p,2)) matrix, where for each of the n points with p statistics, degree*p polynomial
+    expansion terms and cross*nchoosek(p,2) cross-product terms are calculated.
     """
-    def __init__(self, degree = 2, cross = True):
+
+    def __init__(self, degree=1, cross=False, previous_statistics=None):
+        """
+
+        Parameters
+        ----------
+        degree : integer, optional
+            Degree of the polynomial expansion. The default value is 1, meaning no polynomial expansion is applied.
+        cross : boolean, optional
+            Defines whether to include the cross-product terms. The default value is False, meaning the cross-product
+            terms are not included.
+        previous_statistics : Statistics class, optional
+            It allows pipelining of Statistics. Specifically, if the final statistic to be used is determined by the
+            composition of two Statistics, you can pass the first here; then, whenever the final statistic is needed, it
+            is sufficient to call the `statistics` method of the second one, and that will automatically apply both
+            transformations.
+        """
         self.degree = degree
         self.cross = cross
+        self.previous_statistics = previous_statistics
 
     def statistics(self, data):
-        if isinstance(data, list):
-            if np.array(data).shape == (len(data),):
-                if len(data) == 1:                
-                    data = np.array(data).reshape(1,1)
-                data = np.array(data).reshape(len(data),1)
-            else:
-                data = np.concatenate(data).reshape(len(data),-1)
+        """
+        Parameters
+        ----------
+        data: python list
+            Contains n data sets with length p.
+        Returns
+        -------
+        numpy.ndarray
+            nx(p+degree*p+cross*nchoosek(p,2)) matrix where for each of the n data points with length p,
+            (p+degree*p+cross*nchoosek(p,2)) statistics are calculated.
+        """
+
+        # pipeline: first call the previous statistics:
+        if self.previous_statistics is not None:
+            data = self.previous_statistics.statistics(data)
+        # The first statistic in a pipeline takes a list as input (to match the API) and converts it to a np.array;
+        # any subsequent statistic works directly on np.arrays. Hence, the conversion below is only needed when this
+        # statistic is the first one, i.e. when it has no previous_statistics.
         else:
-            raise TypeError('Input data should be of type list, but found type {}'.format(type(data)))
-        # Expand the data with polynomial expansion            
-        result = self._polynomial_expansion(data)  
-        
-        return result    
+            data = self._check_and_transform_input(data)
+
+        # Expand the data with polynomial expansion
+        result = self._polynomial_expansion(data)
+
+        return result
+
+
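# A minimal usage sketch of the Identity statistics with a second-order polynomial expansion
# (placeholder data; assumes only NumPy and ABCpy are available):
import numpy as np
from abcpy.statistics import Identity

data = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])]  # n=2 data sets of length p=3
stat_calc = Identity(degree=2, cross=True)
summaries = stat_calc.statistics(data)  # shape (2, 9): 3 identity + 3 squared + 3 cross-product terms
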
+class LinearTransformation(Statistics):
+    """Applies a linear transformation to the data to get (usually) a lower dimensional statistics. Then you can apply
+    an additional polynomial expansion step.
+    """
+
+    def __init__(self, coefficients, degree=1, cross=False, previous_statistics=None):
+        """
+        Parameters
+        ----------
+        coefficients: numpy.ndarray
+            Matrix of size p x d, where p is the dimension of each data point (after applying `previous_statistics`,
+            if provided) and d is the dimension of the summary statistic obtained after the linear transformation
+            (i.e. before a possible polynomial expansion is applied).
+        degree : integer, optional
+            Degree of the polynomial expansion. The default value is 1, meaning no polynomial expansion is applied.
+        cross : boolean, optional
+            Defines whether to include the cross-product terms. The default value is False, meaning the cross-product
+            terms are not included.
+        previous_statistics : Statistics class, optional
+            It allows pipelining of Statistics. Specifically, if the final statistic to be used is determined by the
+            composition of two Statistics, you can pass the first here; then, whenever the final statistic is needed, it
+            is sufficient to call the `statistics` method of the second one, and that will automatically apply both
+            transformations.
+        """
+        self.coefficients = coefficients
+        self.degree = degree
+        self.cross = cross
+        self.previous_statistics = previous_statistics
+
+    def statistics(self, data):
+        """
+        Parameters
+        ----------
+        data: python list
+            Contains n data sets with length p.
+        Returns
+        -------
+        numpy.ndarray
+            nx(d+degree*d+cross*nchoosek(d,2)) matrix where for each of the n data points with length p you apply the
+            linear transformation to get to dimension d, from where (d+degree*d+cross*nchoosek(d,2)) statistics are
+            calculated.
+        """
+
+        # pipeline: first call the previous statistics:
+        if self.previous_statistics is not None:
+            data = self.previous_statistics.statistics(data)
+        # The first statistic in a pipeline takes a list as input (to match the API) and converts it to a np.array;
+        # any subsequent statistic works directly on np.arrays. Hence, the conversion below is only needed when this
+        # statistic is the first one, i.e. when it has no previous_statistics.
+        else:
+            data = self._check_and_transform_input(data)
+
+        # Apply now the linear transformation
+        if not data.shape[1] == self.coefficients.shape[0]:
+            raise ValueError('Mismatch in dimension of summary statistics and coefficients')
+        result = np.dot(data, self.coefficients)
+
+        # Expand the data with polynomial expansion
+        result = self._polynomial_expansion(result)
+
+        return result
+
+
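# A minimal sketch of the `previous_statistics` pipelining described above: calling `statistics` on the last
# element of the chain applies all transformations in order (placeholder data and coefficients):
import numpy as np
from abcpy.statistics import Identity, LinearTransformation

data = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])]  # n=2 data sets of length p=3
first = Identity(degree=2, cross=False)        # expands each data set to 6 statistics
coefficients = np.ones((6, 2))                 # p x d matrix mapping the 6 statistics down to d=2
second = LinearTransformation(coefficients, previous_statistics=first)
summaries = second.statistics(data)            # shape (2, 2); Identity is applied automatically first
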
+class NeuralEmbedding(Statistics):
+    """Computes the statistics by applying a neural network transformation. 
+    
+    It is essentially a wrapper for the application of a neural network transformation to the data. Note that the
+    neural network has had to be trained in some way (for instance check the statistics learning routines) and that 
+    Pytorch is required for this part to work.   
+    """
+
+    def __init__(self, net, previous_statistics=None):
+        """
+        Parameters
+        ----------
+        net : torch.nn object
+            the embedding neural network. The input size of the neural network must coincide with the size of each of
+            the datapoints.
+        previous_statistics : Statistics class, optional
+            It allows pipelining of Statistics. Specifically, if the final statistic to be used is determined by the
+            composition of two Statistics, you can pass the first here; then, whenever the final statistic is needed, it
+            is sufficient to call the `statistics` method of the second one, and that will automatically apply both
+            transformations.
+        """
+        if not has_torch:
+            raise ImportError(
+                "Pytorch is required to instantiate an element of the {} class, in order to handle "
+                "neural networks. Please install it. ".format(self.__class__.__name__))
+
+        self.net = net
+        self.previous_statistics = previous_statistics
+
+    @classmethod
+    def fromFile(cls, path_to_net_state_dict, network_class=None, input_size=None, output_size=None, hidden_sizes=None,
+                 previous_statistics=None):
+        """If the neural network state_dict was saved to the disk, this method can be used to instantiate a
+        NeuralEmbedding object with that neural network.
+
+        In order for the state_dict to be read correctly, the network class is needed. Therefore, we provide 2 options:
+        1) the Pytorch neural network class can be passed (if the user defined it, for instance)
+        2) if the neural network was defined by using the DefaultNN class in abcpy.NN_utilities.networks, you can
+        provide arguments `input_size`, `output_size` and `hidden_sizes` (the latter is optional) that define
+        the sizes of a fully connected network; then a DefaultNN is instantiated with those sizes. This can be used
+        if for instance the neural network was trained using the utilities in abcpy.statisticslearning and you did
+        not provide explicitly the neural network class there, but defined it through the sizes of the different layers.
+
+        In both cases, note that the input size of the neural network must coincide with the size of each of the
+        datapoints generated from the model (unless some other statistics are computed beforehand).
+
+        Parameters
+        ----------
+        path_to_net_state_dict : basestring
+            the path where the state-dict is saved
+        network_class : torch.nn class, optional
+            if the neural network class is known explicitly (for instance if the user defined it), then it has to be
+             passed here. This must not be provided together with `input_size` or `output_size`.
+        input_size : integer, optional
+            if the neural network is an instance of abcpy.NN_utilities.networks.DefaultNN with some input and
+            output size, then you should provide here the input size of the network. It has to be provided together with
+            the corresponding output_size, and it must not be provided with `network_class`.
+        output_size : integer, optional
+            if the neural network is an instance of abcpy.NN_utilities.networks.DefaultNN with some input and
+            output size, then you should provide here the output size of the network. It has to be provided together
+            with the corresponding input_size, and it must not be provided with `network_class`.
+        hidden_sizes : array-like, optional
+            if the neural network is an instance of abcpy.NN_utilities.networks.DefaultNN with some input and
+            output size, then you can provide here an array-like with the size of the hidden layers (for instance
+            [5,7,5] denotes 3 hidden layers with correspondingly 5,7,5 neurons). In case this parameter is not provided,
+            the hidden sizes are determined from the input and output sizes as specified in
+            abcpy.NN_utilities.networks.DefaultNN. Note that this must not be provided together with `network_class`.
+        previous_statistics : Statistics class, optional
+            It allows pipelining of Statistics. Specifically, if the final statistic to be used is determined by the
+            composition of two Statistics, you can pass the first here; then, whenever the final statistic is needed, it
+            is sufficient to call the `statistics` method of the second one, and that will automatically apply both
+            transformations. In this case, this is the statistics that has to be computed before the neural network
+            transformation is applied.
+        Returns
+        -------
+        abcpy.statistics.NeuralEmbedding
+            the `NeuralEmbedding` object with the neural network obtained from the specified file.
+        """
+        if not has_torch:
+            raise ImportError(
+                "Pytorch is required to instantiate an element of the {} class, in order to handle "
+                "neural networks. Please install it. ".format(cls.__name__))
+
+        if network_class is None and (input_size is None or output_size is None):
+            raise RuntimeError("You need to pass either network class or both input_size and output_size.")
+        if network_class is not None and (input_size is not None or output_size is not None):
+            raise RuntimeError("You can't pass together network_class and one of input_size, output_size")
+        if network_class is not None and hidden_sizes is not None:
+            raise RuntimeError("You passed hidden_sizes as an argument, but that may be passed only if you are passing "
+                               "input_size and input_size as well, and you are not passing network_class.")
+
+        if network_class is not None:  # user explicitly passed the NN class
+            net = load_net(path_to_net_state_dict, network_class)
+            statistic_object = cls(net, previous_statistics=previous_statistics)
+        else:  # the user passed the input_size, output_size and (maybe) the hidden_sizes
+            net = load_net(path_to_net_state_dict, createDefaultNN(input_size=input_size, output_size=output_size,
+                                                                   hidden_sizes=hidden_sizes))
+            statistic_object = cls(net, previous_statistics=previous_statistics)
+
+        return statistic_object
+
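# A minimal sketch of fromFile for a state_dict saved from the default fully connected architecture
# (the path and layer sizes below are placeholders):
from abcpy.statistics import NeuralEmbedding

stat_calc = NeuralEmbedding.fromFile("saved_net.pth", input_size=3, output_size=2, hidden_sizes=[10, 10])
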
+    def statistics(self, data):
+        """
+        Parameters
+        ----------
+        data: python list
+            Contains n data sets with length p.
+        Returns
+        -------
+        numpy.ndarray
+            the statistics computed by applying the neural network.
+        """
+
+        # pipeline: first call the previous statistics:
+        if self.previous_statistics is not None:
+            data = self.previous_statistics.statistics(data)
+        # The first statistic in a pipeline takes a list as input (to match the API) and converts it to a np.array;
+        # any subsequent statistic works directly on np.arrays. Hence, the conversion below is only needed when this
+        # statistic is the first one, i.e. when it has no previous_statistics.
+        else:
+            data = self._check_and_transform_input(data)
+
+        data = torch.from_numpy(data.astype("float32"))
+
+        # move data to gpu if the net is on gpu
+        if next(self.net.parameters()).is_cuda:
+            data = data.cuda()
+
+        # simply apply the network transformation.
+        result = self.net(data).cpu().detach().numpy()
+
+        return np.array(result)
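# A minimal sketch of NeuralEmbedding with a small PyTorch network (assumes PyTorch is installed; the toy
# network and data below are placeholders, while in practice the net would come from the training routines in
# abcpy.statisticslearning or from NeuralEmbedding.fromFile):
import numpy as np
import torch.nn as nn
from abcpy.statistics import Identity, NeuralEmbedding

net = nn.Sequential(nn.Linear(3, 10), nn.ReLU(), nn.Linear(10, 2))  # embeds 3-dim data into 2 statistics
stat_calc = NeuralEmbedding(net, previous_statistics=Identity(degree=1, cross=False))

data = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])]  # n=2 data sets of length p=3
summaries = stat_calc.statistics(data)  # numpy array of shape (2, 2)
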
diff --git a/abcpy/statisticslearning.py b/abcpy/statisticslearning.py
new file mode 100644
index 00000000..53fbb889
--- /dev/null
+++ b/abcpy/statisticslearning.py
@@ -0,0 +1,660 @@
+import logging
+from abc import ABCMeta, abstractmethod
+
+from sklearn import linear_model
+
+from abcpy.acceptedparametersmanager import *
+from abcpy.graphtools import GraphTools
+# import dataset and networks definition:
+from abcpy.statistics import LinearTransformation
+
+# Different torch components
+try:
+    import torch
+except ImportError:
+    has_torch = False
+else:
+    has_torch = True
+    from abcpy.NN_utilities.networks import createDefaultNN
+    from abcpy.statistics import NeuralEmbedding
+
+from abcpy.NN_utilities.algorithms import FP_nn_training, triplet_training, contrastive_training
+from abcpy.NN_utilities.utilities import compute_similarity_matrix
+
+
+# TODO: there seems to be an issue when n_samples_per_param > 1. Check that; should the _sample_parameter_statistics function be modified?
+
+class StatisticsLearning(metaclass=ABCMeta):
+    """This abstract base class defines a way to choose the summary statistics.
+    """
+
+    def __init__(self, model, statistics_calc, backend, n_samples=1000, n_samples_per_param=1, parameters=None,
+                 simulations=None, seed=None):
+
+        """The constructor of a sub-class must accept a non-optional model, statistics calculator and
+        backend which are stored to self.model, self.statistics_calc and self.backend. Further it
+        accepts two optional parameters n_samples and seed, defining the number of simulated datasets
+        generated in the pilot step to learn the summary statistics and the integer used to initialize the random
+        number generator.
+
+        This __init__ takes care of sample-statistics generation, with the parallelization; however, you can choose
+        to provide simulations and corresponding parameters that have been previously generated, with the parameters
+        `parameters` and `simulations`.
+
+        Parameters
+        ----------
+        model: abcpy.models.Model
+            Model object that conforms to the Model class.
+        statistics_calc: abcpy.statistics.Statistics
+            Statistics object that conforms to the Statistics class.
+        backend: abcpy.backends.Backend
+            Backend object that conforms to the Backend class.
+        n_samples: int, optional
+            The number of (parameter, simulated data) tuples to be generated to learn the summary statistics in the pilot
+            step. The default value is 1000.
+            This is ignored if `simulations` and `parameters` are provided.
+        n_samples_per_param: int, optional
+            Number of data points in each simulated data set. This is ignored if `simulations` and `parameters` are
+            provided. Default to 1.
+        parameters: array, optional
+            A numpy array with shape (n_samples, n_parameters) that is used, together with `simulations` to fit the
+            summary selection learning algorithm. It has to be provided together with `simulations`, in which case no
+            other simulations are performed. Default value is None.
+        simulations: array, optional
+            A numpy array with shape (n_samples, output_size) that is used, together with `parameters` to fit the
+            summary selection learning algorithm. It has to be provided together with `parameters`, in which case no
+            other simulations are performed. Default value is None.
+        seed: integer, optional
+            Optional initial seed for the random number generator. The default value is generated randomly.
+        """
+        if (parameters is None) != (simulations is None):
+            raise RuntimeError("parameters and simulations need to be provided together.")
+
+        self.model = model
+        self.statistics_calc = statistics_calc
+        self.backend = backend
+        self.rng = np.random.RandomState(seed)
+        self.n_samples_per_param = n_samples_per_param
+        self.logger = logging.getLogger(__name__)
+
+        if parameters is None:  # then also simulations is None
+            self.logger.info('Generation of data...')
+
+            self.logger.debug("Definitions for parallelization.")
+            # An object managing the bds objects
+            self.accepted_parameters_manager = AcceptedParametersManager(self.model)
+            self.accepted_parameters_manager.broadcast(self.backend, [])
+
+            self.logger.debug("Map phase.")
+            # main algorithm
+            seed_arr = self.rng.randint(1, n_samples * n_samples, size=n_samples, dtype=np.int32)
+            rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
+            rng_pds = self.backend.parallelize(rng_arr)
+
+            self.logger.debug("Collect phase.")
+            sample_parameters_statistics_pds = self.backend.map(self._sample_parameter_statistics, rng_pds)
+
+            sample_parameters_and_statistics = self.backend.collect(sample_parameters_statistics_pds)
+            sample_parameters, sample_statistics = [list(t) for t in zip(*sample_parameters_and_statistics)]
+            sample_parameters = np.array(sample_parameters)
+            self.sample_statistics = np.concatenate(sample_statistics)
+
+            self.logger.debug("Reshape data")
+            # reshape the sample parameters; so that we can also work with multidimensional parameters
+            self.sample_parameters = sample_parameters.reshape((n_samples, -1))
+
+            # now reshape the statistics in case n_samples_per_param > 1, and repeat the parameters array accordingly
+            # so that the regression algorithms can work on the pair of arrays. Maybe there are smarter ways of doing
+            # this.
+
+            self.sample_statistics = self.sample_statistics.reshape(n_samples * self.n_samples_per_param, -1)
+            self.sample_parameters = np.repeat(self.sample_parameters, self.n_samples_per_param, axis=0)
+            self.logger.info('Data generation finished.')
+
+        else:
+            # do all the checks on dimensions:
+            if not isinstance(parameters, np.ndarray) or not isinstance(simulations, np.ndarray):
+                raise TypeError("parameters and simulations need to be numpy arrays.")
+            if len(parameters.shape) != 2:
+                raise RuntimeError("parameters have to be a 2-dimensional array")
+            if len(simulations.shape) != 2:
+                raise RuntimeError("parameters have to be a 2-dimensional array")
+            if simulations.shape[0] != parameters.shape[0]:
+                raise RuntimeError("parameters and simulations need to have the same first dimension")
+
+            # if all checks are passed:
+            self.sample_statistics = self.statistics_calc.statistics(
+                [simulations[i] for i in range(simulations.shape[0])])
+            self.sample_parameters = parameters
+
+            self.logger.info("The statistics will be learned using the provided data and parameters")
+
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        del state['backend']
+        return state
+
+    @abstractmethod
+    def get_statistics(self):
+        """
+        This should return a statistics object that implements the learned transformation.
+
+        Returns
+        -------
+        abcpy.statistics.Statistics object
+            a statistics object that implements the learned transformation.
+        """
+        raise NotImplementedError
+
+    def _sample_parameter_statistics(self, rng=np.random.RandomState()):
+        """Function that generates (parameter, statistics). It is mapped to the different workers during data
+        generation.
+        """
+        self.sample_from_prior(rng=rng)
+        parameter = self.get_parameters()
+        y_sim = self.simulate(self.n_samples_per_param, rng=rng)
+        if y_sim is not None:
+            statistics = self.statistics_calc.statistics(y_sim)
+        return parameter, statistics
+
+
+class Semiautomatic(StatisticsLearning, GraphTools):
+    """This class implements the semi automatic summary statistics learning technique described in Fearnhead and
+    Prangle [1].
+
+    [1] Fearnhead P., Prangle D. 2012. Constructing summary statistics for approximate
+    Bayesian computation: semi-automatic approximate Bayesian computation. J. Roy. Stat. Soc. B 74:419–474.
+    """
+
+    def __init__(self, model, statistics_calc, backend, n_samples=1000, n_samples_per_param=1, parameters=None,
+                 simulations=None, seed=None):
+        """
+        Parameters
+        ----------
+        model: abcpy.models.Model
+            Model object that conforms to the Model class.
+        statistics_calc: abcpy.statistics.Statistics
+            Statistics object that conforms to the Statistics class.
+        backend: abcpy.backends.Backend
+            Backend object that conforms to the Backend class.
+        n_samples: int, optional
+            The number of (parameter, simulated data) tuples to be generated to learn the summary statistics in the pilot
+            step. The default value is 1000.
+            This is ignored if `simulations` and `parameters` are provided.
+        n_samples_per_param: int, optional
+            Number of data points in each simulated data set. This is ignored if `simulations` and `parameters` are
+            provided.
+        parameters: array, optional
+            A numpy array with shape (n_samples, n_parameters) that is used, together with `simulations` to fit the
+            summary selection learning algorithm. It has to be provided together with `simulations`, in which case no
+            other simulations are performed. Default value is None.
+        simulations: array, optional
+            A numpy array with shape (n_samples, output_size) that is used, together with `parameters` to fit the
+            summary selection learning algorithm. It has to be provided together with `parameters`, in which case no
+            other simulations are performed. Default value is None.
+        seed: integer, optional
+            Optional initial seed for the random number generator. The default value is generated randomly.
+        """
+        # the sampling is performed by the init of the parent class
+        super(Semiautomatic, self).__init__(model, statistics_calc, backend,
+                                            n_samples, n_samples_per_param, parameters=parameters,
+                                            simulations=simulations, seed=seed)
+
+        self.logger.info('Learning of the transformation...')
+
+        self.coefficients_learnt = np.zeros(shape=(self.sample_parameters.shape[1], self.sample_statistics.shape[1]))
+        regr = linear_model.LinearRegression(fit_intercept=True)
+        for ind in range(self.sample_parameters.shape[1]):
+            regr.fit(self.sample_statistics, self.sample_parameters[:, ind])
+            self.coefficients_learnt[ind, :] = regr.coef_
+
+        self.logger.info("Finished learning the transformation.")
+
+    def get_statistics(self):
+        """
+        Returns an abcpy.statistics.LinearTransformation Statistics implementing the learned transformation.
+
+        Returns
+        -------
+        abcpy.statistics.LinearTransformation object
+            a statistics object that implements the learned transformation.
+        """
+        return LinearTransformation(np.transpose(self.coefficients_learnt), previous_statistics=self.statistics_calc)
+
+
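# A minimal sketch of the Semiautomatic learner above, using pre-generated (parameters, simulations) pairs so
# that no new model simulations are run (the model, the random data and the BackendDummy choice are
# illustrative placeholders):
import numpy as np
from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform
from abcpy.statistics import Identity
from abcpy.statisticslearning import Semiautomatic

mu = Uniform([[0], [10]], name='mu')
model = Normal([mu, 1.0], name='model')

statistics_calc = Identity(degree=1, cross=False)
parameters = np.random.uniform(0, 10, size=(100, 1))              # shape (n_samples, n_parameters)
simulations = parameters + np.random.normal(0, 1, size=(100, 1))  # shape (n_samples, output_size)

learner = Semiautomatic([model], statistics_calc, BackendDummy(),
                        parameters=parameters, simulations=simulations, seed=1)
new_statistics_calc = learner.get_statistics()  # LinearTransformation wrapping statistics_calc
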
+class StatisticsLearningNN(StatisticsLearning, GraphTools):
+    """This is the base class for all the statistics learning techniques involving neural networks. In most cases, you
+    should not instantiate this directly. The actual classes instantiate this with the right arguments.
+
+    In order to use this technique, Pytorch is required to handle the neural networks.
+    """
+
+    def __init__(self, model, statistics_calc, backend, training_routine, distance_learning, embedding_net=None,
+                 n_samples=1000, n_samples_per_param=1, parameters=None, simulations=None, seed=None, cuda=None,
+                 quantile=0.1, **training_routine_kwargs):
+        """
+        Parameters
+        ----------
+        model: abcpy.models.Model
+            Model object that conforms to the Model class.
+        statistics_calc: abcpy.statistics.Statistics
+            Statistics object that conforms to the Statistics class.
+        backend: abcpy.backends.Backend
+            Backend object that conforms to the Backend class.
+        training_routine: function
+            training routine to train the network. It has to take as first and second arguments the matrix of
+            simulations and the corresponding targets (or the similarity matrix if `distance_learning` is True). It also
+            needs to have as keyword parameters embedding_net and cuda.
+        distance_learning: boolean
+            this has to be True if the statistics learning technique is based on distance learning, in which case the
+            __init__ computes the similarity matrix.
+        embedding_net: torch.nn object or list
+            it can be a torch.nn object with input size corresponding to the size of the model output; alternatively,
+            a list of integers denoting the widths of the hidden layers, from which a fully connected network with
+            that structure is created, with input and output sizes corresponding to the size of the model output and
+            the number of parameters. In case this is None, the depth of the network and the widths of the hidden
+            layers are determined from the input and output sizes as specified in abcpy.NN_utilities.networks.DefaultNN.
+        n_samples: int, optional
+            The number of (parameter, simulated data) tuples to be generated to learn the summary statistics in the pilot
+            step. The default value is 1000.
+            This is ignored if `simulations` and `parameters` are provided.
+        n_samples_per_param: int, optional
+            Number of data points in each simulated data set. This is ignored if `simulations` and `parameters` are
+            provided. Default to 1.
+        parameters: array, optional
+            A numpy array with shape (n_samples, n_parameters) that is used, together with `simulations` to fit the
+            summary selection learning algorithm. It has to be provided together with `simulations`, in which case no
+            other simulations are performed. Default value is None.
+        simulations: array, optional
+            A numpy array with shape (n_samples, output_size) that is used, together with `parameters` to fit the
+            summary selection learning algorithm. It has to be provided together with `parameters`, in which case no
+            other simulations are performed. Default value is None.
+        seed: integer, optional
+            Optional initial seed for the random number generator. The default value is generated randomly.
+        cuda: boolean, optional
+            If None, the GPU is used if it is available; otherwise, set True to use the GPU or False to use the CPU.
+        quantile: float, optional
+            quantile used to define the similarity set if distance_learning is True. Default to 0.1.
+        training_routine_kwargs:
+            additional kwargs to be passed to the underlying training routine.
+        """
+        self.logger = logging.getLogger(__name__)
+
+        # Define device
+        if not has_torch:
+            raise ImportError(
+                "Pytorch is required to instantiate an element of the {} class, in order to handle "
+                "neural networks. Please install it. ".format(self.__class__.__name__))
+
+        # set random seed for torch as well:
+        if seed is not None:
+            torch.manual_seed(seed)
+
+        if cuda is None:
+            cuda = torch.cuda.is_available()
+        elif cuda and not torch.cuda.is_available():
+            # if the user requested to use GPU but no GPU is there
+            cuda = False
+            self.logger.warning(
+                "You requested to use GPU but no GPU is available! The computation will proceed on CPU.")
+
+        self.device = "cuda" if cuda and torch.cuda.is_available else "cpu"
+        if self.device == "cuda":
+            self.logger.debug("We are using GPU to train the network.")
+        else:
+            self.logger.debug("We are using CPU to train the network.")
+
+        # this handles generation of the data (or its formatting, in case the data is provided by the user)
+        super(StatisticsLearningNN, self).__init__(model, statistics_calc, backend, n_samples, n_samples_per_param,
+                                                   parameters, simulations, seed)
+
+        self.logger.info('Learning of the transformation...')
+        # Define Data
+        target, simulations_reshaped = self.sample_parameters, self.sample_statistics
+
+        if distance_learning:
+            self.logger.debug("Computing similarity matrix...")
+            # define the similarity set
+            similarity_set = compute_similarity_matrix(target, quantile)
+            self.logger.debug("Done")
+
+        # now setup the default neural network or not
+
+        if isinstance(embedding_net, torch.nn.Module):
+            self.embedding_net = embedding_net
+            self.logger.debug('We use the provided neural network')
+
+        elif isinstance(embedding_net, list) or embedding_net is None:
+            # therefore we need to generate the neural network given the list. The following function returns a class
+            # of NN with the given input size, output size and hidden sizes; the trailing () then instantiates the network
+            self.embedding_net = createDefaultNN(input_size=simulations_reshaped.shape[1], output_size=target.shape[1],
+                                                 hidden_sizes=embedding_net)()
+            self.logger.debug('We generate a default neural network')
+
+        if cuda:
+            self.embedding_net.cuda()
+
+        self.logger.debug('We now run the training routine')
+
+        if distance_learning:
+            self.embedding_net = training_routine(simulations_reshaped, similarity_set,
+                                                  embedding_net=self.embedding_net, cuda=cuda,
+                                                  **training_routine_kwargs)
+        else:
+            self.embedding_net = training_routine(simulations_reshaped, target, embedding_net=self.embedding_net,
+                                                  cuda=cuda, **training_routine_kwargs)
+
+        self.logger.info("Finished learning the transformation.")
+
+    def get_statistics(self):
+        """
+        Returns a NeuralEmbedding Statistics implementing the learned transformation.
+
+        Returns
+        -------
+        abcpy.statistics.NeuralEmbedding object
+            a statistics object that implements the learned transformation.
+        """
+        return NeuralEmbedding(net=self.embedding_net, previous_statistics=self.statistics_calc)
+
+
+# the following classes subclass the base class StatisticsLearningNN with different training routines
+
+class SemiautomaticNN(StatisticsLearningNN):
+    """This class implements the semi automatic summary statistics learning technique as described in
+     Jiang et al. 2017 [1].
+
+     In order to use this technique, Pytorch is required to handle the neural networks.
+
+     [1] Jiang, B., Wu, T.Y., Zheng, C. and Wong, W.H., 2017. Learning summary statistic for approximate Bayesian
+     computation via deep neural network. Statistica Sinica, pp.1595-1618.
+    """
+
+    def __init__(self, model, statistics_calc, backend, embedding_net=None, n_samples=1000, n_samples_per_param=1,
+                 parameters=None, simulations=None, seed=None, cuda=None, batch_size=16, n_epochs=200,
+                 load_all_data_GPU=False, lr=1e-3, optimizer=None, scheduler=None, start_epoch=0, verbose=False,
+                 optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
+        """
+        Parameters
+        ----------
+        model: abcpy.models.Model
+            Model object that conforms to the Model class.
+        statistics_calc: abcpy.statistics.Statistics
+            Statistics object that conforms to the Statistics class.
+        backend: abcpy.backends.Backend
+            Backend object that conforms to the Backend class.
+        embedding_net: torch.nn object or list
+            it can be a torch.nn object with input size corresponding to size of model output and output size
+            corresponding to the number of parameters or, alternatively, a list with integer numbers denoting the width
+            of the hidden layers, from which a fully connected network with that structure is created, having the input
+            and output size corresponding to size of model output and number of parameters. In case this is None, the
+            depth of the network and the width of the hidden layers is determined from the input and output size as
+            specified in abcpy.NN_utilities.networks.DefaultNN.
+        n_samples: int, optional
+            The number of (parameter, simulated data) tuples to be generated to learn the summary statistics in the pilot
+            step. The default value is 1000.
+            This is ignored if `simulations` and `parameters` are provided.
+        n_samples_per_param: int, optional
+            Number of data points in each simulated data set. This is ignored if `simulations` and `parameters` are
+            provided. Default to 1.
+        parameters: array, optional
+            A numpy array with shape (n_samples, n_parameters) that is used, together with `simulations` to fit the
+            summary selection learning algorithm. It has to be provided together with `simulations`, in which case no
+            other simulations are performed. Default value is None.
+        simulations: array, optional
+            A numpy array with shape (n_samples, output_size) that is used, together with `parameters` to fit the
+            summary selection learning algorithm. It has to be provided together with `parameters`, in which case no
+            other simulations are performed. Default value is None.
+        seed: integer, optional
+            Optional initial seed for the random number generator. The default value is generated randomly.
+        cuda: boolean, optional
+            If None, the GPU is used if it is available; otherwise, set True to use the GPU or False to use the CPU.
+        batch_size: integer, optional
+            the batch size used for training the neural network. Default is 16
+        n_epochs: integer, optional
+            the number of epochs used for training the neural network. Default is 200
+        load_all_data_GPU: boolean, optional
+            If True and a GPU is used, the whole dataset is loaded on the GPU before training begins; this may
+            speed up training as it avoids transfers between CPU and GPU, but it is not guaranteed to. Note that
+            setting this to True may cause a crash if the dataset is too large to fit in GPU memory.
+            Default to False; you should not rely too much on this.
+        lr: float, optional
+            The learning rate to be used in the iterative training scheme of the neural network. Default to 1e-3.
+        optimizer: torch Optimizer class, optional
+            A torch Optimizer class, for instance `SGD` or `Adam`. Default to `Adam`. Additional parameters may be
+            passed through the `optimizer_kwargs` parameter.
+        scheduler: torch _LRScheduler class, optional
+            A torch _LRScheduler class, used to modify the learning rate across epochs. By default, no scheduler is
+            used. Additional parameters may be passed through the `scheduler_kwargs` parameter.
+        start_epoch: integer, optional
+            If a scheduler is provided, for the first `start_epoch` epochs the scheduler is applied to modify the
+            learning rate without training the network. From then on, the training proceeds normally, applying both the
+            scheduler and the optimizer at each epoch. Default to 0.
+        verbose: boolean, optional
+            if True, prints more information from the training routine. Default to False.
+        optimizer_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the optimizer.
+        scheduler_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the scheduler.
+        loader_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the loader (that handles loading the samples from the
+            dataset during the training phase).
+        """
+        super(SemiautomaticNN, self).__init__(model, statistics_calc, backend, FP_nn_training, distance_learning=False,
+                                              embedding_net=embedding_net, n_samples=n_samples,
+                                              n_samples_per_param=n_samples_per_param, parameters=parameters,
+                                              simulations=simulations, seed=seed, cuda=cuda, batch_size=batch_size,
+                                              n_epochs=n_epochs, load_all_data_GPU=load_all_data_GPU, lr=lr,
+                                              optimizer=optimizer, scheduler=scheduler, start_epoch=start_epoch,
+                                              verbose=verbose, optimizer_kwargs=optimizer_kwargs,
+                                              scheduler_kwargs=scheduler_kwargs, loader_kwargs=loader_kwargs)
+
+
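# A minimal sketch of SemiautomaticNN with pre-generated data and a fully connected network specified through
# its hidden-layer sizes (assumes PyTorch is installed; the model, data and sizes are illustrative placeholders):
import numpy as np
from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform
from abcpy.statistics import Identity
from abcpy.statisticslearning import SemiautomaticNN

mu = Uniform([[0], [10]], name='mu')
model = Normal([mu, 1.0], name='model')

statistics_calc = Identity(degree=1, cross=False)
parameters = np.random.uniform(0, 10, size=(200, 1))  # shape (n_samples, n_parameters)
simulations = np.hstack([parameters + np.random.normal(0, 1, size=(200, 1)) for _ in range(5)])

learner = SemiautomaticNN([model], statistics_calc, BackendDummy(), embedding_net=[30, 20],
                          parameters=parameters, simulations=simulations, n_epochs=10, seed=1)
learned_statistics_calc = learner.get_statistics()  # NeuralEmbedding wrapping statistics_calc
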
+class TripletDistanceLearning(StatisticsLearningNN):
+    """This class implements the statistics learning technique by using the triplet loss [1] for distance learning as
+     described in Pacchiardi et al. 2019 [2].
+
+     In order to use this technique, Pytorch is required to handle the neural networks.
+
+     [1] Schroff, F., Kalenichenko, D. and Philbin, J., 2015. Facenet: A unified embedding for face recognition and
+     clustering. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 815-823).
+
+     [2] Pacchiardi, L., Kunzli, P., Schoengens, M., Chopard, B. and Dutta, R., 2019. Distance-learning For Approximate
+     Bayesian Computation To Model a Volcanic Eruption. arXiv preprint arXiv:1909.13118.
+    """
+
+    def __init__(self, model, statistics_calc, backend, embedding_net=None, n_samples=1000, n_samples_per_param=1,
+                 parameters=None, simulations=None, seed=None, cuda=None, quantile=0.1, batch_size=16, n_epochs=200,
+                 load_all_data_GPU=False, margin=1., lr=None, optimizer=None, scheduler=None, start_epoch=0,
+                 verbose=False, optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
+        """
+        Parameters
+        ----------
+        model: abcpy.models.Model
+            Model object that conforms to the Model class.
+        statistics_calc: abcpy.statistics.Statistics
+            Statistics object that conforms to the Statistics class.
+        backend: abcpy.backends.Backend
+            Backend object that conforms to the Backend class.
+        embedding_net: torch.nn object or list
+            it can be a torch.nn object with input size corresponding to size of model output (output size can be any);
+            alternatively, a list with integer numbers denoting the width of the hidden layers, from which a fully
+            connected network with that structure is created, having the input and output size corresponding to size of
+            model output and number of parameters. In case this is None, the depth of the network and the width of the
+            hidden layers is determined from the input and output size as specified in
+            abcpy.NN_utilities.networks.DefaultNN.
+        n_samples: int, optional
+            The number of (parameter, simulated data) tuples to be generated to learn the summary statistics in the pilot
+            step. The default value is 1000.
+            This is ignored if `simulations` and `parameters` are provided.
+        n_samples_per_param: int, optional
+            Number of data points in each simulated data set. This is ignored if `simulations` and `parameters` are
+            provided. Default to 1.
+        parameters: array, optional
+            A numpy array with shape (n_samples, n_parameters) that is used, together with `simulations` to fit the
+            summary selection learning algorithm. It has to be provided together with `simulations`, in which case no
+            other simulations are performed. Default value is None.
+        simulations: array, optional
+            A numpy array with shape (n_samples, output_size) that is used, together with `parameters` to fit the
+            summary selection learning algorithm. It has to be provided together with `parameters`, in which case no
+            other simulations are performed. Default value is None.
+        seed: integer, optional
+            Optional initial seed for the random number generator. The default value is generated randomly.
+        cuda: boolean, optional
+            If None, the GPU is used if it is available; otherwise, set True to use the GPU or False to use the CPU.
+        quantile: float, optional
+            quantile used to define the similarity set if distance_learning is True. Default to 0.1.
+        batch_size: integer, optional
+            the batch size used for training the neural network. Default is 16
+        n_epochs: integer, optional
+            the number of epochs used for training the neural network. Default is 200
+        load_all_data_GPU: boolean, optional
+            If True and if we a GPU is used, the whole dataset is loaded on the GPU before training begins; this may
+            speed up training as it avoid transfer between CPU and GPU, but it is not guaranteed to do. Note that if the
+            dataset is not small enough, setting this to True causes things to crash if the dataset is too large.
+            Default to False, you should not rely too much on this.
+        margin: float, optional
+            margin defining the triplet loss. The larger it is, the further away dissimilar samples are pushed with
+            respect to similar ones. Default to 1.
+        lr: float, optional
+            The learning rate to be used in the iterative training scheme of the neural network. Default to 1e-3.
+        optimizer: torch Optimizer class, optional
+            A torch Optimizer class, for instance `SGD` or `Adam`. Default to `Adam`. Additional parameters may be
+            passed through the `optimizer_kwargs` parameter.
+        scheduler: torch _LRScheduler class, optional
+            A torch _LRScheduler class, used to modify the learning rate across epochs. By default, no scheduler is
+            used. Additional parameters may be passed through the `scheduler_kwargs` parameter.
+        start_epoch: integer, optional
+            If a scheduler is provided, for the first `start_epoch` epochs the scheduler is applied to modify the
+            learning rate without training the network. From then on, the training proceeds normally, applying both the
+            scheduler and the optimizer at each epoch. Default to 0.
+        verbose: boolean, optional
+            if True, prints more information from the training routine. Default to False.
+        optimizer_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the optimizer.
+        scheduler_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the scheduler.
+        loader_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the loader (that handles loading the samples from the
+            dataset during the training phase).
+        """
+
+        super(TripletDistanceLearning, self).__init__(model, statistics_calc, backend, triplet_training,
+                                                      distance_learning=True, embedding_net=embedding_net,
+                                                      n_samples=n_samples, n_samples_per_param=n_samples_per_param,
+                                                      parameters=parameters, simulations=simulations, seed=seed,
+                                                      cuda=cuda, quantile=quantile, batch_size=batch_size,
+                                                      n_epochs=n_epochs, load_all_data_GPU=load_all_data_GPU,
+                                                      margin=margin, lr=lr, optimizer=optimizer, scheduler=scheduler,
+                                                      start_epoch=start_epoch, verbose=verbose,
+                                                      optimizer_kwargs=optimizer_kwargs,
+                                                      scheduler_kwargs=scheduler_kwargs, loader_kwargs=loader_kwargs)
+
+
+class ContrastiveDistanceLearning(StatisticsLearningNN):
+    """This class implements the statistics learning technique by using the contrastive loss [1] for distance learning
+     as described in Pacchiardi et al. 2019 [2].
+
+     In order to use this technique, Pytorch is required to handle the neural networks.
+
+     [1] Hadsell, R., Chopra, S. and LeCun, Y., 2006, June. Dimensionality reduction by learning an invariant mapping.
+     In 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06) (Vol. 2,
+     pp. 1735-1742). IEEE.
+
+     [2] Pacchiardi, L., Kunzli, P., Schoengens, M., Chopard, B. and Dutta, R., 2019. Distance-learning For Approximate
+     Bayesian Computation To Model a Volcanic Eruption. arXiv preprint arXiv:1909.13118.
+    """
+
+    def __init__(self, model, statistics_calc, backend, embedding_net=None, n_samples=1000, n_samples_per_param=1,
+                 parameters=None, simulations=None, seed=None, cuda=None, quantile=0.1, batch_size=16, n_epochs=200,
+                 positive_weight=None, load_all_data_GPU=False, margin=1., lr=None, optimizer=None, scheduler=None,
+                 start_epoch=0, verbose=False, optimizer_kwargs={}, scheduler_kwargs={}, loader_kwargs={}):
+        """
+        Parameters
+        ----------
+        model: abcpy.models.Model
+            Model object that conforms to the Model class.
+        statistics_calc: abcpy.statistics.Statistics
+            Statistics object that conforms to the Statistics class.
+        backend: abcpy.backends.Backend
+            Backend object that conforms to the Backend class.
+        embedding_net: torch.nn object or list
+            It can be a torch.nn object with input size corresponding to the size of the model output (any output size
+            is allowed); alternatively, a list of integers denoting the widths of the hidden layers, from which a fully
+            connected network with that structure is created, whose input and output sizes correspond to the size of
+            the model output and the number of parameters, respectively. If None, the depth of the network and the
+            widths of the hidden layers are determined from the input and output sizes as specified in
+            abcpy.NN_utilities.networks.DefaultNN.
+        n_samples: int, optional
+            The number of (parameter, simulated data) tuples to be generated to learn the summary statistics in the
+            pilot step. The default value is 1000.
+            This is ignored if `simulations` and `parameters` are provided.
+        n_samples_per_param: int, optional
+            Number of data points in each simulated data set. This is ignored if `simulations` and `parameters` are
+            provided. Default to 1.
+        parameters: array, optional
+            A numpy array with shape (n_samples, n_parameters) that is used, together with `simulations` to fit the
+            summary selection learning algorithm. It has to be provided together with `simulations`, in which case no
+            other simulations are performed. Default value is None.
+        simulations: array, optional
+            A numpy array with shape (n_samples, output_size) that is used, together with `parameters` to fit the
+            summary selection learning algorithm. It has to be provided together with `parameters`, in which case no
+            other simulations are performed. Default value is None.
+        seed: integer, optional
+            Optional initial seed for the random number generator. The default value is generated randomly.
+        cuda: boolean, optional
+            If None, a GPU is used if available; set to True to force GPU usage or to False to run on the CPU.
+        quantile: float, optional
+            Quantile used to define the similarity set if distance_learning is True. Default to 0.1.
+        batch_size: integer, optional
+            The batch size used for training the neural network. Default is 16.
+        n_epochs: integer, optional
+            The number of epochs used for training the neural network. Default is 200.
+        positive_weight: float, optional
+            The contrastive loss samples pairs of elements at random; if the majority of samples are labelled as
+            dissimilar, the probability of drawing similar pairs is small. You can therefore set this value to a number
+            between 0 and 1 in order to sample positive (similar) pairs with that probability during training. Default
+            to None.
+        load_all_data_GPU: boolean, optional
+            If True and a GPU is used, the whole dataset is loaded on the GPU before training begins; this may speed up
+            training as it avoids transfers between CPU and GPU, but it is not guaranteed to. If the dataset does not
+            fit in GPU memory, setting this to True will cause a crash. Default to False; do not rely too much on this.
+        margin: float, optional
+            margin defining the contrastive loss. The larger it is, the further away dissimilar samples are pushed with
+            respect to similar ones. Default to 1.
+        lr: float, optional
+            The learning rate to be used in the iterative training scheme of the neural network. Default to 1e-3.
+        optimizer: torch Optimizer class, optional
+            A torch Optimizer class, for instance `SGD` or `Adam`. Default to `Adam`. Additional parameters may be
+            passed through the `optimizer_kwargs` parameter.
+        scheduler: torch _LRScheduler class, optional
+            A torch _LRScheduler class, used to modify the learning rate across epochs. By default, no scheduler is
+            used. Additional parameters may be passed through the `scheduler_kwargs` parameter.
+        start_epoch: integer, optional
+            If a scheduler is provided, for the first `start_epoch` epochs the scheduler is applied to modify the
+            learning rate without training the network. From then on, the training proceeds normally, applying both the
+            scheduler and the optimizer at each epoch. Default to 0.
+        verbose: boolean, optional
+            if True, prints more information from the training routine. Default to False.
+        optimizer_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the optimizer.
+        scheduler_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the scheduler.
+        loader_kwargs: Python dictionary, optional
+            dictionary containing optional keyword arguments for the loader (that handles loading the samples from the
+            dataset during the training phase).
+        """
+
+        super(ContrastiveDistanceLearning, self).__init__(model, statistics_calc, backend, contrastive_training,
+                                                          distance_learning=True, embedding_net=embedding_net,
+                                                          n_samples=n_samples, n_samples_per_param=n_samples_per_param,
+                                                          parameters=parameters, simulations=simulations, seed=seed,
+                                                          cuda=cuda, quantile=quantile, batch_size=batch_size,
+                                                          n_epochs=n_epochs, positive_weight=positive_weight,
+                                                          load_all_data_GPU=load_all_data_GPU, margin=margin, lr=lr,
+                                                          optimizer=optimizer, scheduler=scheduler,
+                                                          start_epoch=start_epoch, verbose=verbose,
+                                                          optimizer_kwargs=optimizer_kwargs,
+                                                          scheduler_kwargs=scheduler_kwargs,
+                                                          loader_kwargs=loader_kwargs)
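+
+
+if __name__ == '__main__':
+    # Minimal illustrative sketch (not part of the library API), assuming Pytorch is installed: learn a distance
+    # embedding for a toy Gaussian model with the dummy backend; sample sizes and epochs are kept small on purpose.
+    from abcpy.backends import BackendDummy
+    from abcpy.continuousmodels import Normal, Uniform
+    from abcpy.statistics import Identity
+
+    mu = Normal([0, 1])
+    sigma = Uniform([[10], [20]])
+    height = Normal([mu, sigma])
+
+    statistics_calculator = Identity(degree=2, cross=False)
+    backend = BackendDummy()
+
+    statistics_learning = ContrastiveDistanceLearning([height], statistics_calculator, backend,
+                                                      n_samples=100, n_samples_per_param=1, n_epochs=10, seed=1)
+    # the learned embedding can then be used as a statistics calculator in any ABC inference scheme
+    new_statistics_calculator = statistics_learning.get_statistics()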
diff --git a/abcpy/summaryselections.py b/abcpy/summaryselections.py
deleted file mode 100644
index 10690bfb..00000000
--- a/abcpy/summaryselections.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from abc import ABCMeta, abstractmethod
-
-from abcpy.graphtools import GraphTools
-from abcpy.acceptedparametersmanager import *
-
-import numpy as np
-from sklearn import linear_model
-
-
-class Summaryselections(metaclass=ABCMeta):
-    """This abstract base class defines a way to choose the summary statistics.
-    """
-
-    @abstractmethod
-    def __init__(self, model, statistics_calc, backend, n_samples=1000, seed=None):
-        """The constructor of a sub-class must accept a non-optional model, statistics calculator and 
-        backend which are stored to self.model, self.statistics_calc and self.backend. Further it 
-        accepts two optional parameters n_samples and seed defining the number of simulated dataset
-        used for the pilot to decide the summary statistics and the integer to initialize the random 
-        number generator.
-    
-        Parameters
-        ----------
-        model: abcpy.models.Model
-            Model object that conforms to the Model class.
-        statistics_cal: abcpy.statistics.Statistics
-            Statistics object that conforms to the Statistics class.
-        backend: abcpy.backends.Backend
-            Backend object that conforms to the Backend class.
-        n_samples: int, optional
-            The number of (parameter, simulated data) tuple generated to learn the summary statistics in pilot step. 
-            The default value is 1000.
-        n_samples_per_param: int, optional
-            Number of data points in each simulated data set.
-        seed: integer, optional
-            Optional initial seed for the random number generator. The default value is generated randomly.    
-        """
-        raise NotImplementedError
-
-    def __getstate__(self):
-        state = self.__dict__.copy()
-        del state['backend']
-        return state
-
-    @abstractmethod
-    def transformation(self, statistics):
-        raise NotImplementedError
-
-
-class Semiautomatic(Summaryselections, GraphTools):
-    """This class implements the semi automatic summary statistics choice described in Fearnhead and Prangle [1].
-    
-    [1] Fearnhead P., Prangle D. 2012. Constructing summary statistics for approximate
-    Bayesian computation: semi-automatic approximate Bayesian computation. J. Roy. Stat. Soc. B 74:419–474.    
-    """
-
-    def __init__(self, model, statistics_calc, backend, n_samples=1000, n_samples_per_param = 1, seed=None):
-        self.model = model
-        self.statistics_calc = statistics_calc
-        self.backend = backend
-        self.rng = np.random.RandomState(seed)
-        self.n_samples_per_param = n_samples_per_param
-
-        # An object managing the bds objects
-        self.accepted_parameters_manager = AcceptedParametersManager(self.model)
-        self.accepted_parameters_manager.broadcast(self.backend, [])
-
-        # main algorithm
-        seed_arr = self.rng.randint(1, n_samples * n_samples, size=n_samples, dtype=np.int32)
-        rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
-        rng_pds = self.backend.parallelize(rng_arr)
-
-        sample_parameters_statistics_pds = self.backend.map(self._sample_parameter_statistics, rng_pds)
-
-        sample_parameters_and_statistics = self.backend.collect(sample_parameters_statistics_pds)
-        sample_parameters, sample_statistics = [list(t) for t in zip(*sample_parameters_and_statistics)]
-        sample_parameters = np.array(sample_parameters)
-        sample_statistics = np.concatenate(sample_statistics)
-
-        self.coefficients_learnt = np.zeros(shape=(sample_parameters.shape[1], sample_statistics.shape[1]))
-        regr = linear_model.LinearRegression(fit_intercept=True)
-        for ind in range(sample_parameters.shape[1]):
-            regr.fit(sample_statistics, sample_parameters[:, ind])
-            self.coefficients_learnt[ind, :] = regr.coef_
-
-    def transformation(self, statistics):
-        if not statistics.shape[1] == self.coefficients_learnt.shape[1]:
-            raise ValueError('Mismatch in dimension of summary statistics')
-        return np.dot(statistics, np.transpose(self.coefficients_learnt))
-
-    def _sample_parameter_statistics(self, rng=np.random.RandomState()):
-        """
-        Samples a single model parameter and simulates from it until
-        distance between simulated outcome and the observation is
-        smaller than eplison.
-
-        Parameters
-        ----------
-        seed: int
-            value of a seed to be used for reseeding
-        Returns
-        -------
-        np.array
-            accepted parameter
-        """
-
-        self.sample_from_prior(rng=rng)
-        parameter = self.get_parameters()
-        y_sim = self.simulate(self.n_samples_per_param, rng=rng)
-        if y_sim is not None:
-            statistics = self.statistics_calc.statistics(y_sim)
-        return (parameter, statistics)
diff --git a/doc/source/abcpy.rst b/doc/source/abcpy.rst
index a063941f..4f332e70 100644
--- a/doc/source/abcpy.rst
+++ b/doc/source/abcpy.rst
@@ -1,7 +1,16 @@
 abcpy package
 =============
 
-This reference given details about the API of modules, classes and functions included in ABCpy.
+This reference gives details about the API of modules, classes and functions included in ABCpy.
+
+The following diagram shows selected classes with their most important
+methods. Abstract classes, which cannot be instantiated, are highlighted in
+dark gray and derived classes are highlighted in light gray. Inheritance is
+shown by filled arrows. Arrows with no filling highlight associations, e.g.,
+:py:class:`Distance <abcpy.distances.Distance>` is associated with :py:class:`Statistics <abcpy.statistics.Statistics>`
+because it calls a method of the instantiated class to translate the input data to summary statistics.
+
+.. image:: class-diagram.png
 
 .. currentmodule:: abcpy
 
@@ -81,19 +90,71 @@ abcpy.graphtools module
     :undoc-members:
     :show-inheritance:
 
-abcpy.output module
--------------------
+abcpy.inferences module
+-----------------------
 
-.. automodule:: abcpy.output
+.. automodule:: abcpy.inferences
     :members:
-    :special-members: __init__       
+    :special-members: __init__
     :undoc-members:
     :show-inheritance:
 
-abcpy.inferences module
------------------------
+abcpy.modelselections module
+----------------------------
 
-.. automodule:: abcpy.inferences
+.. automodule:: abcpy.modelselections
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+abcpy.NN_utilities module
+-------------------------
+
+Functions and classes needed for the neural network based summary statistics learning.
+
+.. automodule:: abcpy.NN_utilities.algorithms
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: abcpy.NN_utilities.datasets
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: abcpy.NN_utilities.losses
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: abcpy.NN_utilities.networks
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: abcpy.NN_utilities.trainer
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: abcpy.NN_utilities.utilities
+   :members:
+   :special-members: __init__
+   :undoc-members:
+   :show-inheritance:
+
+
+
+abcpy.output module
+-------------------
+
+.. automodule:: abcpy.output
     :members:
     :special-members: __init__       
     :undoc-members:
@@ -118,16 +179,6 @@ abcpy.probabilisticmodels module
     :show-inheritance:
 
 
-abcpy.modelselections module
-----------------------------
-
-.. automodule:: abcpy.modelselections
-   :members:
-   :special-members: __init__
-   :undoc-members:
-   :show-inheritance:
-
-
 abcpy.statistics module
 -----------------------
 
@@ -137,10 +188,10 @@ abcpy.statistics module
    :undoc-members:
    :show-inheritance:
 
-abcpy.summaryselections module
-------------------------------
+abcpy.statisticslearning module
+-------------------------------
 
-.. automodule:: abcpy.summaryselections
+.. automodule:: abcpy.statisticslearning
    :members:
    :special-members: __init__
    :undoc-members:
diff --git a/doc/source/class-diagram.png b/doc/source/class-diagram.png
new file mode 100644
index 00000000..d5bc304a
Binary files /dev/null and b/doc/source/class-diagram.png differ
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 2b7bfb8e..fb895aa4 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -25,7 +25,7 @@ class Mock(MagicMock):
     def __getattr__(cls, name):
             return MagicMock()
 
-MOCK_MODULES = ['numpy', 'pandas', 'glmnet', 'mpi4py', 'scipy', 'scipy.stats', 'scipy.special', 'scipy.optimize', 'sklearn', 'sklearn.covariance', 'findspark', 'coverage', 'numpy.random']
+MOCK_MODULES = ['numpy', 'pandas', 'glmnet', 'mpi4py', 'scipy', 'scipy.stats', 'scipy.special', 'scipy.optimize', 'sklearn', 'sklearn.covariance', 'findspark', 'coverage', 'numpy.random', 'matplotlib', 'matplotlib.pyplot', 'torch']
 sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
 
 # If extensions (or modules to document with autodoc) are in another directory,
diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst
index dd3df413..025fecd3 100644
--- a/doc/source/getting_started.rst
+++ b/doc/source/getting_started.rst
@@ -85,7 +85,7 @@ the datasets, and then compute the distance between the two statistics.
 Algorithms in ABCpy often require a perturbation kernel, a tool to explore the parameter space. Here, we use the default
 kernel provided, which explores the parameter space of random variables, by using e.g. a multivariate Gaussian
 distribution or by performing a random walk depending on whether the  corresponding random variable is continuous or
-discrete. For a more involved example, please consult `Complex Perturbation Kernels`_.
+discrete. For a more involved example, please consult `Composite Perturbation Kernels`_.
 
 .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py
     :language: python
@@ -248,10 +248,10 @@ customized combination strategies can be implemented by the user.
 The full source code can be found in `examples/hierarchicalmodels/pmcabc_inference_on_multiple_sets_of_obs.py`.
 
 
-Complex Perturbation Kernels
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Composite Perturbation Kernels
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-As pointed out earlier, it is possible to define complex perturbation kernels, perturbing different random variables in
+As pointed out earlier, it is possible to define composite perturbation kernels, perturbing different random variables in
 different ways. Let us take the same example as in the `Hierarchical Model`_ and assume that we want to perturb the
 schools budget, grade score and scholarship score without additional effect, using a multivariate normal kernel. However, the remaining
 parameters we would like to perturb using a multivariate Student's-T kernel. This can be implemented as follows:
@@ -302,14 +302,14 @@ We also have implemented the population Monte Carlo :py:class:`abcpy.inferences.
 the likelihood or approximate likelihood function is available. For approximation of the likelihood function we provide
 two methods:
 
-* Synthetic likelihood approximation :py:class:`abcpy.approx_lhd.SynLiklihood`, and another method using
+* Synthetic likelihood approximation :py:class:`abcpy.approx_lhd.SynLikelihood`, and another method using
 * penalized logistic regression :py:class:`abcpy.approx_lhd.PenLogReg`.
 
 Next we explain how we can use PMC algorithm using approximation of the
 likelihood functions. As we are now considering two observed datasets
 corresponding to two root models, we need to define an approximation of
 likelihood function for each of them separately. Here, we use the
-:py:class:`abcpy.approx_lhd.SynLiklihood` for each of the root models. It is
+:py:class:`abcpy.approx_lhd.SynLikelihood` for each of the root models. It is
 also possible to use two different approximate likelihoods for two different
 root models.
 
@@ -333,33 +333,61 @@ Further possibilities of combination will be made available in later versions of
 The source code can be found in `examples/approx_lhd/pmc_hierarchical_models.py`.
 
 
-Summary Selection
-~~~~~~~~~~~~~~~~~
+Statistics Learning
+~~~~~~~~~~~~~~~~~~~
 
 We have noticed in the `Parameters as Random Variables`_ Section, the discrepancy measure between two datasets is
 defined by a distance function between extracted summary statistics from the datasets. Hence, the ABC algorithms are
 subjective to the summary statistics choice. This subjectivity can be avoided by a data-driven summary statistics choice
-from the available summary statistics of the dataset. In ABCpy we provide a semi-automatic summary selection procedure in
-:py:class:`abcpy.summaryselections.Semiautomatic`
-
-Taking our initial example from `Parameters as Random Variables`_ where we model the height of humans, we can had summary
-statistics defined as follows:
-
-.. literalinclude:: ../../examples/summaryselection/pmcabc_gaussian_summary_selection.py
+from the available summary statistics of the dataset. In ABCpy we provide several statistics learning procedures,
+implemented in the subclasses of :py:class:`abcpy.statisticslearning.StatisticsLearning`, that generate a training
+dataset of (parameter, sample) pairs and use it to learn a transformation of the data that will be used in the inference
+step. The following techniques are available:
+
+* Semiautomatic :py:class:`abcpy.statisticslearning.Semiautomatic`,
+* SemiautomaticNN :py:class:`abcpy.statisticslearning.SemiautomaticNN`,
+* ContrastiveDistanceLearning :py:class:`abcpy.statisticslearning.ContrastiveDistanceLearning`,
+* TripletDistanceLearning :py:class:`abcpy.statisticslearning.TripletDistanceLearning`.
+
+The first two learn a transformation that approximates the parameters that generated the corresponding observation, the
+first one by using a linear regression approach and the second one by using a neural network embedding. The other two
+instead use neural networks to learn an embedding of the data such that the distance between the embeddings is close to
+the distance between the parameter values that generated the data.
+
+We remark that the techniques using neural networks require `Pytorch <https://pytorch.org/>`_ to be installed. As this is an optional feature,
+however, Pytorch is not in the list of dependencies of ABCpy. Rather, when one of the neural network based routines is
+called, ABCpy checks if Pytorch is present and, if not, asks the user to install it. Moreover, unless the user wants to
+provide a specific form of neural network, the neural network based techniques do not require any explicit neural
+network coding: all the necessary definitions and training are handled internally.
+
+We note finally that the statistics learning techniques are applied after computing a first set of statistics;
+the learned transformation therefore operates on the original set of statistics.
+For instance, consider our initial example from `Parameters as Random Variables`_ where we model the height of humans.
+The original summary statistics were defined as follows:
+
+.. literalinclude:: ../../examples/statisticslearning/pmcabc_gaussian_statistics_learning.py
     :language: python
     :lines: 21-23
     :dedent: 4
 
-Then we can learn the optimized summary statistics from the given list of summary statistics using the semi-automatic
+Then we can learn the optimized summary statistics from the above list of summary statistics using the semi-automatic
 summary selection procedure as follows:
 
-.. literalinclude:: ../../examples/summaryselection/pmcabc_gaussian_summary_selection.py
+.. literalinclude:: ../../examples/statisticslearning/pmcabc_gaussian_statistics_learning.py
     :language: python
-    :lines: 25-32
+    :lines: 25-31
     :dedent: 4
 
-Then we can perform the inference as before, but the distances will be computed on the newly learned summary statistics
-using the semi-automatic summary selection procedure.
+We remark that the amount of code needed to use the neural network based regression does not change at all:
+
+.. literalinclude:: ../../examples/statisticslearning/pmcabc_gaussian_statistics_learning.py
+    :language: python
+    :lines: 34-40
+    :dedent: 4
+
+And similarly for the other two approaches.
+
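+For the distance learning approaches, a minimal sketch, reusing the model, statistics calculator and backend defined
+above and using small illustrative values for the number of samples and epochs, could look as follows:
+
+.. code-block:: python
+
+    from abcpy.statisticslearning import TripletDistanceLearning
+    statistics_learning = TripletDistanceLearning([height], statistics_calculator, backend,
+                                                  n_samples=100, n_samples_per_param=1, n_epochs=10, seed=1)
+    new_statistics_calculator = statistics_learning.get_statistics()
+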
+We can then perform the inference as before, but the distances will be computed on the newly learned summary statistics.
 
 Model Selection
 ~~~~~~~~~~~~~~~
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 97521a93..0b761349 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -35,7 +35,7 @@ To create a package and install it, do
 
    make package
 
-   pip3 install build/dist/abcpy-0.5.6-py3-none-any.whl
+   pip3 install build/dist/abcpy-0.5.7-py3-none-any.whl
 
 
 Note that ABCpy requires Python3.
diff --git a/doc/source/postanalysis.rst b/doc/source/postanalysis.rst
index 7a82b82e..2462be32 100644
--- a/doc/source/postanalysis.rst
+++ b/doc/source/postanalysis.rst
@@ -56,6 +56,13 @@ algorithm that created it:
     :lines: 57
     :dedent: 4
 
+Finally, you can plot the inferred posterior distribution of the parameters in the following way:
+
+.. literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py
+    :language: python
+    :lines: 65
+    :dedent: 4
+
 And certainly, a journal can easily be saved to and loaded from disk:
 
 .. literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py
diff --git a/examples/approx_lhd/pmc_hierarchical_models.py b/examples/approx_lhd/pmc_hierarchical_models.py
index 0a07e97e..03a14476 100644
--- a/examples/approx_lhd/pmc_hierarchical_models.py
+++ b/examples/approx_lhd/pmc_hierarchical_models.py
@@ -36,9 +36,9 @@ def infer_parameters():
     statistics_calculator_final_scholarship = Identity(degree = 3, cross = False)
 
     # Define a distance measure for final grade and final scholarship
-    from abcpy.approx_lhd import SynLiklihood
-    approx_lhd_final_grade = SynLiklihood(statistics_calculator_final_grade)
-    approx_lhd_final_scholarship = SynLiklihood(statistics_calculator_final_scholarship)
+    from abcpy.approx_lhd import SynLikelihood
+    approx_lhd_final_grade = SynLikelihood(statistics_calculator_final_grade)
+    approx_lhd_final_scholarship = SynLikelihood(statistics_calculator_final_scholarship)
 
     # Define a backend
     from abcpy.backends import BackendDummy as Backend
diff --git a/examples/backends/dummy/pmcabc_gaussian.py b/examples/backends/dummy/pmcabc_gaussian.py
index 4682e873..9ede5916 100644
--- a/examples/backends/dummy/pmcabc_gaussian.py
+++ b/examples/backends/dummy/pmcabc_gaussian.py
@@ -62,6 +62,7 @@ def analyse_journal(journal):
     from abcpy.output import Journal
     new_journal = Journal.fromFile('experiments.jnl')
 
+    journal.plot_posterior_distr()
 
 # this code is for testing purposes only and not relevant to run the example
 import unittest
diff --git a/examples/backends/mpi/mpi_model_inferences.py b/examples/backends/mpi/mpi_model_inferences.py
index 9134390c..72cdaacd 100644
--- a/examples/backends/mpi/mpi_model_inferences.py
+++ b/examples/backends/mpi/mpi_model_inferences.py
@@ -306,8 +306,8 @@ def infer_parameters_pmc():
     from abcpy.statistics import Identity
     statistics_calculator = Identity(degree = 2, cross = False)
 
-    from abcpy.approx_lhd import SynLiklihood
-    approx_lhd = SynLiklihood(statistics_calculator)
+    from abcpy.approx_lhd import SynLikelihood
+    approx_lhd = SynLikelihood(statistics_calculator)
 
     # define sampling scheme
     from abcpy.inferences import PMC
diff --git a/examples/statisticslearning/__init__.py b/examples/statisticslearning/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/summaryselection/pmcabc_gaussian_summary_selection.py b/examples/statisticslearning/pmcabc_gaussian_statistics_learning.py
similarity index 80%
rename from examples/summaryselection/pmcabc_gaussian_summary_selection.py
rename to examples/statisticslearning/pmcabc_gaussian_statistics_learning.py
index 35841589..78757acf 100644
--- a/examples/summaryselection/pmcabc_gaussian_summary_selection.py
+++ b/examples/statisticslearning/pmcabc_gaussian_statistics_learning.py
@@ -13,36 +13,45 @@ def infer_parameters():
     from abcpy.continuousmodels import Uniform
     mu = Uniform([[150], [200]], )
     sigma = Uniform([[5], [25]], )
-    
+
     # define the model
     from abcpy.continuousmodels import Normal
     height = Normal([mu, sigma], )
 
     # define statistics
     from abcpy.statistics import Identity
-    statistics_calculator = Identity(degree = 3, cross = True)
+    statistics_calculator = Identity(degree=3, cross=True)
 
     # Learn the optimal summary statistics using Semiautomatic summary selection
-    from abcpy.summaryselections import Semiautomatic
-    summary_selection = Semiautomatic([height], statistics_calculator, backend,
+    from abcpy.statisticslearning import Semiautomatic
+    statistics_learning = Semiautomatic([height], statistics_calculator, backend,
                                       n_samples=1000,n_samples_per_param=1, seed=1)
 
     # Redefine the statistics function
-    statistics_calculator.statistics = lambda x, f2=summary_selection.transformation, \
-                                              f1=statistics_calculator.statistics: f2(f1(x))
+    new_statistics_calculator = statistics_learning.get_statistics()
+
+
+    # Learn the optimal summary statistics using SemiautomaticNN summary selection
+    from abcpy.statisticslearning import SemiautomaticNN
+    statistics_learning = SemiautomaticNN([height], statistics_calculator, backend,
+                                          n_samples=1000, n_samples_per_param=1, seed=1)
+
+    # Redefine the statistics function
+    new_statistics_calculator = statistics_learning.get_statistics()
+
 
     # define distance
     from abcpy.distances import Euclidean
-    distance_calculator = Euclidean(statistics_calculator)
-    
+    distance_calculator = Euclidean(new_statistics_calculator)
+
     # define kernel
     from abcpy.perturbationkernel import DefaultKernel
     kernel = DefaultKernel([mu, sigma])
-    
+
     # define sampling scheme
     from abcpy.inferences import PMCABC
     sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)
-    
+
     # sample from scheme
     T, n_sample, n_samples_per_param = 3, 10, 10
     eps_arr = np.array([500])
@@ -56,18 +65,18 @@ def analyse_journal(journal):
     # output parameters and weights
     print(journal.opt_values)
     print(journal.get_weights())
-    
+
     # do post analysis
     print(journal.posterior_mean())
     print(journal.posterior_cov())
     print(journal.posterior_histogram())
-    
+
     # print configuration
     print(journal.configuration)
-    
+
     # save and load journal
     journal.save("experiments.jnl")
-    
+
     from abcpy.output import Journal
     new_journal = Journal.fromFile('experiments.jnl')
 
diff --git a/requirements.txt b/requirements.txt
index 9fb82a89..abc3ad13 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,5 @@ sphinx_rtd_theme
 coverage
 mpi4py
 cloudpickle
+matplotlib
+tqdm
\ No newline at end of file
diff --git a/requirements/optional-requirements.txt b/requirements/optional-requirements.txt
new file mode 100644
index 00000000..e29ebb46
--- /dev/null
+++ b/requirements/optional-requirements.txt
@@ -0,0 +1,2 @@
+torch
+
diff --git a/tests/approx_lhd_tests.py b/tests/approx_lhd_tests.py
index 547c276c..b5642319 100644
--- a/tests/approx_lhd_tests.py
+++ b/tests/approx_lhd_tests.py
@@ -4,7 +4,7 @@
 from abcpy.continuousmodels import Normal
 from abcpy.continuousmodels import Uniform
 from abcpy.statistics import Identity
-from abcpy.approx_lhd import PenLogReg, SynLiklihood
+from abcpy.approx_lhd import PenLogReg, SynLikelihood
 
 class PenLogRegTests(unittest.TestCase):
     def setUp(self):
@@ -31,13 +31,13 @@ def test_likelihood(self):
         # This checks whether it computes a correct value and dimension is right
         self.assertLess(comp_likelihood - expected_likelihood, 10e-2)
         
-class SynLiklihoodTests(unittest.TestCase):
+class SynLikelihoodTests(unittest.TestCase):
     def setUp(self):
         self.mu = Uniform([[-5.0], [5.0]], name='mu')
         self.sigma = Uniform([[5.0], [10.0]], name='sigma')
         self.model = Normal([self.mu,self.sigma])
         self.stat_calc = Identity(degree = 2, cross = 0)
-        self.likfun = SynLiklihood(self.stat_calc) 
+        self.likfun = SynLikelihood(self.stat_calc)
 
 
     def test_likelihood(self):
diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py
index 7542fdba..257052b7 100644
--- a/tests/inferences_tests.py
+++ b/tests/inferences_tests.py
@@ -1,13 +1,12 @@
 import unittest
 import numpy as np
-import warnings
 
 from abcpy.backends import BackendDummy
 from abcpy.continuousmodels import Normal
 
 from abcpy.distances import Euclidean
 
-from abcpy.approx_lhd import SynLiklihood
+from abcpy.approx_lhd import SynLikelihood
 
 from abcpy.continuousmodels import Uniform
 
@@ -78,7 +77,7 @@ def test_sample(self):
         y_obs = [np.array(9.8)]
       
         # Define the likelihood function
-        likfun = SynLiklihood(stat_calc)
+        likfun = SynLikelihood(stat_calc)
 
 
         T, n_sample, n_samples_per_param = 1, 10, 100
diff --git a/tests/jointapprox_lhd_tests.py b/tests/jointapprox_lhd_tests.py
index a3922fc3..1b086fb8 100644
--- a/tests/jointapprox_lhd_tests.py
+++ b/tests/jointapprox_lhd_tests.py
@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 
-from abcpy.approx_lhd import SynLiklihood
+from abcpy.approx_lhd import SynLikelihood
 from abcpy.statistics import Identity
 from abcpy.continuousmodels import Normal, Uniform
 from abcpy.jointapprox_lhd import ProductCombination
@@ -10,8 +10,8 @@ class ProductCombinationTests(unittest.TestCase):
     def setUp(self):
         self.stat_calc1 = Identity(degree = 1, cross = 0)
         self.stat_calc2 = Identity(degree= 1, cross = 0)
-        self.likfun1 = SynLiklihood(self.stat_calc1)
-        self.likfun2 = SynLiklihood(self.stat_calc2)
+        self.likfun1 = SynLikelihood(self.stat_calc1)
+        self.likfun2 = SynLikelihood(self.stat_calc2)
         ## Define Models
         # define a uniform prior distribution
         self.mu = Uniform([[-5.0], [5.0]], name='mu')
diff --git a/tests/statistics_tests.py b/tests/statistics_tests.py
index 2c99063c..6cf9bd75 100644
--- a/tests/statistics_tests.py
+++ b/tests/statistics_tests.py
@@ -1,36 +1,99 @@
 import unittest
 import numpy as np
-from abcpy.statistics import Identity
+from abcpy.statistics import Identity, LinearTransformation, NeuralEmbedding
+
+try:
+    import torch
+except ImportError:
+    has_torch = False
+else:
+    has_torch = True
+    from abcpy.NN_utilities.networks import createDefaultNN
+
 
 class IdentityTests(unittest.TestCase):
     def setUp(self):
-        self.stat_calc = Identity(degree = 1, cross = 0)
+        self.stat_calc = Identity(degree=1, cross=0)
 
     def test_statistics(self):
         self.assertRaises(TypeError, self.stat_calc.statistics, 3.4)
-        vec1 = np.array([1,2])
+        vec1 = np.array([1, 2])
         vec2 = np.array([1])
         self.assertTrue((self.stat_calc.statistics([vec1]) == np.array([vec1])).all())
-        self.assertTrue((self.stat_calc.statistics([vec1,vec1]) == np.array([[vec1],[vec1]])).all())
-        self.assertTrue((self.stat_calc.statistics([vec2,vec2]) == np.array([[vec2],[vec2]])).all())
-    
+        self.assertTrue((self.stat_calc.statistics([vec1, vec1]) == np.array([[vec1], [vec1]])).all())
+        self.assertTrue((self.stat_calc.statistics([vec2, vec2]) == np.array([[vec2], [vec2]])).all())
+
+    def test_polynomial_expansion(self):
+        # Checks whether wrong input type produces error message
+        self.assertRaises(TypeError, self.stat_calc._polynomial_expansion, 3.4)
+
+        a = [np.array([0, 2]), np.array([2, 1])]
+        # test cross-product part
+        self.stat_calc = Identity(degree=2, cross=1)
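+        # degree-2 expansion with cross term: [x1, x2, x1**2, x2**2, x1*x2], e.g. [0, 2] -> [0, 2, 0, 4, 0]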
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[0, 2, 0, 4, 0], [2, 1, 4, 1, 2]])).all())
+        # When a tuple
+        a = [np.array([0, 2])]
+        self.stat_calc = Identity(degree=2, cross=1)
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[0, 2, 0, 4, 0]])).all())
+        self.stat_calc = Identity(degree=2, cross=0)
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[0, 2, 0, 4]])).all())
+        a = list(np.array([2]))
+        self.stat_calc = Identity(degree=2, cross=1)
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[2, 4]])).all())
+
+
+class LinearTransformationTests(unittest.TestCase):
+    def setUp(self):
+        self.coeff = np.array([[3, 4], [5, 6]])
+        self.stat_calc = LinearTransformation(self.coeff, degree=1, cross=0)
+
+    def test_statistics(self):
+        self.assertRaises(TypeError, self.stat_calc.statistics, 3.4)
+        vec1 = np.array([1, 2])
+        vec2 = np.array([1])
+        self.assertTrue((self.stat_calc.statistics([vec1]) == np.dot(vec1, self.coeff)).all())
+        self.assertTrue((self.stat_calc.statistics([vec1, vec1]) == np.array(
+            [np.dot(np.array([1, 2]), self.coeff), np.dot(np.array([1, 2]), self.coeff)])).all())
+        self.assertRaises(ValueError, self.stat_calc.statistics, [vec2])
+
     def test_polynomial_expansion(self):
-        #Checks whether wrong input type produces error message
+        # Checks whether wrong input type produces error message
         self.assertRaises(TypeError, self.stat_calc._polynomial_expansion, 3.4)
 
-        a = [np.array([0, 2]),np.array([2,1])]             
+        a = [np.array([0, 2]), np.array([2, 1])]
         # test cross-product part
-        self.stat_calc = Identity(degree = 2, cross = 1)
-        self.assertTrue((self.stat_calc.statistics(a) == np.array([[0,2,0,4,0],[2,1,4,1,2]])).all())
+        self.stat_calc = LinearTransformation(self.coeff, degree=2, cross=1)
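+        # np.dot([0, 2], coeff) = [10, 12] and np.dot([2, 1], coeff) = [11, 14]; the degree-2 cross expansion
+        # then appends the squares and the pairwise product of the transformed entries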
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[10, 12, 100, 144, 120],
+                                                                   [11, 14, 121, 196, 154]])).all())
         # When a tuple
-        a = [np.array([0, 2])] 
-        self.stat_calc = Identity(degree = 2, cross = 1)
-        self.assertTrue((self.stat_calc.statistics(a) == np.array([[0,2,0,4,0]])).all())
-        self.stat_calc = Identity(degree = 2, cross = 0)
-        self.assertTrue((self.stat_calc.statistics(a) == np.array([[0,2,0,4]])).all())
-        a = list(np.array([2])) 
-        self.stat_calc = Identity(degree = 2, cross = 1)
-        self.assertTrue((self.stat_calc.statistics(a) == np.array([[2,4]])).all())
-        
+        a = [np.array([0, 2])]
+        self.stat_calc = LinearTransformation(self.coeff, degree=2, cross=1)
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[10, 12, 100, 144, 120]])).all())
+        self.stat_calc = LinearTransformation(self.coeff, degree=2, cross=0)
+        self.assertTrue((self.stat_calc.statistics(a) == np.array([[10, 12, 100, 144]])).all())
+        a = list(np.array([2]))
+        self.stat_calc = LinearTransformation(self.coeff, degree=2, cross=1)
+        self.assertRaises(ValueError, self.stat_calc.statistics, a)
+
+
+class NeuralEmbeddingTests(unittest.TestCase):
+    def setUp(self):
+        if has_torch:
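+            # small default fully connected network; the data fed to it in these tests is 2-dimensional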
+            self.net = createDefaultNN(2, 3)()
+
+    def test_statistics(self):
+        if not has_torch:
+            self.assertRaises(ImportError, NeuralEmbedding, None)
+        else:
+            self.stat_calc = NeuralEmbedding(self.net)
+
+            self.assertRaises(TypeError, self.stat_calc.statistics, 3.4)
+            vec1 = np.array([1, 2])
+            vec2 = np.array([1])
+            self.assertTrue((self.stat_calc.statistics([vec1])).all())
+            self.assertTrue((self.stat_calc.statistics([vec1, vec1])).all())
+            self.assertRaises(RuntimeError, self.stat_calc.statistics, [vec2])
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/statisticslearning_tests.py b/tests/statisticslearning_tests.py
new file mode 100644
index 00000000..285147f2
--- /dev/null
+++ b/tests/statisticslearning_tests.py
@@ -0,0 +1,161 @@
+import unittest
+import numpy as np
+from abcpy.continuousmodels import Uniform
+from abcpy.continuousmodels import Normal
+from abcpy.statistics import Identity
+from abcpy.backends import BackendDummy as Backend
+from abcpy.statisticslearning import Semiautomatic, SemiautomaticNN, TripletDistanceLearning, \
+    ContrastiveDistanceLearning
+
+try:
+    import torch
+except ImportError:
+    has_torch = False
+else:
+    has_torch = True
+
+
+class SemiautomaticTests(unittest.TestCase):
+    def setUp(self):
+        # define prior and model
+        sigma = Uniform([[10], [20]])
+        mu = Normal([0, 1])
+        Y = Normal([mu, sigma])
+
+        # define backend
+        self.backend = Backend()
+
+        # define statistics
+        self.statistics_cal = Identity(degree=3, cross=False)
+
+        # Initialize statistics learning
+        self.statisticslearning = Semiautomatic([Y], self.statistics_cal, self.backend, n_samples=1000,
+                                                n_samples_per_param=1, seed=1)
+
+    def test_transformation(self):
+        # Transform statistics extraction
+        self.new_statistics_calculator = self.statisticslearning.get_statistics()
+        # Simulate observed data
+        Obs = Normal([2, 4])
+        y_obs = Obs.forward_simulate(Obs.get_input_values(), 1)[0].tolist()
+
+        extracted_statistics = self.new_statistics_calculator.statistics(y_obs)
+        self.assertEqual(np.shape(extracted_statistics), (1, 2))
+
+        # NOTE we cannot test the exact values, since the linear regression relies on a random number generator (in C) that we cannot access; results may therefore differ and the test might fail
+        # self.assertLess(extracted_statistics[0,0] - 0.00215507052338, 10e-2)
+        # self.assertLess(extracted_statistics[0,1] - (-0.0058023274456), 10e-2)
+
+
+class SemiautomaticNNTests(unittest.TestCase):
+    def setUp(self):
+        # define prior and model
+        sigma = Uniform([[10], [20]])
+        mu = Normal([0, 1])
+        self.Y = Normal([mu, sigma])
+
+        # define backend
+        self.backend = Backend()
+
+        # define statistics
+        self.statistics_cal = Identity(degree=3, cross=False)
+
+        if has_torch:
+            # Initialize statistics learning
+            self.statisticslearning = SemiautomaticNN([self.Y], self.statistics_cal, self.backend, n_samples=100,
+                                                      n_samples_per_param=1, seed=1, n_epochs=10)
+
+    def test_initialization(self):
+        if not has_torch:
+            self.assertRaises(ImportError, SemiautomaticNN, [self.Y], self.statistics_cal, self.backend)
+
+    def test_transformation(self):
+        if has_torch:
+            # Transform statistics extraction
+            self.new_statistics_calculator = self.statisticslearning.get_statistics()
+            # Simulate observed data
+            Obs = Normal([2, 4])
+            y_obs = Obs.forward_simulate(Obs.get_input_values(), 1)[0].tolist()
+
+            extracted_statistics = self.new_statistics_calculator.statistics(y_obs)
+            self.assertEqual(np.shape(extracted_statistics), (1, 2))
+
+            self.assertRaises(RuntimeError, self.new_statistics_calculator.statistics, [np.array([1, 2])])
+
+
+class ContrastiveDistanceLearningTests(unittest.TestCase):
+    def setUp(self):
+        # define prior and model
+        sigma = Uniform([[10], [20]])
+        mu = Normal([0, 1])
+        self.Y = Normal([mu, sigma])
+
+        # define backend
+        self.backend = Backend()
+
+        # define statistics
+        self.statistics_cal = Identity(degree=3, cross=False)
+
+        if has_torch:
+            # Initialize statistics learning
+            self.statisticslearning = ContrastiveDistanceLearning([self.Y], self.statistics_cal, self.backend,
+                                                                  n_samples=100, n_samples_per_param=1, seed=1,
+                                                                  n_epochs=10)
+
+    def test_initialization(self):
+        if not has_torch:
+            self.assertRaises(ImportError, ContrastiveDistanceLearning, [self.Y], self.statistics_cal,
+                              self.backend)
+
+    def test_transformation(self):
+        if has_torch:
+            # Transform statistics extraction
+            self.new_statistics_calculator = self.statisticslearning.get_statistics()
+            # Simulate observed data
+            Obs = Normal([2, 4])
+            y_obs = Obs.forward_simulate(Obs.get_input_values(), 1)[0].tolist()
+
+            extracted_statistics = self.new_statistics_calculator.statistics(y_obs)
+            self.assertEqual(np.shape(extracted_statistics), (1, 2))
+
+            self.assertRaises(RuntimeError, self.new_statistics_calculator.statistics, [np.array([1, 2])])
+
+
+class TripletDistanceLearningTests(unittest.TestCase):
+    def setUp(self):
+        # define prior and model
+        sigma = Uniform([[10], [20]])
+        mu = Normal([0, 1])
+        self.Y = Normal([mu, sigma])
+
+        # define backend
+        self.backend = Backend()
+
+        # define statistics
+        self.statistics_cal = Identity(degree=3, cross=False)
+
+        if has_torch:
+            # Initialize statistics learning
+            self.statisticslearning = TripletDistanceLearning([self.Y], self.statistics_cal, self.backend,
+                                                              n_samples=100, n_samples_per_param=1, seed=1, n_epochs=10)
+
+    def test_initialization(self):
+        if not has_torch:
+            self.assertRaises(ImportError, TripletDistanceLearning, [self.Y], self.statistics_cal, self.backend)
+
+    def test_transformation(self):
+        if has_torch:
+            # Transform statistics extraction
+            self.new_statistics_calculator = self.statisticslearning.get_statistics()
+            # Simulate observed data
+            Obs = Normal([2, 4])
+            y_obs = Obs.forward_simulate(Obs.get_input_values(), 1)[0].tolist()
+
+            extracted_statistics = self.new_statistics_calculator.statistics(y_obs)
+            self.assertEqual(np.shape(extracted_statistics), (1, 2))
+
+            self.assertRaises(RuntimeError, self.new_statistics_calculator.statistics, [np.array([1, 2])])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/summaryselections_tests.py b/tests/summaryselections_tests.py
deleted file mode 100644
index 56e53492..00000000
--- a/tests/summaryselections_tests.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import unittest
-import numpy as np
-from abcpy.continuousmodels import Uniform
-from abcpy.continuousmodels import Normal
-from abcpy.statistics import Identity
-from abcpy.backends import BackendDummy as Backend
-from abcpy.summaryselections import Semiautomatic
-
-
-class SemiautomaticTests(unittest.TestCase):
-    def setUp(self):
-
-        # define prior and model
-        sigma = Uniform([[10], [20]])
-        mu = Normal([0, 1])
-        Y = Normal([mu, sigma])
-
-        # define backend
-        self.backend = Backend()
-
-        # define statistics
-        self.statistics_cal = Identity(degree = 3, cross = False)
-        
-        # Initialize summaryselection
-        self.summaryselection = Semiautomatic([Y], self.statistics_cal, self.backend, n_samples = 1000, n_samples_per_param = 1, seed = 1)
-        
-    def test_transformation(self):
-        # Transform statistics extraction
-        self.statistics_cal.statistics = lambda x, f2=self.summaryselection.transformation, f1=self.statistics_cal.statistics: f2(f1(x))
-        # Simulate observed data
-        Obs = Normal([2, 4] )
-        y_obs = Obs.forward_simulate(Obs.get_input_values(), 1)[0].tolist()
-
-        extracted_statistics = self.statistics_cal.statistics(y_obs)
-        self.assertEqual(np.shape(extracted_statistics), (1,2))
-
-        # NOTE we cannot test this, since the linear regression used uses a random number generator (which we cannot access and is in C). Therefore, our results differ and testing might fail
-        #self.assertLess(extracted_statistics[0,0] - 0.00215507052338, 10e-2)
-        #self.assertLess(extracted_statistics[0,1] - (-0.0058023274456), 10e-2)
-        
-if __name__ == '__main__':
-    unittest.main()