diff --git a/hela/__init__.py b/hela/__init__.py
new file mode 100644
index 0000000..e3dd264
--- /dev/null
+++ b/hela/__init__.py
@@ -0,0 +1,7 @@
+
+from .dataset import *
+from .feature_importance import *
+from .models import *
+from .plot import *
+from .utils import *
+from .wpercentile import *
diff --git a/dataset.py b/hela/dataset.py
similarity index 71%
rename from dataset.py
rename to hela/dataset.py
index 727db15..59ba2be 100644
--- a/dataset.py
+++ b/hela/dataset.py
@@ -8,8 +8,7 @@
 
 __all__ = ["Dataset", "load_dataset", "load_data_file"]
 
-logger = logging.getLogger(__name__)
-
+LOGGER = logging.getLogger(__name__)
 
 Dataset = namedtuple("Dataset", ["training_x", "training_y",
                                  "testing_x", "testing_y",
@@ -17,39 +16,40 @@
 
 
 def load_data_file(data_file, num_features):
-    
+
     data = np.load(data_file)
-    
+
     if data.ndim == 1:
         data = data[None, :]
-    
+
     x = data[:, :num_features]
     y = data[:, num_features:]
-    
+
     return x, y
 
 
 def load_dataset(dataset_file):
-    
+
     with open(dataset_file, "r") as f:
         dataset_info = json.load(f)
-    
+
     metadata = dataset_info["metadata"]
-    
+
     base_path = os.path.dirname(dataset_file)
-    
+
     # Load training data
-    training_file = os.path.join(base_path, dataset_info["training_data"])    
-    logger.debug("Loading training data from '{}'...".format(training_file))
-    training_x, training_y = load_data_file(training_file, metadata["num_features"])
-    
+    training_file = os.path.join(base_path, dataset_info["training_data"])
+    LOGGER.debug("Loading training data from '%s'...", training_file)
+    training_x, training_y = load_data_file(training_file,
+                                            metadata["num_features"])
+
     # Optionally, load testing data
     testing_x, testing_y = None, None
     if dataset_info["testing_data"] is not None:
         testing_file = os.path.join(base_path, dataset_info["testing_data"])
-        logger.debug("Loading testing data from '{}'...".format(testing_file))
-        testing_x, testing_y = load_data_file(testing_file, metadata["num_features"])
-    
+        LOGGER.debug("Loading testing data from '%s'...", testing_file)
+        testing_x, testing_y = load_data_file(testing_file,
+                                              metadata["num_features"])
+
     return Dataset(training_x, training_y, testing_x, testing_y,
                    metadata["names"], metadata["ranges"], metadata["colors"])
-
diff --git a/hela/feature_importance.py b/hela/feature_importance.py
new file mode 100644
index 0000000..1f1537e
--- /dev/null
+++ b/hela/feature_importance.py
@@ -0,0 +1,183 @@
+
+from collections import namedtuple
+from multiprocessing import Pool
+from typing import Optional, Tuple
+import logging
+
+import numpy as np
+
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.tree import DecisionTreeRegressor
+
+from .models import _tree_weights
+from .utils import tqdm
+
+__all__ = [
+    'importances_per_output'
+]
+
+LOGGER = logging.getLogger(__name__)
+
+
+def importances_per_output(
+        forest: RandomForestRegressor,
+        x: np.ndarray,
+        y: np.ndarray
+        ) -> np.ndarray:
+    """
+    Compute the feature importances with a breakdown per output variable (i.e.,
+    per label).
+
+    Parameters
+    ----------
+    forest : RandomForestRegressor
+        A trained random forest from which feature importances will be
+        extracted.
+    x : array-like of shape (n_samples, n_features)
+        The input samples of the training dataset used to train `forest`.
+    y : array-like of shape (n_samples, n_outputs)
+
+    Returns
+    -------
+    importances : np.ndarray of shape (n_features, n_outputs)
+        The feature importances splitted according to the contributions to each
+        output variable. Summing along the columns of this array will provide
+        the global feature importances contained in
+        forest.feature_importances_.
+    """
+
+    impurities = impurities_per_output(forest, x, y)
+    return importances_from_impurities(forest, impurities)
+
+
+def importances_from_impurities(
+        forest: RandomForestRegressor,
+        impurities: np.ndarray
+        ) -> np.ndarray:
+
+    LOGGER.info("Computing importances from impurities...")
+    with Pool(forest.n_jobs) as pool:
+        importances_ = list(tqdm(
+            pool.imap(_tree_importances_per_output,
+                      zip(forest, impurities)),
+            total=len(forest)
+        ))
+
+    importances = np.mean(importances_, axis=0)
+
+    return importances / importances.sum()
+
+
+def _tree_importances_per_output(
+        args: Tuple[DecisionTreeRegressor, np.ndarray]
+        ) -> np.ndarray:
+
+    tree, impurities = args
+
+    tree_ = tree.tree_
+    importances = np.zeros((tree_.n_features, tree_.n_outputs))
+
+    Node = namedtuple(
+        "Node",
+        ['left', 'right', 'weighted_n_node_samples', 'feature', 'impurities']
+    )
+
+    for node in zip(tree_.children_left, tree_.children_right,
+                    tree_.weighted_n_node_samples, tree_.feature, impurities):
+        node = Node(*node)
+        if node.left == -1:
+            continue
+
+        left = Node(
+            None, None,
+            tree_.weighted_n_node_samples[node.left],
+            tree_.feature[node.left],
+            impurities[node.left]
+        )
+        right = Node(
+            None, None,
+            tree_.weighted_n_node_samples[node.right],
+            tree_.feature[node.right], impurities[node.right]
+        )
+
+        importances[node.feature] += (
+            node.weighted_n_node_samples * node.impurities -
+            left.weighted_n_node_samples * left.impurities -
+            right.weighted_n_node_samples * right.impurities
+        )
+
+    return importances / importances.sum()
+
+
+# Global variables to avoid pickling very large arrays with multiprocessing
+_X = None
+_Y = None
+
+
+def impurities_per_output(
+        forest: RandomForestRegressor,
+        x: np.ndarray,
+        y: np.ndarray
+        ) -> np.ndarray:
+
+    LOGGER.info("Computing impurities...")
+    global _X
+    global _Y
+    _X = x
+    _Y = y
+    with Pool(forest.n_jobs) as pool:
+        impurities = list(tqdm(
+            pool.imap(_tree_impurities_per_output, forest),
+            total=len(forest)
+        ))
+
+    return np.array(impurities)
+
+
+def _tree_impurities_per_output(tree: DecisionTreeRegressor) -> np.ndarray:
+
+    assert _X is not None and _Y is not None
+
+    tree_ = tree.tree_
+    x, y = _X, _Y
+
+    impurities = np.zeros((tree.tree_.node_count, y.shape[1]))
+    weights = _tree_weights(tree, len(x))
+
+    def __tree_impurities_per_output(node_idx, x, y, weights):
+
+        imp = _wvariance(y, weights, axis=0)
+        impurities[node_idx] = imp
+
+        if tree_.children_left[node_idx] == -1:
+            return
+
+        feature = tree_.feature[node_idx]
+        threshold = tree_.threshold[node_idx]
+        mask = x[:, feature] <= threshold
+        __tree_impurities_per_output(
+            tree_.children_left[node_idx],
+            x[mask],
+            y[mask],
+            weights[mask]
+        )
+        mask = np.logical_not(mask)
+        __tree_impurities_per_output(
+            tree_.children_right[node_idx],
+            x[mask],
+            y[mask],
+            weights[mask]
+        )
+
+    __tree_impurities_per_output(0, x, y, weights)
+    return impurities
+
+
+def _wvariance(
+        data: np.ndarray,
+        weights: np.ndarray,
+        axis: Optional[int] = None
+        ) -> np.ndarray:
+
+    avg = np.average(data, weights=weights, axis=axis)
+    return np.average((data - avg)**2, weights=weights, axis=axis)
diff --git a/models.py b/hela/models.py
similarity index 96%
rename from models.py
rename to hela/models.py
index 6dc3369..f4caa40 100644
--- a/models.py
+++ b/hela/models.py
@@ -8,11 +8,7 @@
 from sklearn.utils import check_random_state
 from sklearn.preprocessing import MinMaxScaler
 
-try:
-    from tqdm import tqdm
-except ImportError:
-    def tqdm(x, *_, **__):
-        return x
+from .utils import tqdm
 
 __all__ = [
     "Model",
@@ -75,14 +71,14 @@ def _scaler_fit(self, y):
 
         self.scaler.fit(y)
 
-    def _scaler_transform(self, y):
+    def scaler_transform(self, y):
         if y.ndim == 1:
             y = y[:, None]
             return self.scaler.transform(y)[:, 0]
 
         return self.scaler.transform(y)
 
-    def _scaler_inverse_transform(self, y):
+    def scaler_inverse_transform(self, y):
 
         if y.ndim == 1:
             y = y[:, None]
@@ -92,7 +88,7 @@ def _scaler_inverse_transform(self, y):
 
     def fit(self, x, y):
         self._scaler_fit(y)
-        self.rf.fit(x, self._scaler_transform(y))
+        self.rf.fit(x, self.scaler_transform(y))
 
         # Build the structures to quickly compute the posteriors
         if self.enable_posterior:
@@ -105,7 +101,7 @@ def fit(self, x, y):
 
     def predict(self, x):
         pred = self.rf.predict(x)
-        return self._scaler_inverse_transform(pred)
+        return self.scaler_inverse_transform(pred)
 
     def predict_median(self, x):
         return self.predict_percentile(x, 50)
diff --git a/plot.py b/hela/plot.py
similarity index 93%
rename from plot.py
rename to hela/plot.py
index a3434f2..bcdfecf 100644
--- a/plot.py
+++ b/hela/plot.py
@@ -9,19 +9,15 @@
 from sklearn import metrics, neighbors
 from sklearn.preprocessing import MinMaxScaler
 
-from models import resample_posterior
-from wpercentile import wmedian
-
-try:
-    from tqdm import tqdm
-except ImportError:
-    def tqdm(x, *_, **__):
-        return x
+from .models import resample_posterior
+from .wpercentile import wmedian
+from .utils import tqdm
 
 __all__ = [
-    "predicted_vs_real",
-    "feature_importances",
-    "posterior_matrix"
+    'predicted_vs_real',
+    'feature_importances',
+    'stacked_feature_importances',
+    'posterior_matrix'
 ]
 
 POSTERIOR_MAX_SIZE = 10000
@@ -96,6 +92,26 @@ def feature_importances(forests, names, colors):
     return fig
 
 
+def stacked_feature_importances(importances, names, colors):
+
+    bottoms = np.zeros(importances.shape[0])
+
+    ind = np.arange(len(importances))
+
+    fig = plt.figure()
+    ax = fig.gca()
+    for data_i, name_i, color_i in zip(importances.T, names, colors):
+        ax.bar(ind, data_i, bottom=bottoms, color=color_i, label=name_i)
+        bottoms += data_i
+
+    ax.set_xlabel("Feature index", fontsize=18)
+    ax.legend(fontsize=16)
+    ax.grid()
+
+    fig.tight_layout()
+    return fig
+
+
 def posterior_matrix(posterior, names, ranges, colors, soft_colors=None):
 
     samples, weights = posterior
diff --git a/utils.py b/hela/utils.py
similarity index 88%
rename from utils.py
rename to hela/utils.py
index 414cc49..2cba725 100644
--- a/utils.py
+++ b/hela/utils.py
@@ -1,40 +1,46 @@
 
-import os
 import logging
 
+try:
+    from tqdm import tqdm
+except ImportError:
+    def tqdm(x, *_, **__):
+        return x
+
 __all__ = [
-    "config_logger"
+    'config_logger',
+    'tqdm'
 ]
 
 
 def config_logger(log_file="/dev/null", level=logging.INFO):
-    
+
     class MyFormatter(logging.Formatter):
-        
+
         info_format = "\x1b[32;1m%(asctime)s [%(name)s]\x1b[0m %(message)s"
         error_format = "\x1b[31;1m%(asctime)s [%(name)s] [%(levelname)s]\x1b[0m %(message)s"
-        
+
         def format(self, record):
-            
+
             if record.levelno > logging.INFO:
                 self._style._fmt = self.error_format
             else:
                 self._style._fmt = self.info_format
-            
+
             res = super(MyFormatter, self).format(record)
             return res
-    
+
     rootLogger = logging.getLogger()
-    
+
     fileHandler = logging.FileHandler(log_file)
     fileFormatter = logging.Formatter("%(asctime)s [%(name)s] [%(levelname)s]> %(message)s")
     fileHandler.setFormatter(fileFormatter)
     rootLogger.addHandler(fileHandler)
-    
+
     consoleHandler = logging.StreamHandler()
     consoleFormatter = MyFormatter()
     consoleHandler.setFormatter(consoleFormatter)
     rootLogger.addHandler(consoleHandler)
-    
+
     rootLogger.setLevel(level)
 
diff --git a/wpercentile.py b/hela/wpercentile.py
similarity index 95%
rename from wpercentile.py
rename to hela/wpercentile.py
index f030db9..529e316 100644
--- a/wpercentile.py
+++ b/hela/wpercentile.py
@@ -8,26 +8,26 @@
 
 
 def _wpercentile1d(data, weights, percentiles):
-    
+
     if data.ndim > 1 or weights.ndim > 1:
         raise ValueError("data and weights must be one-dimensional arrays")
-    
+
     if data.shape != weights.shape:
         raise ValueError("data and weights must have the same shape")
-    
+
     data = np.asarray(data)
     weights = np.asarray(weights)
     percentiles = np.asarray(percentiles)
-    
+
     sort_indices = np.argsort(data)
     sorted_data = data[sort_indices]
     sorted_weights = weights[sort_indices]
-    
+
     cumsum_weights = np.cumsum(sorted_weights)
     sum_weights = cumsum_weights[-1]
-    
+
     pn = 100 * (cumsum_weights - 0.5*sorted_weights) / sum_weights
-    
+
     return np.interp(percentiles, pn, sorted_data)
 
 
@@ -39,22 +39,23 @@ def wpercentile(data, weights, percentiles, axis=None):
         data = np.ravel(data)
         weights = np.ravel(weights)
         return _wpercentile1d(data, weights, percentiles)
-    
+
     axis = np.atleast_1d(axis)
-    
+
     # Reshape the arrays for proper computation
     # Move the requested axis to the final dimensions
     dest_axis = list(range(len(axis)))
     data2 = np.moveaxis(data, axis, dest_axis)
-    
+
     ndim = len(axis)
     shape = data2.shape
     newshape = (np.prod(shape[:ndim]), np.prod(shape[ndim:]))
     newdata = np.reshape(data2, newshape)
     newweights = np.reshape(weights, newshape[0])
-    
-    result = np.apply_along_axis(_wpercentile1d, 0, newdata, newweights, percentiles)
-    
+
+    result = np.apply_along_axis(_wpercentile1d, 0, newdata, newweights,
+                                 percentiles)
+
     final_shape = (*np.shape(percentiles), *shape[ndim:])
     return np.reshape(result, final_shape)
 
diff --git a/rfretrieval.py b/rfretrieval.py
index 662265f..d051d94 100644
--- a/rfretrieval.py
+++ b/rfretrieval.py
@@ -7,122 +7,159 @@
 from sklearn import metrics, multioutput
 import joblib
 
-from dataset import load_dataset, load_data_file
-from models import Model
-from utils import config_logger
-from wpercentile import wpercentile
-import plot
+import hela
 
-logger = logging.getLogger(__name__)
+LOGGER = logging.getLogger(__name__)
 
 
 def train_model(dataset, num_trees, num_jobs, verbose=1):
-    pipeline = Model(num_trees, num_jobs,
-                     names=dataset.names,
-                     ranges=dataset.ranges,
-                     colors=dataset.colors,
-                     verbose=verbose)
+    pipeline = hela.Model(
+        num_trees, num_jobs,
+        names=dataset.names,
+        ranges=dataset.ranges,
+        colors=dataset.colors,
+        verbose=verbose
+    )
     pipeline.fit(dataset.training_x, dataset.training_y)
     return pipeline
 
 
 def test_model(model, dataset, output_path):
-    
+
     if dataset.testing_x is None:
         return
-    
-    logger.info("Testing model...")
+
+    LOGGER.info("Testing model...")
     pred = model.predict(dataset.testing_x)
     # pred = model.predict_median(dataset.testing_x)
-    r2scores = {name_i: metrics.r2_score(real_i, pred_i)
-                for name_i, real_i, pred_i in zip(dataset.names, dataset.testing_y.T, pred.T)}
+    r2scores = {
+        name_i: metrics.r2_score(real_i, pred_i)
+        for name_i, real_i, pred_i in zip(dataset.names,
+                                          dataset.testing_y.T,
+                                          pred.T)
+    }
     print("Testing scores:")
     for name, values in r2scores.items():
         print("\tR^2 score for {}: {:.3f}".format(name, values))
-    
-    logger.info("Plotting testing results...")
-    fig = plot.predicted_vs_real(dataset.testing_y, pred, dataset.names, dataset.ranges)
+
+    LOGGER.info("Plotting testing results...")
+    fig = hela.predicted_vs_real(
+        dataset.testing_y,
+        pred,
+        names=dataset.names,
+        ranges=dataset.ranges
+    )
     fig.savefig(os.path.join(output_path, "predicted_vs_real.pdf"),
                 bbox_inches='tight')
 
 
-def compute_feature_importance(model, dataset, output_path):
-    
-    logger.info("Computing feature importance for individual parameters...")
+def _plot_feature_importances(model, dataset, output_path):
+
+    LOGGER.info("Computing feature importances for individual parameters...")
     regr = multioutput.MultiOutputRegressor(model, n_jobs=1)
     regr.fit(dataset.training_x, dataset.training_y)
-    
-    fig = plot.feature_importances(forests=[i.rf for i in regr.estimators_] + [model.rf],
-                                   names=dataset.names + ["joint prediction"],
-                                   colors=dataset.colors + ["C0"])
-    
+
+    fig = hela.plot.feature_importances(
+        forests=[i.rf for i in regr.estimators_] + [model.rf],
+        names=dataset.names + ["joint prediction"],
+        colors=dataset.colors + ["C0"]
+    )
+
     fig.savefig(os.path.join(output_path, "feature_importances.pdf"),
                 bbox_inches='tight')
 
 
+def _plot_feature_importances_breakdown(model, dataset, output_path):
+
+    LOGGER.info("Computing feature importances per output...")
+    importances = hela.importances_per_output(
+        model.rf,
+        dataset.training_x,
+        model.scaler_transform(dataset.training_y)
+    )
+
+    fig = hela.plot.stacked_feature_importances(
+        importances,
+        dataset.names,
+        dataset.colors
+    )
+
+    fig.tight_layout()
+    fig.savefig(
+        os.path.join(output_path, "feature_importances_breakdown.pdf"),
+        bbox_inches='tight'
+    )
+
+
 def data_ranges(posterior, percentiles=(50, 16, 84)):
-    
+
     samples, weights = posterior
-    values = wpercentile(samples, weights, percentiles, axis=0)
+    values = hela.wpercentile(samples, weights, percentiles, axis=0)
     ranges = np.array([values[0], values[2]-values[0], values[0]-values[1]])
     return ranges.T
 
 
 def main_train(training_dataset, model_path,
                num_trees, num_jobs,
-               feature_importance, quiet,
+               feature_importances,
+               feature_importances_breakdown,
+               quiet,
                **_):
-    
-    logger.info("Loading dataset '{}'...".format(training_dataset))
-    dataset = load_dataset(training_dataset)
-    
-    logger.info("Training model...")
+
+    LOGGER.info("Loading dataset '%s'...", training_dataset)
+    dataset = hela.load_dataset(training_dataset)
+
+    LOGGER.info("Training model...")
     model = train_model(dataset, num_trees, num_jobs, not quiet)
-    
+
     os.makedirs(model_path, exist_ok=True)
     model_file = os.path.join(model_path, "model.pkl")
-    logger.info("Saving model to '{}'...".format(model_file))
+    LOGGER.info("Saving model to '%s'...", model_file)
     joblib.dump(model, model_file)
-    
-    logger.info("Printing model information...")
+
+    LOGGER.info("Printing model information...")
     print("OOB score: {:.4f}".format(model.rf.oob_score_))
-    
+
     test_model(model, dataset, model_path)
-    
-    if feature_importance:
+
+    if feature_importances:
         model.enable_posterior = False
-        compute_feature_importance(model, dataset, model_path)
+        _plot_feature_importances(model, dataset, model_path)
+
+    if feature_importances_breakdown:
+        _plot_feature_importances_breakdown(model, dataset, model_path)
 
 
 def main_predict(model_path, data_file, output_path, plot_posterior, **_):
-    
+
     model_file = os.path.join(model_path, "model.pkl")
-    logger.info("Loading random forest from '{}'...".format(model_file))
+    LOGGER.info("Loading random forest from '%s'...", model_file)
     model = joblib.load(model_file)
-    
-    logger.info("Loading data from '{}'...".format(data_file))
-    data, _ = load_data_file(data_file, model.rf.n_features_)
-    
+
+    LOGGER.info("Loading data from '%s'...", data_file)
+    data, _ = hela.load_data_file(data_file, model.rf.n_features_)
+
     posterior = model.posterior(data[0])
-    
+
     posterior_ranges = data_ranges(posterior)
     for name_i, pred_range_i in zip(model.names, posterior_ranges):
-        print("Prediction for {}: {:.3g} [+{:.3g} -{:.3g}]".format(name_i, *pred_range_i))
-    
+        format_str = "Prediction for {}: {:.3g} [+{:.3g} -{:.3g}]"
+        print(format_str.format(name_i, *pred_range_i))
+
     if plot_posterior:
-        logger.info("Plotting the posterior matrix...")
-        
-        fig = plot.posterior_matrix(
+        LOGGER.info("Plotting the posterior matrix...")
+
+        fig = hela.plot.posterior_matrix(
             posterior,
             names=model.names,
             ranges=model.ranges,
             colors=model.colors
         )
         os.makedirs(output_path, exist_ok=True)
-        logger.info("Saving the figure....")
+        LOGGER.info("Saving the figure....")
         fig.savefig(os.path.join(output_path, "posterior_matrix.pdf"),
                     bbox_inches='tight')
-        logger.info("Done.")
+        LOGGER.info("Done.")
 
 
 def show_usage(parser, **_):
@@ -130,38 +167,65 @@ def show_usage(parser, **_):
 
 
 def main():
-    
-    parser = argparse.ArgumentParser(description="rfretrieval: Atmospheric retrieval with random forests.")
+
+    parser = argparse.ArgumentParser(
+        description="rfretrieval: Atmospheric retrieval with random forests."
+    )
     parser.set_defaults(func=show_usage, parser=parser)
     parser.add_argument("--quiet", action='store_true')
     subparsers = parser.add_subparsers()
-    
+
     parser_train = subparsers.add_parser('train', help="train a model")
-    parser_train.add_argument("training_dataset", type=str,
-                              help="JSON file with the training dataset description")
-    parser_train.add_argument("model_path", type=str,
-                              help="path where the trained model will be saved")
-    parser_train.add_argument("--num-trees", type=int, default=1000,
-                              help="number of trees in the forest")
-    parser_train.add_argument("--num-jobs", type=int, default=5,
-                              help="number of parallel jobs for fitting the random forest")
-    parser_train.add_argument("--feature-importance", action='store_true',
-                              help="compute feature importances after training")
     parser_train.set_defaults(func=main_train)
-    
-    parser_test = subparsers.add_parser('predict', help="use a trained model to perform a prediction")
+    parser_train.add_argument(
+        "training_dataset", type=str,
+        help="JSON file with the training dataset description"
+    )
+    parser_train.add_argument(
+        "model_path", type=str,
+        help="path where the trained model will be saved"
+    )
+    parser_train.add_argument(
+        "--num-trees", type=int, default=1000,
+        help="number of trees in the forest"
+    )
+    parser_train.add_argument(
+        "--num-jobs", type=int, default=5,
+        help="number of parallel jobs for fitting the random forest"
+    )
+    parser_train.add_argument(
+        "--feature-importances", action='store_true',
+        help="plot feature importances after training"
+    )
+    parser_train.add_argument(
+        "--feature-importances-breakdown", action='store_true',
+        help="plot feature importances per output after training"
+    )
+
+    parser_test = subparsers.add_parser(
+        'predict',
+        help="use a trained model to perform a prediction"
+    )
     parser_test.set_defaults(func=main_predict)
-    parser_test.add_argument("model_path", type=str,
-                             help="path to the trained model")
-    parser_test.add_argument("data_file", type=str,
-                             help="NPY file with the data for the prediction")
-    parser_test.add_argument("output_path", type=str,
-                             help="path to write the results of the prediction")
-    parser_test.add_argument("--plot-posterior", action='store_true',
-                             help="plot and save the scatter matrix of the posterior distribution")
-    
+    parser_test.add_argument(
+        "model_path", type=str,
+        help="path to the trained model"
+    )
+    parser_test.add_argument(
+        "data_file", type=str,
+        help="NPY file with the data for the prediction"
+    )
+    parser_test.add_argument(
+        "output_path", type=str,
+        help="path to write the results of the prediction"
+    )
+    parser_test.add_argument(
+        "--plot-posterior", action='store_true',
+        help="plot and save the scatter matrix of the posterior distribution"
+    )
+
     args = parser.parse_args()
-    config_logger(level=logging.WARNING if args.quiet else logging.INFO)
+    hela.config_logger(level=logging.WARNING if args.quiet else logging.INFO)
     args.func(**vars(args))