diff --git a/.gitignore b/.gitignore
index 9c35720..9c20808 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ qc/
 *.jpg
 *.png
 .gitignore
+*.egg-info/
+dist/
diff --git a/README.md b/README.md
index 3e18418..83dfd43 100755
--- a/README.md
+++ b/README.md
@@ -69,9 +69,10 @@ Per-subject stat: Panda dataframe `df_sub`:
 - intra-subject MEAN: MEAN[CSA(sI, rX, :)] --> MEAN_intra(sI, rX): `df_sub['mean']`
 - intra-subject STD: STD[CSA(sI, rX, :)] --> STD_intra(sI, rX): `df_sub['std']`
 - intra-subject COV: STD[CSA(sI, rX, :)] / MEAN[CSA(sI, rX, :)] --> COV_intra(sI, rX): `df_sub['cov']`
-- rescale_estimated_subject MEAN: MEAN[CSA(sI, rX, :) / CSA(sI, 1, :)] --> MEAN_rescale_estimated_subject(sI, rX): `df_sub['rescale_estimated']`
+- rescale_estimated_subject MEAN: MEAN[ CSA(sI, rX, :) ] / MEAN[ CSA(sI, 1, :) ] --> MEAN_rescale_estimated_subject(sI, rX): `df_sub['rescale_estimated']`
 - intra-subject error MEAN: MEAN[CSA(sI, rX, :)] - (rX^2 * MEAN[CSA(sI, 1, :)]) --> MEAN_error_intra(sI, rX): `df_sub['error']`
 - intra-subject error in percentage MEAN: [MEAN[CSA(sI, rX, :)] - (rX^2 * MEAN[CSA(sI, 1, :)])] / MEAN[CSA(sI, rX, :)] --> MEAN_error_intra_perc(sI, rX): `df_sub['perc_error']`
+- intra-subject difference MEAN: [MEAN[CSA(sI, 1, :)] - (rX^2 * MEAN[CSA(sI, rX, :)])] --> MEAN_diff(sI, rX): `df_sub['diff']`
@@ -81,9 +82,11 @@ Across-subject stats: Panda dataframe `df_rescale`
 - intra-subject STD: MEAN[STD_intra(:, rX)] --> STD_intra(rX): `df_rescale['std_intra']`
 - rescale_estimated (across subjects) STD: STD[MEAN_rescale_estimated_subject(:, rX)] --> STD_rescale_estimated(rX): `df_rescale['std_rescale_estimated']`
 - error in percentage (across subjects) MEAN: MEAN[MEAN_error_intra(:, rX)]
 - error in percentage (across subjects) STD: STD[MEAN_error_intra(:, rX)]
+- intra-subject difference STD: STD[MEAN_diff(:, rX)]
 
 Power analysis:
-- sample size: [(z(uncertainty) + z(power))^2 * (2 * STD[MEAN(:, rX)]^2)] / [MEAN[CSA(sI, 1, :)] - MEAN[CSA(sI, rX, :)]]
+- sample size for a between-subject study: [(z(uncertainty) + z(power))^2 * (2 * STD[MEAN(:, rX)]^2)] / [MEAN[CSA(sI, 1, :)] - MEAN[CSA(sI, rX, :)]]^2
+- sample size for a within-subject study: [(z(uncertainty) + z(power))^2 * (STD[MEAN_diff(:, rX)]^2)] / [MEAN[CSA(sI, 1, :)] - MEAN[CSA(sI, rX, :)]]^2
 
 Plot results:
 - STD_intersub
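For concreteness, here is a minimal sketch of the two sample-size formulas added to the README above. All numbers are made-up example values (mean CSAs, STDs); the z-scores are the usual normal quantiles for a bilateral test at 5% uncertainty and 80% power, and the denominator is the squared mean CSA difference, matching the computation in `csa_rescale_stat.py` further down in this patch:

```python
from math import ceil
from scipy.stats import norm

# z-scores: bilateral 5% uncertainty and 80% power (normal quantiles)
z_uncertainty = norm.ppf(0.975)  # ~1.96
z_power = norm.ppf(0.80)         # ~0.84

# Example values (made up): un-scaled vs scaled mean CSA and the two STDs
mean_csa_control = 80.0  # MEAN[CSA(sI, 1, :)], in mm^2
mean_csa_scaled = 76.0   # MEAN[CSA(sI, rX, :)], in mm^2
std_between = 7.5        # STD[MEAN(:, rX)], across subjects
std_diff = 3.0           # STD[MEAN_diff(:, rX)], of the paired difference

delta = mean_csa_control - mean_csa_scaled
# between-subject study: two independent groups, hence the factor 2
n_between = ceil((z_uncertainty + z_power) ** 2 * (2 * std_between ** 2) / delta ** 2)
# within-subject (longitudinal) study: variance of the paired difference
n_within = ceil((z_uncertainty + z_power) ** 2 * std_diff ** 2 / delta ** 2)
print(n_between, n_within)  # 56 and 5 participants per group for these example values
```

Because the paired difference removes the between-subject variance, the within-subject design needs far fewer participants for the same detectable atrophy, which is the motivation for the new `diff`-based statistics above.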
diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py
index 1894186..1373bee 100755
--- a/csa_rescale_stat.py
+++ b/csa_rescale_stat.py
@@ -17,13 +17,26 @@
 import pandas as pd
 import numpy as np
+import logging
 import os
+import sys
 import argparse
+import matplotlib
 import matplotlib.pyplot as plt
 from math import ceil
 from ruamel.yaml import YAML
+from scipy import stats
+from matplotlib import cm
+
+# Initialize logging
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)  # default: logging.DEBUG, logging.INFO
+hdlr = logging.StreamHandler(sys.stdout)
+logging.root.addHandler(hdlr)
+
+FNAME_LOG = 'log_stats.txt'
 
 
 # Parser
 #########################################################################################
@@ -78,9 +91,7 @@ def concatenate_csv_files(path_results):
             files.append(path)
     if not files:
         raise FileExistsError("Folder {} does not contain any results csv file.".format(path_results))
-    #metrics = pd.concat(
-    #    [pd.read_csv(f).assign(rescale=os.path.basename(f).split('_')[4].split('.csv')[0]) for f in files])
-    print("Concatenate csv files. This will take a few seconds...")
+    logger.info("Concatenate csv files. This will take a few seconds...")
     metrics = pd.concat(pd.read_csv(f) for f in files)
     # output csv file in PATH_RESULTS
     metrics.to_csv(os.path.join(path_results, r'csa_all.csv'))
@@ -109,10 +120,10 @@ def plot_perc_err(df, path_output):
     plt.xlabel('area rescaling in %')
     plt.ylabel('STD in %')
     plt.grid()
-    plt.tight_layout()
+    #plt.tight_layout()
     output_file = os.path.join(path_output, "fig_err.png")
     plt.savefig(output_file)
-    print("--> Created figure: {}".format(output_file))
+    logger.info(f"--> Created figure: {output_file}")
 
 
 def boxplot_csa(df, path_output):
     """
     :param df: dataframe with csv files data: df_sub
     :param path_output: directory in which plot is saved
     """
-    # TODO: round xlabel
-    # TODO: find a way to display ylabel title with superscript
     fig2 = plt.figure()
-    # round rescale area
-    df['rescale_area'] = round(df['rescale_area'], ndigits=0).astype(int)
-    df.boxplot(column=['mean'], by='rescale_area', showmeans=True, meanline=True)
+    meanpointprops = dict(marker='x', markeredgecolor='red', markerfacecolor='red')
+    df.boxplot(column=['mean'], by=df['rescale_area'].round(1), positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanprops=meanpointprops)
+    min_rescale = min(df['rescale_area'].values)
+    min_rescale_r = min(df['rescale'].values)
+    max_rescale = max(df['rescale_area'].values)
+    max_y = df.groupby('rescale').get_group(1).mean()['mean']
+    plt.plot([min_rescale, max_rescale], [max_y * (min_rescale_r ** 2), max_y], ls="--", c=".3")
     plt.title('Boxplot of CSA in function of area rescaling')
     plt.suptitle("")
-    plt.ylabel('csa in mm^2')
-    plt.xlabel('area rescaling in %')
+    plt.ylabel('CSA in mm^2')
+    plt.xlabel('CSA scaling')
     output_file = os.path.join(path_output, "fig_boxplot_csa.png")
     plt.savefig(output_file)
-    print("--> Created figure: {}".format(output_file))
+    logger.info("--> Created figure: {}".format(output_file))
 
 
 def boxplot_atrophy(df, path_output):
     """
@@ -143,98 +157,86 @@ def boxplot_atrophy(df, path_output):
     :param df: dataframe with csv files data: df_sub
     :param path_output: directory in which plot is saved
     """
     fig = plt.figure()
     # convert to percentage
     df['rescale_estimated'] = df['rescale_estimated'] * 100
-    # round rescale area
-    df['rescale_area'] = round(df['rescale_area'], ndigits=0).astype(int)
-    df.boxplot(column='rescale_estimated', by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanline=True, figsize=(10,8))
+    meanpointprops = dict(marker='x', markeredgecolor='red', markerfacecolor='red')
+    df.boxplot(column='rescale_estimated', by=df['rescale_area'].round(1), positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanline=False, figsize=(10, 8), meanprops=meanpointprops)
     min_rescale = min(df['rescale_area'].values)
     max_rescale = max(df['rescale_area'].values)
     plt.plot([min_rescale, max_rescale], [min_rescale, max_rescale], ls="--", c=".3")
-    plt.title('boxplot of measured atrophy in function of area rescaling')
-    plt.ylabel('measured atrophy in %')
-    plt.xlabel('area rescaling in %')
+    plt.title('Boxplot of measured atrophy in function of CSA scaling')
     plt.suptitle("")
-    # TODO: scale x and y similarly
-    # TODO: add diagonal (remove horizontal lines)
+    plt.ylabel('Atrophied CSA in % of the un-rescaled CSA')
+    plt.xlabel('CSA scaling')
+    plt.axis('scaled')
     output_file = os.path.join(path_output, "fig_boxplot_atrophy.png")
     plt.savefig(output_file)
-    print("--> Created figure: {}".format(output_file))
+    logger.info("--> Created figure: {}".format(output_file))
 
 
-def plot_sample_size(z_conf, z_power, std, mean_csa, path_output):
+def plot_sample_size(z_conf, z_power, std_arr, mean_csa, path_output):
     """plot minimum number of patients required to detect an atrophy of a given value
     :param z_conf: z score for X % uncertainty. Example: z_conf=1.96
     :param z_power: z score for X % Power. Example: z_power=(0.84, 1.282)
-    :param std: STD around mean CSA of control subjects (without rescaling),
+    :param std_arr: STD around mean CSA of control subjects (without rescaling),
     CSA STD for atrophied subjects and control subjects are considered equal
-    :param mean_csa: mean value of CSA to compute the atrophy percentage. Example: 80
+    :param mean_csa: mean value of CSA to compute the atrophy percentage. Example: 80 mm^2
    :param path_output: directory in which plot is saved
     """
     fig_sample, ax = plt.subplots()
     # data for plotting
-    n = []
+    n_t1 = []
+    n_t2 = []
     for z_p in z_power:
-        atrophy = np.arange(1.5, 8.0, 0.05)  # x_axis values ranging from 1.5 to 8.0 mm^2
-        num_n = 2 * ((z_conf + z_p) ** 2) * (std ** 2)  # numerator of sample size equation
-        n.append(num_n / ((atrophy) ** 2))
+        # x_axis values ranging from 1.5 to 8.0 mm^2
+        atrophy = np.arange(1.5, 8.0, 0.05)
+        # numerator of sample size equation, T1w
+        num_n_t1 = 2 * ((z_conf + z_p) ** 2) * (std_arr[0] ** 2)
+        n_t1.append(num_n_t1 / ((0.01 * atrophy * mean_csa[0]) ** 2))
+        # numerator of sample size equation, T2w
+        num_n_t2 = 2 * ((z_conf + z_p) ** 2) * (std_arr[1] ** 2)
+        n_t2.append(num_n_t2 / ((0.01 * atrophy * mean_csa[1]) ** 2))
     # plot
-    ax.plot(atrophy, n[0], label='80% power')
-    ax.plot(atrophy, n[1], label='90% power')
+    ax.plot(atrophy / mean_csa[0] * 100, n_t1[0], 'tab:blue', label='T1w 80% power')
+    ax.plot(atrophy / mean_csa[0] * 100, n_t1[1], 'tab:blue', linestyle='--', label='T1w 90% power')
+    ax.plot(atrophy / mean_csa[1] * 100, n_t2[0], 'tab:red', label='T2w 80% power')
+    ax.plot(atrophy / mean_csa[1] * 100, n_t2[1], 'tab:red', linestyle='--', label='T2w 90% power')
     ax.set_ylabel('number of participants per group of study \n(patients or controls) with ratio 1:1')
-    ax.set_xlabel('atrophy in mm^2')
-    # create global variable for secax (second axis) functions
-    global mean_csa_sample
-    mean_csa_sample = mean_csa
-    ax.set_title('minimum number of participants to detect an atrophy with 5% uncertainty\n std = ' + str(
-        round(std, 2)) + 'mm², mean_csa = ' + str(mean_csa_sample) + 'mm²')
+    ax.set_xlabel('atrophy in %')
+    plt.suptitle('minimum number of participants to detect an atrophy with 5% uncertainty', fontsize=16, y=1.05)
+    ax.set_title('T1w (1.0 mm iso): SD = {} mm², mean CSA = {} mm² \nT2w (0.8 mm iso): SD = {} mm², mean CSA = {} mm²'.format(
        str(round(std_arr[0], 2)), str(mean_csa[0]), str(round(std_arr[1], 2)), str(mean_csa[1])))
     ax.legend()
     ax.grid()
-    def forward(atrophy):
-        return atrophy / mean_csa_sample * 100
-
-    def inverse(atrophy):
-        return atrophy / 100 * mean_csa_sample
-
-    secax = ax.secondary_xaxis('top', functions=(forward, inverse))
-    secax.set_xlabel('atrophy in %')
     output_file = os.path.join(path_output, "fig_min_subj.png")
     plt.savefig(output_file, bbox_inches='tight')
-    print("--> Created figure: {}".format(output_file))
+    logger.info("--> Created figure: {}".format(output_file))
 
 
-def sample_size(df, config_param):
-    """
-    Calculate the minimum number of patients required to detect an atrophy of a given value (i.e. power analysis),
-    ratio patients/control 1:1 and with the assumption that both samples have the same STD.
-    ref: Suresh and Chandrashekara 2012. "Sample size estimation and power analysis for clinical research studies"
-    doi: 10.4103/0974-1208.97779
-    :param df: dataframe for computing stats across subject: df_rescale
-    :param config_param: configuration parameters can be modified in config.yaml file. Example conf = 0.95
-    :return sample_size: sample size for each rescaling
+def error_function_of_csa(df, path_output):
+    """Scatter plot of CSA as a function of Mean error %
+    :param df: dataframe for computing stats per subject: df_sub
+    :param path_output: directory in which plot is saved
     """
-    sample_size = []
-    # configuration parameters can be modified in config.yaml file
-    # conf = confidence level
-    conf = config_param['stats']['sample_size']['conf']
-    # power = power level
-    power = config_param['stats']['sample_size']['power']
-    z_score_dict = {'confidence_Level': [0.60, 0.70, 0.8, 0.85, 0.90, 0.95],
-                    'z_value': [0.842, 1.04, 1.28, 1.44, 1.64, 1.96], }
-
-    df_z = pd.DataFrame(z_score_dict)
-    df_z = df_z.set_index('confidence_Level')
-    for name, group in df.groupby('rescale'):
-        std = group['std_inter'].values[0]
-        mean_patient = group['mean_inter'].values[0]
-        mean_control = df.groupby('rescale').get_group(1)['mean_inter'].values[0]
-        atrophy = mean_control - mean_patient
-        if atrophy != 0:
-            num_n = 2 * ((df_z.at[conf, 'z_value'] + df_z.at[power, 'z_value']) ** 2) * (std ** 2)
-            deno_n = (abs(atrophy)) ** 2
-            sample_size.append(ceil(num_n / deno_n))
-        else:
-            sample_size.append('inf')
-    return sample_size
+    fig, ax = plt.subplots(figsize=(7, 7))
+    df['Normalized CSA in mm²'] = df['mean'].div(df['rescale']**2)
+    # compute linear regression
+    z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'Normalized CSA in mm²'], deg=1)
+    p = np.poly1d(z)
+    # plot
+    viridis = cm.get_cmap('viridis', 6)
+    df.plot.scatter(x='perc_error', y='Normalized CSA in mm²', c='rescale', colormap=viridis)
+    min_err = min(df['perc_error'].values)
+    max_err = max(df['perc_error'].values)
+    plt.plot([min_err, max_err], [p(min_err), p(max_err)], ls="--", c=".3")
+    plt.title('Normalized CSA in function of error %,\n linear regression: {}'.format(p))
+    plt.xlabel('Mean error %')
+    plt.grid()
+    output_file = os.path.join(path_output, "fig_err_in_function_of_csa.png")
+    plt.savefig(output_file, bbox_inches='tight')
+    logger.info("--> Created figure: {}".format(output_file))
+
 
 def error_function_of_intra_cov(df, path_output):
     """Scatter plot of intra-subject COV in function of error %
@@ -248,64 +250,51 @@ def error_function_of_intra_cov(df, path_output):
     :param df: dataframe for computing stats per subject: df_sub
     :param path_output: directory in which plot is saved
     """
     fig, ax = plt.subplots(figsize=(7, 7))
     # compute linear regression
     z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'cov'], deg=1)
     p = np.poly1d(z)
     # plot
-    df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap='viridis')
+    viridis = cm.get_cmap('viridis', 6)
+    df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap=viridis)
     min_err = min(df['perc_error'].values)
     max_err = max(df['perc_error'].values)
-    plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3")
-    ax.set_xlabel('perc_error')
-    ax.set_ylabel('cov')
+    plt.plot([min_err, max_err], [p(min_err), p(max_err)], ls="--", c=".3")
+    plt.xlabel('Mean error %')
+    plt.ylabel('COV')
     plt.title('COV in function of % error,\n linear regression: {}'.format(p))
-    plt.title("COV in function of % error")
-    ax.legend(loc='upper right')
     plt.grid()
     output_file = os.path.join(path_output, "fig_err_in_function_of_cov.png")
     plt.savefig(output_file, bbox_inches='tight')
-    print("--> Created figure: {}".format(output_file))
+    logger.info("--> Created figure: {}".format(output_file))
 
 
 def error_function_of_intra_cov_outlier(df, path_output):
-    """Scatter plot of intra-subject COV in function of error % to identify outliers with either high error %
-    or high intra-subject COV
+    """Scatter plot of intra-subject COV in function of error % to identify the worst outliers (outside the interval
+    [Q1-10IQR, Q3+10IQR] of percentage error)
     :param df: dataframe for computing stats per subject: df_sub
     :param path_output: directory in which plot is saved
     """
     fig, ax = plt.subplots(figsize=(7, 7))
-    # identified outliers either high error % or high intra-subject COV of t1 images
-    outliers_t1_all = ['sub-brnoUhb01', 'sub-brnoUhb02', 'sub-brnoUhb03', 'sub-brnoUhb06', 'sub-brnoUhb07', 'sub-brnoUhb08',
-                       'sub-barcelona04', 'sub-barcelona06', 'sub-beijingPrisma03', 'sub-milan03', 'sub-oxfordFmrib01',
-                       'sub-tokyo750w03', 'sub-pavia04']
-    # identified t1 outliers remaining after subjects removed due to missing vertebrae levels missing CSA
-    outliers_t1 = ['sub-brnoUhb01', 'sub-brnoUhb08', 'sub-milan03', 'sub-oxfordFmrib01', 'sub-cmrrb05',
-                   'sub-tokyo750w03', 'sub-pavia04']
-    # identified outliers either high error % or high intra-subject COV of t2 images
-    outliers_t2_all = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02',
-                       'sub-beijingVerio03', 'sub-ubc02', 'sub-vuiisIngenia05']
-    # identified t2 outliers remaining after subjects removed due to missing vertebrae levels missing CSA
-    outliers_t2 = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02', 'sub-beijingVerio03',
-                   'sub-ubc02', 'sub-vuiisIngenia05']
+    Q1 = df['perc_error'].quantile(0.25)
+    Q3 = df['perc_error'].quantile(0.75)
+    # IQR = inter-quartile range
+    IQR = Q3 - Q1
+    # identify outliers with either high error % or high intra-subject COV
+    outliers = set(df[(df['perc_error'] <= Q1 - 10 * IQR) | (df['perc_error'] >= Q3 + 10 * IQR)]['subject'])
     # remove rescale=1 because error=0
     df = df[df['rescale'] != 1]
     ax.scatter(df['perc_error'], df['cov'], color='tab:blue', label='others')
-    # plot scatter outliers of t1w images
-    for outlier_t1 in outliers_t1:
-        df_t1 = df.groupby(['subject']).get_group(outlier_t1)
-        ax.scatter(df_t1['perc_error'], df_t1['cov'], color='tab:red', label=outlier_t1)
-        df_t1 = []
-    # plot scatter outliers of t2w images
-    for outlier_t2 in outliers_t2:
-        df_t2 = df.groupby(['subject']).get_group(outlier_t2)
-        ax.scatter(df_t2['perc_error'], df_t2['cov'], color='tab:olive', label=outlier_t2)
-        df_t2 = []
+    # scatter outliers
+    for outlier in outliers:
+        df_outlier = df.groupby(['subject']).get_group(outlier)
+        ax.scatter(df_outlier['perc_error'], df_outlier['cov'], color='tab:red', label=outlier)
     # plot
-    ax.set_xlabel('perc_error')
-    ax.set_ylabel('cov')
-    plt.title("COV in function of error %")
+    ax.set_xlabel('Mean error %')
+    ax.set_ylabel('COV')
+    plt.title("Intra-subject COV as a function of mean absolute error %")
     ax.legend(loc='upper right')
     plt.grid()
     # save image
     output_file = os.path.join(path_output, "fig_err_in_function_of_cov_outlier.png")
     plt.savefig(output_file, bbox_inches='tight')
-    print("--> Created figure: {}".format(output_file))
+    logger.info("--> Created figure: {}".format(output_file))
 
 
 def add_columns_df_sub(df):
@@ -332,6 +321,97 @@ def add_columns_df_sub(df):
     df = df.reset_index()
     return df
 
+
+def pearson(df, df_rescale):
+    """Compute Pearson's correlation coefficients and p-values between subjects' mean CSA and mean CSA error,
+    and between COV across Monte Carlo transformations (i.e. intra-subject variability) and mean CSA error.
+    :param df: dataframe for computing stats per subject: df_sub
+    :param df_rescale: dataframe for computing stats per rescale: df_rescale
+    :return df_rescale: modified dataframe with added pearson_cov, p_value_cov, pearson_csa and p_value_csa
+    """
+    pearson_cov = []
+    p_value_cov = []
+    pearson_csa = []
+    p_value_csa = []
+    for rescale, group in df.groupby('rescale'):
+        if rescale != 1:
+            pearson_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[0])
+            p_value_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[1])
+            pearson_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[0])
+            p_value_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[1])
+        else:
+            pearson_cov.append(1)
+            p_value_cov.append(0)
+            pearson_csa.append(1)
+            p_value_csa.append(0)
+    df_rescale['pearson_cov'] = pearson_cov
+    df_rescale['p_value_cov'] = p_value_cov
+    df_rescale['pearson_csa'] = pearson_csa
+    df_rescale['p_value_csa'] = p_value_csa
+    return df_rescale
+
+
+def sample_size(df, df_sub, df_rescale, itt=50):
+    """Compute the minimum sample size (number of subjects) necessary to detect an atrophy in a between-subject
+    study (based on a two-sample bilateral t-test) and in a within-subject study (repeated measures in a
+    longitudinal study: based on a two-sample bilateral paired t-test).
+    ref: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3148614/
+    :param df: dataframe with CSA values across transformations: df
+    :param df_sub: dataframe for computing stats per subject: df_sub
+    :param df_rescale: dataframe for computing stats per rescale: df_rescale
+    :param itt: number of Monte Carlo iterations
+    :return df_rescale: modified dataframe with added sample_size_80 (between subjects at 80% power),
+    sample_size_90 (between subjects at 90% power), sample_size_long_80 (within subjects at 80% power) and
+    sample_size_long_90 (within subjects at 90% power)
+    """
+    sample_size_80 = []
+    sample_size_90 = []
+    sample_size_long_80 = []
+    sample_size_long_90 = []
+    logger.info("Computing sample size using Monte Carlo simulation with {} iterations. This might take a while...".format(itt))
+    # Compute mean sample size using a Monte Carlo simulation to evaluate variability of measures
+    for n in range(itt):
+        for rescale_r, group_r in df.groupby('rescale'):
+            for sub, subgroup in group_r.groupby('subject'):
+                # for each subject in each scaling, pick one transformation (Monte Carlo sample) and add the value in new column "sample" of df_sub
+                df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'sample'] = \
+                    df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0]
+                # for each subject in each scaling, compute the difference between one scaled and one un-scaled transformation (Monte Carlo sample) and add the value in new column "diff" of df_sub
+                df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'diff'] = \
+                    df.loc[(df['rescale'] == 1) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - \
+                    df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0]
+        # regroup results per scaling
+        df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values
+        df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values
+        df_rescale['std_sample'] = df_sub.groupby('rescale').std()['sample'].values
+
+        for rescale, group in df_sub.groupby('rescale'):
+            if rescale != 1:
+                # compute mean difference between groups using theoretic scaling and average un-scaled CSA
+                CSA_mean_diff = df_sub.groupby('rescale').get_group(1).mean()['mean'] * (1 - (rescale ** 2))
+                # sample size between-subject
+                # var is the sum of the variances of un-scaled and scaled CSA across subjects (simulating two unpaired study arms)
+                var = df_rescale.groupby('rescale').get_group(1)['std_sample'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_sample'].values[0] ** 2
+                sample_size_80.append((((1.96 + 0.84) ** 2) * var) / (CSA_mean_diff ** 2))
+                sample_size_90.append((((1.96 + 1.28) ** 2) * var) / (CSA_mean_diff ** 2))
+                # sample size within-subject
+                # var_diff is the variance of the difference between un-scaled and scaled CSA across subjects (simulating longitudinal CSA measures)
+                var_diff = df_rescale.groupby('rescale').get_group(rescale)['std_diff'].values[0] ** 2
+                sample_size_long_80.append((((1.96 + 0.84) ** 2) * var_diff) / (CSA_mean_diff ** 2))
+                sample_size_long_90.append((((1.96 + 1.28) ** 2) * var_diff) / (CSA_mean_diff ** 2))
+            else:
+                # to avoid dividing by zero
+                sample_size_80.append(np.inf)
+                sample_size_90.append(np.inf)
+                sample_size_long_80.append(np.inf)
+                sample_size_long_90.append(np.inf)
+    # Compute mean and SD of computed sample sizes
+    df_rescale['sample_size_80'] = np.mean(np.reshape(sample_size_80, (itt, -1)), axis=0)
+    df_rescale['std_sample_size_80'] = np.std(np.reshape(sample_size_80, (itt, -1)), axis=0)
+    df_rescale['sample_size_90'] = np.mean(np.reshape(sample_size_90, (itt, -1)), axis=0)
+    df_rescale['std_sample_size_90'] = np.std(np.reshape(sample_size_90, (itt, -1)), axis=0)
+    df_rescale['sample_size_long_80'] = np.mean(np.reshape(sample_size_long_80, (itt, -1)), axis=0)
+    df_rescale['std_sample_size_long_80'] = np.std(np.reshape(sample_size_long_80, (itt, -1)), axis=0)
+    df_rescale['sample_size_long_90'] = np.mean(np.reshape(sample_size_long_90, (itt, -1)), axis=0)
+    df_rescale['std_sample_size_long_90'] = np.std(np.reshape(sample_size_long_90, (itt, -1)), axis=0)
+    return df_rescale
 
 
 def main():
     """
@@ -343,6 +423,14 @@ def main():
     path_results = os.path.abspath(os.path.expanduser(arguments.i))
     vertlevels_input = arguments.l
     path_output = os.path.abspath(arguments.o)
+    if not os.path.exists(path_output):
+        os.mkdir(path_output)
+
+    # Dump log file there (remove any previous log)
+    if os.path.exists(os.path.join(path_output, FNAME_LOG)):
+        os.remove(os.path.join(path_output, FNAME_LOG))
+    fh = logging.FileHandler(os.path.join(path_output, FNAME_LOG))
+    logging.root.addHandler(fh)
 
     # aggregate all csv results files
     concatenate_csv_files(path_results)
@@ -355,23 +443,24 @@ def main():
     pd.set_option('display.max_rows', None)
 
     # identify rows with missing values
-    print("Remove rows with missing values...")
+    logger.info("Remove rows with missing values...")
     lines_to_drop = df_vert[df_vert['MEAN(area)'] == 'None'].index
     df_vert['subject'] = list(sub.split('data_processed/')[1].split('/anat')[0] for sub in df_vert['Filename'])
     # remove rows with missing values
     df_vert = df_vert.drop(df_vert.index[lines_to_drop])
     df_vert['MEAN(area)'] = pd.to_numeric(df_vert['MEAN(area)'])
-    print("  Rows removed: {}".format(lines_to_drop))
+    logger.info("  Rows removed: {}".format(lines_to_drop))
 
     # fetch parameters from config.yaml file
     config_param = yaml_parser(arguments.config)
 
     # add useful columns to dataframe
     df_vert['basename'] = list(os.path.basename(path).split('.nii.gz')[0] for path in df_vert['Filename'])
-    df_vert['rescale'] = list(float(b.split('RPI_r_r')[1].split('_')[0]) for b in df_vert['basename'])
+    df_vert['rescale'] = list(float(b.split('_r')[1].split('_')[0]) for b in df_vert['basename'])
     df_vert['slices'] = list(int(slices.split(':')[1]) - int(slices.split(':')[0]) + 1 for slices in df_vert['Slice (I->S)'])
+
     # verify if vertlevels of interest were given in input by user
     if vertlevels_input is None:
         vertlevels = list(set(df_vert['VertLevel'].values))
@@ -380,7 +469,7 @@ def main():
         if not all(elem in set(list(df_vert['VertLevel'].values)) for elem in vertlevels):
             raise ValueError("\nInput vertebral levels '{}' do not exist in csv files".format(vertlevels))
     # register vertebrae levels of interest (Default: all vertebrae levels in csv files)
-    print("Stats are averaged across vertebral levels: {}".format(vertlevels))
+    logger.info(f"Stats are averaged across vertebral levels: {vertlevels}")
 
     # Create new dataframe with only selected vertebral levels
     df = df_vert[df_vert['VertLevel'].isin(vertlevels)]
@@ -398,7 +487,7 @@ def main():
     df = df.drop('basename', 1)
 
     # Create dataframe for computing stats per subject: df_sub
-    print("\n==================== subject_dataframe ==========================\n")
+    logger.info("\n==================== subject_dataframe ==========================\n")
     df_sub = pd.DataFrame()
     # add necessary columns to df_sub dataframe
     df_sub['rescale'] = df.groupby(['rescale', 'subject']).mean().reset_index()['rescale']
@@ -412,14 +501,19 @@ def main():
     df_sub['cov'] = df_sub['std'].div(df_sub['mean'])
     df_sub = add_columns_df_sub(df_sub)
     df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale'])
-    df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']).abs()
-    df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['theoretic_csa'])
-    print(df_sub)
+
+    df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa'])
+    df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa'])
+    for rescale, group in df.groupby('rescale'):
+        for sub, subgroup in group.groupby('subject'):
+            # draw one random transformation per subject and scaling (Monte Carlo sample)
+            df_sub.loc[(df_sub['rescale'] == rescale) & (df_sub['subject'] == sub), 'sample'] = subgroup.sample(n=1)['MEAN(area)'].values[0]
+
     # save dataframe in a csv file
     df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv'))
 
     # Create dataframe for computing stats across subject: df_rescale
-    print("\n==================== rescaling_dataframe ==========================\n")
+    logger.info("\n==================== rescaling_dataframe ==========================\n")
     df_rescale = pd.DataFrame()
     df_rescale['rescale'] = df_sub.groupby(['rescale']).mean().reset_index()['rescale']
     df_rescale['rescale_area'] = df_sub.groupby('rescale_area').mean().reset_index()['rescale_area']
@@ -430,15 +524,18 @@ def main():
     df_rescale['std_intra'] = df_sub.groupby('rescale').mean()['std'].values
     df_rescale['cov_intra'] = df_sub.groupby('rescale').mean()['cov'].values
     df_rescale['std_inter'] = df_sub.groupby('rescale').std()['mean'].values
+    df_rescale['cov_inter'] = df_sub.groupby('rescale').std()['mean'].div(
+        df_sub.groupby('rescale').mean()['mean']).values
     df_rescale['mean_rescale_estimated'] = df_sub.groupby('rescale').mean()['rescale_estimated'].values
     df_rescale['std_rescale_estimated'] = df_sub.groupby('rescale').std()['rescale_estimated'].values
     df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values
     df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values
     df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values
-    df_rescale['sample_size'] = sample_size(df_rescale, config_param)
-    print(df_rescale)
+
+    df_rescale = pearson(df_sub, df_rescale)
+    df_rescale = sample_size(df, df_sub, df_rescale)
     # save dataframe in a csv file
-    df_sub.to_csv(os.path.join(path_output, r'csa_transfo.csv'))
+    df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv'))
 
     # plot graph if verbose is present
     if arguments.fig:
@@ -461,11 +558,14 @@ def main():
         z_score_power = config_param['fig']['sample_size']['power']
         # std = STD of subjects without rescaling CSA values
         # mean_csa = mean CSA value of subjects without rescaling
-        plot_sample_size(z_conf=z_score_confidence, z_power=z_score_power, std=df_rescale.loc[1, 'std_inter'], mean_csa=df_rescale.loc[1, 'mean_inter'], path_output=path_output)
+        plot_sample_size(z_conf=z_score_confidence, z_power=z_score_power, std_arr=[7.56, 8.29],
+                         mean_csa=[69.70, 76.12], path_output=path_output)
         # scatter plot of COV in function of error %
         error_function_of_intra_cov(df_sub, path_output=path_output)
         # scatter plot of COV in function of error % to identify outliers
         error_function_of_intra_cov_outlier(df_sub, path_output=path_output)
+        # scatter plot of CSA in function of error %
+        error_function_of_csa(df_sub, path_output=path_output)
 
 if __name__ == "__main__":
     main()
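The `sample_size()` function added above estimates both sample sizes by Monte Carlo resampling: on each iteration it draws one random transformation per subject and scaling, and the STD of the per-subject scaled-vs-unscaled difference drives the within-subject estimate. A stripped-down, self-contained sketch of that resampling loop on a toy dataframe (the subjects, values, and the 0.98 scaling are made up; column names follow the script):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
# Toy stand-in for the per-transformation CSA table (values are made up)
rescale = np.tile(np.repeat([1.0, 0.98], 5), 3)  # 5 transformations per scaling
df = pd.DataFrame({
    'subject': np.repeat(['sub-01', 'sub-02', 'sub-03'], 10),
    'rescale': rescale,
    'MEAN(area)': rng.normal(80, 2, 30) * rescale ** 2,  # CSA scales with r^2
})

std_diffs = []
for _ in range(50):  # Monte Carlo iterations, as in sample_size(itt=50)
    per_subject_diff = []
    for sub, g in df.groupby('subject'):
        # one random un-scaled and one random scaled measure per subject
        unscaled = g[g['rescale'] == 1.0].sample(1)['MEAN(area)'].iloc[0]
        scaled = g[g['rescale'] == 0.98].sample(1)['MEAN(area)'].iloc[0]
        per_subject_diff.append(unscaled - scaled)
    # STD of the paired difference drives the within-subject sample size
    std_diffs.append(np.std(per_subject_diff))

print(np.mean(std_diffs))  # averaged across iterations, as in the script
```

Repeating the draw many times and averaging smooths out the randomness of picking a single transformation per subject, which is why the script reports both the mean and the SD of the computed sample sizes.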
diff --git a/job_template.sh b/job_template.sh
index 922c5c0..972ba17 100644
--- a/job_template.sh
+++ b/job_template.sh
@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --cpus-per-task=32  # number of OpenMP processes
 #SBATCH --mem=128G
-#SBATCH --mail-user=paul.bautin@polymtl.ca
+#SBATCH --mail-user=sandrine.bedard@polymtl.ca
 #SBATCH --mail-type=ALL
 export OMP_NUM_THREADS=1
 export MKL_NUM_THREADS=1
diff --git a/process_data.sh b/process_data.sh
index 15d9f91..2c389f5 100755
--- a/process_data.sh
+++ b/process_data.sh
@@ -49,48 +49,55 @@ segment_and_label_if_does_not_exist(){
   local file="$1"
   local contrast="$2"
   local contrast_str="$3"
-  segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}"
+  FILESEG="${file}_seg"
+  sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic #-qc $PATH_QC -qc-subject ${SUBJECT}
+  #sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT}
+  #segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}"
   local file_seg=${FILESEG}
   # Update global variable with segmentation file name
-  FILELABEL="${file}_labels-disc"
-  FILELABELMANUAL="${path_derivatives}/${SUBJECT}_${contrast_str}_labels-disc-manual"
+  FILELABEL="${file}_label-discs_dlabel"  # TODO: change for updated names
+  FILELABELMANUAL="${path_derivatives}/${SUBJECT}_${contrast_str}_label-discs_dlabel"
   if [ -e "${FILELABELMANUAL}.nii.gz" ]; then
     echo "manual labeled file was found: ${FILELABELMANUAL}"
+    rsync -avzh ${FILELABELMANUAL}.nii.gz ${FILELABEL}.nii.gz
     # reorienting and resampling image
-    sct_image -i ${FILELABELMANUAL}.nii.gz -setorient RPI -o "${FILELABELMANUAL}_RPI.nii.gz"
-    sct_maths -i ${FILELABELMANUAL}_RPI.nii.gz -dilate 2 -o ${FILELABELMANUAL}_RPI_dil.nii.gz
-    sct_resample -i ${FILELABELMANUAL}_RPI_dil.nii.gz -mm $interp -x nn -o ${FILELABELMANUAL}_RPI_dil_r.nii.gz
-    rsync -avzh "${FILELABELMANUAL}_RPI_dil_r.nii.gz" ${FILELABEL}.nii.gz
+    #sct_image -i ${FILELABELMANUAL}.nii.gz -setorient RPI -o "${FILELABELMANUAL}_RPI.nii.gz"
+    #sct_maths -i ${FILELABELMANUAL}_RPI.nii.gz -dilate 2 -o ${FILELABELMANUAL}_RPI_dil.nii.gz
+    #sct_resample -i ${FILELABELMANUAL}_RPI_dil.nii.gz -mm $interp -x nn -o ${FILELABELMANUAL}_RPI_dil_r.nii.gz
+    #rsync -avzh "${FILELABELMANUAL}_RPI_dil_r.nii.gz" ${FILELABEL}.nii.gz
     # Generate labeled segmentation
-    sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABELMANUAL}_RPI_dil_r.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT}
+    sct_image -i ${file_seg}.nii.gz -set-sform-to-qform
+    sct_image -i ${file}.nii.gz -set-sform-to-qform
+    sct_image -i "${FILELABEL}.nii.gz" -set-sform-to-qform
+    sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABEL}.nii.gz" #-qc ${PATH_QC} -qc-subject ${SUBJECT}
   else
     # Generate labeled segmentation
-    sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -qc ${PATH_QC} -qc-subject ${SUBJECT}
+    sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} #-qc ${PATH_QC} -qc-subject ${SUBJECT}
   fi
   # Create labels in the Spinal Cord
-  sct_label_utils -i ${file_seg}_labeled.nii.gz -vert-body 0 -o ${FILELABEL}.nii.gz
+  #sct_label_utils -i ${file_seg}_labeled.nii.gz -vert-body 0 -o ${FILELABEL}.nii.gz
   FILE_SEG_LABEL=${file_seg}_labeled
 }
 
 # Check if manual segmentation already exists. If it does, copy it locally. If
 # it does not, perform seg.
-segment_if_does_not_exist(){
-  local file="$1"
-  local contrast="$2"
-  local qc=$3
-  # Update global variable with segmentation file name
-  FILESEG="${file}_seg"
-  FILESEGMANUAL="${path_derivatives}/${FILESEG}-manual"
-  if [ -e "${FILESEGMANUAL}.nii.gz" ]; then
-    echo "Found! Using manual segmentation."
-    sct_resample -i ${FILESEGMANUAL}.nii.gz -mm $interp -x nn -o ${FILESEGMANUAL}_r.nii.gz
-    rsync -avzh ${FILESEGMANUAL}_r.nii.gz ${FILESEG}.nii.gz
-    sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc $qc
-  else
-    # Segment spinal cord
-    sct_deepseg_sc -i ${file}.nii.gz -c ${contrast} $qc
-  fi
-}
+# segment_if_does_not_exist(){
+#   local file="$1"
+#   local contrast="$2"
+#   local qc=$3
+#   # Update global variable with segmentation file name
+#   FILESEG="${file}_seg"
+#   FILESEGMANUAL="${path_derivatives}/${FILESEG}-manual"
+#   if [ -e "${FILESEGMANUAL}.nii.gz" ]; then
+#     echo "Found! Using manual segmentation."
+#     sct_resample -i ${FILESEGMANUAL}.nii.gz -mm $interp -x nn -o ${FILESEGMANUAL}_r.nii.gz
+#     rsync -avzh ${FILESEGMANUAL}_r.nii.gz ${FILESEG}.nii.gz
+#     sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc $qc
+#   else
+#     # Segment spinal cord
+#     sct_deepseg_sc -i ${file}.nii.gz -c ${contrast} $qc
+#   fi
+# }
 
 
 # SCRIPT STARTS HERE
@@ -98,9 +105,7 @@
 # Display useful info for the log, such as SCT version, RAM and CPU cores available
 sct_check_dependencies -short
 # copy derivatives directory containing manual corrections to PATH_DATA_PROCESSED
-mkdir -p "${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat/"
-cp -r "${PATH_DATA}/derivatives/labels/${SUBJECT}/anat" "${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}"
-path_derivatives="${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat"
+path_derivatives="${PATH_DATA}/derivatives/labels/${SUBJECT}/anat"
 # Go to results folder, where most of the outputs will be located
 cd $PATH_DATA_PROCESSED
 # Copy source images
@@ -124,13 +129,13 @@ fi
 file=${SUBJECT}_${contrast_str}
 # Reorient image to RPI
 sct_image -i ${file}.nii.gz -setorient RPI -o ${file}_RPI.nii.gz
-file=${file}_RPI
 # Resample image isotropically
-sct_resample -i ${file}.nii.gz -mm $interp -o ${file}_r.nii.gz
-file=${file}_r
-end=`date +%s`
-runtime=$((end-start))
-echo "+++++++++++ TIME: Duration of reorienting and resampling: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec"
+sct_resample -i ${file}_RPI.nii.gz -mm $interp -o ${file}_RPI_r.nii.gz
+mv "${file}_RPI_r.nii.gz" "${SUBJECT}_space-other_${contrast_str}.nii.gz"
+file=${SUBJECT}_space-other_${contrast_str}
+# end=`date +%s`
+# runtime=$((end-start))
+# echo "+++++++++++ TIME: Duration of reorienting and resampling: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec"
 
 # Label spinal cord (only if it does not exist) in dir anat
 start=`date +%s`
@@ -198,9 +203,13 @@ for r_coef in ${R_COEFS[@]}; do
     file_label_r_t=${file_label_r_t}_crop
 
-    # Segment spinal cord (only if it does not exist)
+    # Segment spinal cord
     start=`date +%s`
-    sct_deepseg_sc -i ${file_r_t}.nii.gz -c ${contrast}
+    #sct_deepseg_sc -i ${file_r_t}.nii.gz -c ${contrast}
+    sct_deepseg -i ${file_r_t}.nii.gz -task seg_sc_contrast_agnostic  # -qc $PATH_QC -qc-subject ${SUBJECT}
+    # TODO: soft csa?
+    #sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT}
+
     end=`date +%s`
     runtime=$((end-start))
     echo "+++++++++++ TIME: Duration of segmentation t${i_transfo}: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec"
diff --git a/requirements.txt b/requirements_cc.txt
similarity index 51%
rename from requirements.txt
rename to requirements_cc.txt
index f110068..dacd8a0 100755
--- a/requirements.txt
+++ b/requirements_cc.txt
@@ -5,9 +5,9 @@ matplotlib
 scikit-image
 nibabel
 ruamel.yaml
+argparse
+setuptools
+PyYAML
+coloredlogs
+networkx
 yaml-1.3
-argparse~=1.4.0
-setuptools~=49.6.0
-PyYAML~=5.3.1
-coloredlogs~=14.0
-
diff --git a/requirements_manual_pip.txt b/requirements_manual_pip.txt
new file mode 100755
index 0000000..099fd2e
--- /dev/null
+++ b/requirements_manual_pip.txt
@@ -0,0 +1,2 @@
+networkx==3.2.1
+yaml-1.3
diff --git a/setup.py b/setup.py
index f0786f9..2ca613a 100644
--- a/setup.py
+++ b/setup.py
@@ -29,6 +29,7 @@
     ],
     keywords='',
     install_requires=install_reqs,
+    packages=[],
    entry_points={
         'console_scripts': [
             'affine_transfo=affine_transfo:main',