From 0d18af4adf2bf351d50740275bbc88100692cc36 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 17 Apr 2023 09:47:11 -0400 Subject: [PATCH 01/54] finished updating preprocessing fxns according to upcoming SCT 6.0 release; also automatically updates average disc level distances according to the sample and computes lowest disc according to 75th quantile by default --- preprocessing.py | 767 ++++++++++++++++++++++++----------------------- 1 file changed, 395 insertions(+), 372 deletions(-) diff --git a/preprocessing.py b/preprocessing.py index 93bfadf..f6d3b89 100755 --- a/preprocessing.py +++ b/preprocessing.py @@ -6,13 +6,16 @@ from copy import copy from tqdm import tqdm -import sct_utils as sct -from msct_types import Centerline -from sct_straighten_spinalcord import smooth_centerline +from spinalcordtoolbox import utils as sct +from spinalcordtoolbox.types import Centerline +from spinalcordtoolbox import straightening +from spinalcordtoolbox.centerline.core import ParamCenterline +from spinalcordtoolbox.centerline.core import get_centerline from spinalcordtoolbox.image import Image -from sct_download_data import download_data, unzip - +from spinalcordtoolbox.download import download_data, unzip +list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30] labels_regions = {'PMJ': 50, 'PMG': 49, 'C1': 1, 'C2': 2, 'C3': 3, 'C4': 4, 'C5': 5, 'C6': 6, 'C7': 7, 'T1': 8, 'T2': 9, 'T3': 10, 'T4': 11, 'T5': 12, 'T6': 13, 'T7': 14, 'T8': 15, 'T9': 16, 'T10': 17, @@ -20,7 +23,6 @@ 'L1': 20, 'L2': 21, 'L3': 22, 'L4': 23, 'L5': 24, 'S1': 25, 'S2': 26, 'S3': 27, 'S4': 28, 'S5': 29, 'Co': 30} - regions_labels = {'50': 'PMJ', '49': 'PMG', '1': 'C1', '2': 'C2', '3': 'C3', '4': 'C4', '5': 'C5', '6': 'C6', '7': 'C7', '8': 'T1', '9': 'T2', '10': 'T3', '11': 'T4', '12': 'T5', '13': 'T6', '14': 'T7', '15': 'T8', @@ -28,10 +30,6 @@ '20': 'L1', '21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5', '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5', '30': 'Co'} - -list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30] - average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0, 'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056, 'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905, @@ -42,55 +40,106 @@ 'L1': 31.89272719870084, 'L2': 33.511890474486449, 'L3': 35.721413718617441} -def download_data_template(path_data='./', name='example', force=False): +def average_coordinates_over_slices(self, image): ### deprecated from latest version of spinalcordtoolbox + # extracting points information for each coordinates + P_x = np.array([point[0] for point in self.points]) + P_y = np.array([point[1] for point in self.points]) + P_z = np.array([point[2] for point in self.points]) + P_z_vox = np.array([coord[2] for coord in image.transfo_phys2pix(self.points)]) + P_x_d = np.array([deriv[0] for deriv in self.derivatives]) + P_y_d = np.array([deriv[1] for deriv in self.derivatives]) + P_z_d = np.array([deriv[2] for deriv in self.derivatives]) + + P_z_vox = np.array([int(np.round(P_z_vox[i])) for i in range(0, len(P_z_vox))]) + # not perfect but works (if "enough" points), in order to deal with missing z slices + for i in range(min(P_z_vox), max(P_z_vox) + 1, 1): + if i not in P_z_vox: + from bisect import bisect_right + idx_closest = bisect_right(P_z_vox, i) + z_min, z_max = P_z_vox[idx_closest - 1], 
P_z_vox[idx_closest] + if z_min == z_max: + weight_min = weight_max = 0.5 + else: + weight_min, weight_max = abs((z_min - i) / (z_max - z_min)), abs((z_max - i) / (z_max - z_min)) + P_x_temp = np.insert(P_x, idx_closest, weight_min * P_x[idx_closest - 1] + weight_max * P_x[idx_closest]) + P_y_temp = np.insert(P_y, idx_closest, weight_min * P_y[idx_closest - 1] + weight_max * P_y[idx_closest]) + P_z_temp = np.insert(P_z, idx_closest, weight_min * P_z[idx_closest - 1] + weight_max * P_z[idx_closest]) + P_x_d_temp = np.insert(P_x_d, idx_closest, weight_min * P_x_d[idx_closest - 1] + weight_max * P_x_d[idx_closest]) + P_y_d_temp = np.insert(P_y_d, idx_closest, weight_min * P_y_d[idx_closest - 1] + weight_max * P_y_d[idx_closest]) + P_z_d_temp = np.insert(P_z_d, idx_closest, weight_min * P_z_d[idx_closest - 1] + weight_max * P_z_d[idx_closest]) + P_z_vox_temp = np.insert(P_z_vox, idx_closest, i) + P_x, P_y, P_z, P_x_d, P_y_d, P_z_d, P_z_vox = P_x_temp, P_y_temp, P_z_temp, P_x_d_temp, P_y_d_temp, P_z_d_temp, P_z_vox_temp + + coord_mean = np.array([[np.mean(P_x[P_z_vox == i]), np.mean(P_y[P_z_vox == i]), np.mean(P_z[P_z_vox == i])] for i in range(min(P_z_vox), max(P_z_vox) + 1, 1)]) + x_centerline_fit = coord_mean[:, :][:, 0] + y_centerline_fit = coord_mean[:, :][:, 1] + coord_mean_d = np.array([[np.mean(P_x_d[P_z_vox == i]), np.mean(P_y_d[P_z_vox == i]), np.mean(P_z_d[P_z_vox == i])] for i in range(min(P_z_vox), max(P_z_vox) + 1, 1)]) + z_centerline = coord_mean[:, :][:, 2] + x_centerline_deriv = coord_mean_d[:, :][:, 0] + y_centerline_deriv = coord_mean_d[:, :][:, 1] + z_centerline_deriv = coord_mean_d[:, :][:, 2] + + return x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv + +###### DELETE BELOW: +# def _get_coordinate_interpolated(self, vertebral_level, relative_position, backup_index = None, backup_centerline = None, mode = 'levels'): +# index_closest = self.get_closest_to_absolute_position(vertebral_level, relative_position, backup_index = backup_index, backup_centerline = backup_centerline) +# if index_closest is None: +# return [np.nan, np.nan, np.nan] + +# relative_position_closest = self.dist_points_rel[index_closest] +# coordinate_closest = self.points[index_closest] + +# if relative_position < relative_position_closest: +# index_next = index_closest + 1 +# else: +# index_next = index_closest - 1 +# relative_position_next = self.dist_points_rel[index_next] +# coordinate_next = self.points[index_next] +# #if (relative_position_next != relative_position_closest): +# weight_closest = abs(relative_position - relative_position_closest) / abs(relative_position_next - relative_position_closest) +# weight_next = abs(relative_position - relative_position_next) / abs(relative_position_next - relative_position_closest) +# #else: +# # weight_closest = 1 +# # weight_next = 0 # same thing! +# coordinate_result = [weight_closest * coordinate_closest[0] + weight_next * coordinate_next[0], +# weight_closest * coordinate_closest[1] + weight_next * coordinate_next[1], +# weight_closest * coordinate_closest[2] + weight_next * coordinate_next[2]] + +# return coordinate_result + +def get_lowest_vert_dataset(dataset_info): ### NEW + FIX """ - This function downloads the example data and return the path where it was downloaded. 
-    :param path_data: destination path where to download the data
-    :return: absolute destination path
+    This function finds the lowest vertebral level labeled across all subjects of the dataset, by counting
+    the distinct disc distances from C1 in each subject's saved centerline
+    :param dataset_info: dictionary containing dataset information
+    :return: smallest number of distinct labeled disc levels found in any subject
     """
+    path_data = dataset_info['path_data']
+    path_derivatives = path_data + 'derivatives'
+    list_subjects = dataset_info['subjects'].split(', ')
+    lowest_vert = 100
 
-    # Download data
-    if name == 'example':
-        data_name = 'data'
-        url = 'https://www.neuro.polymtl.ca/_media/downloads/sct/20181128_example_data_template.zip'
-    elif name == 'icbm152':
-        data_name = 'icbm152'
-        url = 'https://www.neuro.polymtl.ca/_media/downloads/sct/20181128_icbm152.zip'
-    else:
-        raise ValueError('ERROR: data name is wrong. It should be either \'example\' or \'icbm152\'')
-
-    if os.path.isdir(path_data + data_name) and not force:
-        print (os.path.abspath(path_data + data_name) + '/')
-        return os.path.abspath(path_data + data_name) + '/'
-
-    verbose = 2
-    try:
-        tmp_file = download_data(url, verbose)
-    except (KeyboardInterrupt):
-        sct.printv('\nERROR: User canceled process.\n', 1, 'error')
-
-    # Check if folder already exists
-    sct.printv('\nCheck if folder already exists...')
-    if os.path.isdir(path_data + data_name):
-        sct.printv('WARNING: Folder ' + path_data + data_name + ' already exists. Removing it...', 1, 'warning')
-        shutil.rmtree(path_data + data_name, ignore_errors=True)
-
-    # unzip
-    unzip(tmp_file, path_data, verbose)
-
-    sct.printv('\nRemove temporary file...')
-    os.remove(tmp_file)
-
-    absolute_path = os.path.abspath(path_data + data_name) + '/'
-    os.chmod(absolute_path, 0o755)
-
-    return absolute_path
-
+    for subject_name in list_subjects:
+        fname_centerline = path_derivatives + '/' + dataset_info['pipeline_centerline'] + '/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_centerline']
+        if os.path.isfile(fname_centerline + '.npz'):
+            centerline = Centerline(fname = fname_centerline + '.npz')
+            dist_C1_dict = {}
+            tmp_list = []
+            for key,value in centerline.distance_from_C1label.items():
+                if value in tmp_list: continue
+                else:
+                    tmp_list.append(value)
+                    dist_C1_dict[key] = value
+            if len(dist_C1_dict) < lowest_vert: lowest_vert = len(dist_C1_dict)
+        else:
+            print("Centerline for " + subject_name + " does not exist!")
+    return lowest_vert
 
-def read_dataset(fname_json='configuration.json', path_data='./'):
+def read_dataset(fname_json = 'configuration.json', path_data = './'): ### TO COMPLETE
     """
     This function reads a json file that describes the dataset, including the list of subjects as well as
-    suffix for filenames (centerline, disks, segmentations, etc.).
+    suffix for filenames (centerline, discs, segmentations, etc.).
     The function raises an exception if the file is missing or if required fields are missing from the json file.
     :param fname_json: path + file name to json description file.
     :return: a dictionary with all the fields contained in the json file.
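To make the new configuration contract concrete, here is a minimal, hypothetical usage sketch (not taken from the patch itself; the field values follow the configuration.json template introduced later in this series):

```python
import json
from preprocessing import read_dataset

# Hypothetical configuration: the keys mirror the required key_list checked in the hunk below;
# "path_data" must point to an existing directory, otherwise read_dataset() raises ValueError.
config = {
    "path_data": "/path/to/data/",
    "subjects": "sub-101, sub-102",
    "data_type": "anat",
    "contrast": "t1",
    "suffix_image": "_rec-composed_T1w",
    "suffix_label-SC_seg": "_rec-composed_T1w_label-SC_seg",
    "suffix_label-disc": "_rec-composed_T1w_label-disc",
}
with open("configuration.json", "w") as f:
    json.dump(config, f, indent=4)

dataset_info = read_dataset("configuration.json")
# subjects are stored as a single comma-separated string; downstream functions split it like this:
list_subjects = dataset_info["subjects"].split(", ")
```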
@@ -101,240 +150,236 @@ def read_dataset(fname_json='configuration.json', path_data='./'): with open(fname_json) as data_file: dataset_info = json.load(data_file) - dataset_temp = {} - for key in dataset_info: - dataset_temp[str(key)] = dataset_info[key] - dataset_info = dataset_temp - error = '' - - # check if required fields are in json file - if 'path_data' not in dataset_info: - error += 'Dataset info must contain the field \'path_data\'.\n' - if 'path_template' not in dataset_info: - error += 'Dataset info must contain the field \'path_template\'.\n' - if 'subjects' not in dataset_info: - error += 'Dataset info must contain a list of subjects.\n' - if 'suffix_centerline' not in dataset_info: - error += 'Dataset info must contain the field \'suffix_centerline\'.\n' - if 'suffix_disks' not in dataset_info: - error += 'Dataset info must contain the field \'suffix_disks\'.\n' + key_list = ["path_data", "subjects", "data_type", "contrast", "suffix_image", "suffix_label-SC_seg", "suffix_label-disc"] + for key in key_list: + if key not in dataset_info.keys(): error += 'Dataset configuration file ' + fname_json + ' must contain the field ' + key + '.\n' # check if path to data and template exist and are absolute - if 'path_data' in dataset_info: - if not os.path.isdir(path_data + dataset_info['path_data']): - error += 'Path to data (field \'path_data\') must exist.\n' - else: - dataset_info['path_data'] = os.path.abspath(path_data + dataset_info['path_data']) + '/' - if 'path_template' in dataset_info: - if not os.path.isdir(path_data + dataset_info['path_template']): - os.makedirs(path_data + dataset_info['path_template']) - os.chmod(path_data + dataset_info['path_template'], 0o755) - dataset_info['path_template'] = os.path.abspath(path_data + dataset_info['path_template']) + '/' + if 'path_data' in dataset_info and not os.path.isdir(dataset_info['path_data']): error += 'Path to data (field \'path_data\') must exist.\n' # if there are some errors, raise an exception - if error != '': - raise ValueError('JSON file containing dataset info is incomplete:\n' + error) - - # conversion of data to utf-8 instead of unicode - only applies to Python 2 - dataset_info['path_data'] = str(dataset_info['path_data']) - dataset_info['path_template'] = str(dataset_info['path_template']) - dataset_info['suffix_centerline'] = str(dataset_info['suffix_centerline']) - dataset_info['suffix_disks'] = str(dataset_info['suffix_disks']) - dataset_info['subjects'] = [str(subject) for subject in dataset_info['subjects']] - if 'suffix_segmentation' in dataset_info: - dataset_info['suffix_segmentation'] = str(dataset_info['suffix_segmentation']) + if error != '': raise ValueError('JSON file containing dataset info is incomplete:\n' + error) return dataset_info - -def generate_centerline(dataset_info, contrast='t1', regenerate=False): +def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regenerate = False, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): """ This function generates spinal cord centerline from binary images (either an image of centerline or segmentation) :param dataset_info: dictionary containing dataset information :param contrast: {'t1', 't2'} :return list of centerline objects """ + contrast = dataset_info['contrast'] path_data = dataset_info['path_data'] - list_subjects = dataset_info['subjects'] + list_subjects = dataset_info['subjects'].split(', ') list_centerline = [] - current_path = os.getcwd() - tqdm_bar = tqdm(total=len(list_subjects), unit='B', 
unit_scale=True, desc="Status", ascii=True) - for subject_name in list_subjects: - path_data_subject = path_data + subject_name + '/' + contrast + '/' - fname_image_centerline = path_data_subject + contrast + dataset_info['suffix_centerline'] + '.nii.gz' - fname_image_disks = path_data_subject + contrast + dataset_info['suffix_disks'] + '.nii.gz' + tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) - # go to output folder - sct.printv('\nExtracting centerline from ' + path_data_subject) - os.chdir(path_data_subject) + # obtaining centerline of each subject + for subject_name in list_subjects: + fname_image = path_data + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '.nii.gz' + fname_image_seg = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-SC_seg'] + '.nii.gz' + fname_discs = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-disc'] + fname_image_discs = fname_discs + '-manual.nii.gz' if os.path.isfile(fname_discs + '-manual.nii.gz') else fname_discs + '.nii.gz' + fname_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline' - fname_centerline = 'centerline' # if centerline exists, we load it, if not, we compute it if os.path.isfile(fname_centerline + '.npz') and not regenerate: - centerline = Centerline(fname=path_data_subject + fname_centerline + '.npz') + print("Centerline for " + subject_name + " exists and will not be recomputed!") + centerline = Centerline(fname = fname_centerline + '.npz') else: - # extracting intervertebral disks - im = Image(fname_image_disks) - coord = im.getNonZeroCoordinates(sorting='z', reverse_coord=True) + if os.path.isfile(fname_image_seg): + print(subject_name + ' SC segmentation exists. Extracting centerline from ' + fname_image_seg) + im_seg = Image(fname_image_seg).change_orientation('RPI') + param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) + else: + print(subject_name + ' SC segmentation does not exist. 
Extracting centerline from ' + fname_image) + im_seg = Image(fname_image).change_orientation('RPI') + param_centerline = ParamCenterline(algo_fitting = 'optic', contrast = contrast, smooth = smooth, degree = 5, minmax = minmax) + + # extracting intervertebral discs + im_discs = Image(fname_image_discs).change_orientation('RPI') + coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) coord_physical = [] for c in coord: - if c.value <= 22 or c.value in [48, 49, 50, 51, 52]: # 22 corresponds to L2 - c_p = list(im.transfo_pix2phys([[c.x, c.y, c.z]])[0]) + if c.value <= lowest_disc or c.value in [48, 49, 50, 51, 52]: + c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) c_p.append(c.value) coord_physical.append(c_p) - # extracting centerline from binary image and create centerline object with vertebral distribution - x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv = smooth_centerline( - fname_image_centerline, algo_fitting='nurbs', - verbose=0, nurbs_pts_number=4000, all_slices=False, phys_coordinates=True, remove_outliers=False) - centerline = Centerline(x_centerline_fit, y_centerline_fit, z_centerline, - x_centerline_deriv, y_centerline_deriv, z_centerline_deriv) + # extracting centerline + im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline, space = 'phys') + + # save centerline as .nii.gz file + im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32') + centerline = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) centerline.compute_vertebral_distribution(coord_physical) - centerline.save_centerline(fname_output=fname_centerline) + # save centerline .npz file + centerline.save_centerline(fname_output = fname_centerline) list_centerline.append(centerline) tqdm_bar.update(1) tqdm_bar.close() - os.chdir(current_path) - return list_centerline +# def compute_ICBM152_centerline(dataset_info): ###### FIX +# """ +# This function extracts the centerline from the ICBM152 brain template +# :param dataset_info: dictionary containing dataset information +# :return: +# """ +# path_data = dataset_info['path_data'] -def compute_ICBM152_centerline(dataset_info): - """ - This function extracts the centerline from the ICBM152 brain template - :param dataset_info: dictionary containing dataset information - :return: - """ - path_data = dataset_info['path_data'] - - if not os.path.isdir(path_data + 'icbm152/'): - download_data_template(path_data=path_data, name='icbm152', force=False) - - image_disks = Image(path_data + 'icbm152/mni_icbm152_t1_tal_nlin_sym_09c_disks_manual.nii.gz') - coord = image_disks.getNonZeroCoordinates(sorting='z', reverse_coord=True) - coord_physical = [] +# if not os.path.isdir(path_data + 'icbm152/'): +# download_data_template(path_data = path_data, name = 'icbm152', force = False) - for c in coord: - if c.value <= 22 or c.value in [48, 49, 50, 51, 52]: # 22 corresponds to L2 - c_p = list(image_disks.transfo_pix2phys([[c.x, c.y, c.z]])[0]) - c_p.append(c.value) - coord_physical.append(c_p) +# image_discs = Image(path_data + 'icbm152/mni_icbm152_t1_tal_nlin_sym_09c_discs_manual.nii.gz') +# coord = image_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) +# coord_physical = [] - x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv = smooth_centerline( - path_data + 
'icbm152/mni_icbm152_t1_centerline_manual.nii.gz', algo_fitting='nurbs', - verbose=0, nurbs_pts_number=300, all_slices=False, phys_coordinates=True, remove_outliers=False) +# for c in coord: +# if c.value <= 20 or c.value in [48, 49, 50, 51, 52]: # 22 corresponds to L2 +# c_p = list(image_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) +# c_p.append(c.value) +# coord_physical.append(c_p) - centerline = Centerline(x_centerline_fit, y_centerline_fit, z_centerline, - x_centerline_deriv, y_centerline_deriv, z_centerline_deriv) +# x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv = smooth_centerline( +# path_data + 'icbm152/mni_icbm152_t1_centerline_manual.nii.gz', algo_fitting = 'nurbs', +# verbose = 0, nurbs_pts_number = 300, all_slices = False, phys_coordinates = True, remove_outliers = False) - centerline.compute_vertebral_distribution(coord_physical, label_reference='PMG') - return centerline +# centerline = Centerline(x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv) +# centerline.compute_vertebral_distribution(coord_physical, label_reference = 'PMG') +# return centerline -def average_centerline(list_centerline, dataset_info, use_ICBM152=True, use_label_ref=None): +def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_label_ref = None, lowest_disc_quantile = 0.75): """ This function compute the average centerline and vertebral distribution, that will be used to create the final template space. :param list_centerline: list of Centerline objects, for all subjects :param dataset_info: dictionary containing dataset information :return: points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline - position_template_disks: index of intervertebral disks along the template centerline + position_template_discs: index of intervertebral discs along the template centerline """ - if use_ICBM152: - # extracting centerline from ICBM152 - centerline_icbm152 = compute_ICBM152_centerline(dataset_info) - - # extracting distance of each disk from C1 - list_dist_disks = [] - for centerline in list_centerline: - list_dist_disks.append(centerline.distance_from_C1label) + # extracting centerline from ICBM152 + # if use_ICBM152: centerline_icbm152 = compute_ICBM152_centerline(dataset_info) ###### FIX + + list_dist_discs = [] + for centerline in list_centerline: list_dist_discs.append(centerline.distance_from_C1label) + + # finding lowest disc of the template, according to the dataset being used + list_lowest_disc = [] + for val in list_dist_discs: list_lowest_disc.append(len(val)) + lowest_disc = int(np.quantile((np.array(list_lowest_disc)), lowest_disc_quantile)) + + # generating custom list of average vertebral lengths + new_vert_length = {} + for dist_discs in list_dist_discs: + for i, disc_label in enumerate(dist_discs): + if i < lowest_disc: + if disc_label == 'PMJ': + length = abs(dist_discs[disc_label] - dist_discs['PMG']) + elif disc_label == 'PMG': + length = abs(dist_discs[disc_label] - dist_discs['C1']) + else: + index_current_label = list_labels.index(labels_regions[disc_label]) + next_label = regions_labels[str(list_labels[index_current_label + 1])] + if next_label in dist_discs: + length = abs(dist_discs[disc_label] - dist_discs[next_label]) + if disc_label in new_vert_length: + new_vert_length[disc_label].append(length) + else: + new_vert_length[disc_label] = [length] + new_average_vert_length = {} + for disc_label 
in new_vert_length: new_average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) # computing length of each vertebral level length_vertebral_levels = {} - for dist_disks in list_dist_disks: - for disk_label in dist_disks: - if disk_label == 'PMJ': - length = abs(dist_disks[disk_label] - dist_disks['PMG']) - elif disk_label == 'PMG': - length = abs(dist_disks[disk_label] - dist_disks['C1']) - else: - index_current_label = list_labels.index(labels_regions[disk_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] - if next_label in dist_disks: - length = abs(dist_disks[disk_label] - dist_disks[next_label]) + for dist_discs in list_dist_discs: + for disc_label in new_vert_length: + if disc_label in dist_discs: + if disc_label == 'PMJ': + length = abs(dist_discs[disc_label] - dist_discs['PMG']) + elif disc_label == 'PMG': + length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - if disk_label in average_vert_length: - length = average_vert_length[disk_label] + index_current_label = list_labels.index(labels_regions[disc_label]) + next_label = regions_labels[str(list_labels[index_current_label + 1])] + if next_label in dist_discs: + length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: - length = 0.0 + length = new_average_vert_length[disc_label] + else: length = new_average_vert_length[disc_label] - if disk_label in length_vertebral_levels: - length_vertebral_levels[disk_label].append(length) + if disc_label in length_vertebral_levels: + length_vertebral_levels[disc_label].append(length) else: - length_vertebral_levels[disk_label] = [length] - + length_vertebral_levels[disc_label] = [length] + # averaging the length of vertebral levels average_length = {} - for disk_label in length_vertebral_levels: - mean = np.mean(length_vertebral_levels[disk_label]) - std = np.std(length_vertebral_levels[disk_label]) - average_length[disk_label] = [disk_label, mean, std] + for disc_label in length_vertebral_levels: + mean = np.mean(length_vertebral_levels[disc_label]) + std = np.std(length_vertebral_levels[disc_label]) + average_length[disc_label] = [disc_label, mean, std] - # computing distances of disks from C1, based on average length - distances_disks_from_C1 = {'C1': 0.0} + # computing distances of discs from C1, based on average length + distances_discs_from_C1 = {'C1': 0.0} if 'PMG' in average_length: - distances_disks_from_C1['PMG'] = -average_length['PMG'][1] + distances_discs_from_C1['PMG'] = -average_length['PMG'][1] if 'PMJ' in average_length: - distances_disks_from_C1['PMJ'] = -average_length['PMG'][1] - average_length['PMJ'][1] - for disk_number in list_labels: - if disk_number not in [50, 49, 1] and regions_labels[str(disk_number)] in average_length: - distances_disks_from_C1[regions_labels[str(disk_number)]] = distances_disks_from_C1[regions_labels[str(disk_number - 1)]] + average_length[regions_labels[str(disk_number)]][1] + distances_discs_from_C1['PMJ'] = -average_length['PMG'][1] - average_length['PMJ'][1] + for disc_number in list_labels: + if disc_number not in [50, 49, 1] and regions_labels[str(disc_number)] in average_length: + distances_discs_from_C1[regions_labels[str(disc_number)]] = distances_discs_from_C1[regions_labels[str(disc_number - 1)]] + average_length[regions_labels[str(disc_number)]][1] - # calculating disks average distances from C1 + # calculating discs average distances from C1 average_distances = [] - for disk_label in distances_disks_from_C1: - mean = np.mean(distances_disks_from_C1[disk_label]) - std = 
np.std(distances_disks_from_C1[disk_label])
-        average_distances.append([disk_label, mean, std])
+    for disc_label in distances_discs_from_C1:
+        mean = np.mean(distances_discs_from_C1[disc_label])
+        std = np.std(distances_discs_from_C1[disc_label])
+        average_distances.append([disc_label, mean, std])
 
     # averaging distances for all subjects and calculating relative positions
-    average_distances = sorted(average_distances, key=lambda x: x[1], reverse=False)
+    average_distances = sorted(average_distances, key = lambda x: x[1], reverse = False)
     number_of_points_between_levels = 100
-    disk_average_coordinates = {}
+    disc_average_coordinates = {}
     points_average_centerline = []
     label_points = []
     average_positions_from_C1 = {}
-    disk_position_in_centerline = {}
+    disc_position_in_centerline = {}
 
-    for i in range(len(average_distances)):
-        disk_label = average_distances[i][0]
-        average_positions_from_C1[disk_label] = average_distances[i][1]
+    # iterate over each disc level
+    for i in range(len(average_distances)): ###### C1, C2, C3, C4, ...
+        disc_label = average_distances[i][0]
+        average_positions_from_C1[disc_label] = average_distances[i][1]
 
-        for j in range(number_of_points_between_levels):
-            relative_position = float(j) / float(number_of_points_between_levels)
-            if disk_label in ['PMJ', 'PMG']:
+        for j in range(number_of_points_between_levels): ###### C1: {0, 1, 2, 3, ...}
+            relative_position = float(j) / float(number_of_points_between_levels) ###### C1: {0/100, 1/100, 2/100, 3/100, ...}
+            if disc_label in ['PMJ', 'PMG']:
                 relative_position = 1.0 - relative_position
             list_coordinates = [[]] * len(list_centerline)
-            for k, centerline in enumerate(list_centerline):
-                list_coordinates[k] = centerline.get_coordinate_interpolated(disk_label, relative_position)
-
+            for k, centerline in enumerate(list_centerline): ###### iterate through each centerline and get actual absolute coordinate
+                #if disc_label in centerline.distance_from_C1label:
+                list_coordinates[k] = centerline.get_closest_to_relative_position(disc_label, relative_position) ### centerline.get_coordinate_interpolated(disc_label, relative_position)
             # average all coordinates
-            average_coord = np.nanmean(list_coordinates, axis=0)
+            get_avg = []
+            for item in list_coordinates:
+                if item is not None: get_avg.append(item)
+            average_coord = np.array(get_avg).mean(axis = 0)  # average across subjects, keeping the (x, y, z) components separate
 
             # add it to averaged centerline list of points
             points_average_centerline.append(average_coord)
-            label_points.append(disk_label)
+            label_points.append(disc_label)
             if j == 0:
-                disk_average_coordinates[disk_label] = average_coord
-                disk_position_in_centerline[disk_label] = i * number_of_points_between_levels
-
+                disc_average_coordinates[disc_label] = average_coord
+                disc_position_in_centerline[disc_label] = i * number_of_points_between_levels
+
     # create final template space
-
     if use_label_ref is not None:
         label_ref = use_label_ref
         if label_ref not in length_vertebral_levels:
@@ -347,27 +392,27 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152=True, use_labe
     else:
         raise Exception('ERROR: the images should always have C1 label.')
 
-    position_template_disks = {}
+    position_template_discs = {}
     if use_ICBM152:
-        coord_ref = np.copy(centerline_icbm152.points[centerline_icbm152.index_disk[label_ref]])
-        for disk in average_length:
-            if disk in ['C1', 'PMJ', 'PMG']:
-                position_template_disks[disk] = centerline_icbm152.points[centerline_icbm152.index_disk[disk]]
+        coord_ref = np.copy(centerline_icbm152.points[centerline_icbm152.index_disc[label_ref]])
+        for disc in average_length:
+            if
disc in ['C1', 'PMJ', 'PMG']: + position_template_discs[disc] = centerline_icbm152.points[centerline_icbm152.index_disc[disc]] else: - coord_disk = coord_ref.copy() - coord_disk[2] -= average_positions_from_C1[disk] - average_positions_from_C1[label_ref] - position_template_disks[disk] = coord_disk + coord_disc = coord_ref.copy() + coord_disc[2] -= average_positions_from_C1[disc] - average_positions_from_C1[label_ref] + position_template_discs[disc] = coord_disc else: coord_ref = np.array([0.0, 0.0, 0.0]) - for disk in average_length: - coord_disk = coord_ref.copy() - coord_disk[2] -= average_positions_from_C1[disk] - average_positions_from_C1[label_ref] - position_template_disks[disk] = coord_disk + for disc in average_length: + coord_disc = coord_ref.copy() + coord_disc[2] -= average_positions_from_C1[disc] - average_positions_from_C1[label_ref] + position_template_discs[disc] = coord_disc # change centerline to be straight below reference if using ICBM152 if use_ICBM152: - index_straight = disk_position_in_centerline[label_ref] + index_straight = disc_position_in_centerline[label_ref] else: # else: straighten every points along centerline index_straight = 0 @@ -376,39 +421,39 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152=True, use_labe current_label = label_points[i] if current_label in average_length: length_current_label = average_length[current_label][1] - relative_position_from_disk = float(i - disk_position_in_centerline[current_label]) / float(number_of_points_between_levels) + relative_position_from_disc = float(i - disc_position_in_centerline[current_label]) / float(number_of_points_between_levels) temp_point = np.copy(coord_ref) if i >= index_straight: index_current_label = list_labels.index(labels_regions[current_label]) next_label = regions_labels[str(list_labels[index_current_label + 1])] if next_label not in average_positions_from_C1: - temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - relative_position_from_disk * length_current_label + temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - relative_position_from_disc * length_current_label else: - temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - abs(relative_position_from_disk * (average_positions_from_C1[current_label] - average_positions_from_C1[next_label])) + temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - abs(relative_position_from_disc * (average_positions_from_C1[current_label] - average_positions_from_C1[next_label])) points_average_centerline_template.append(temp_point) if use_ICBM152: # append ICBM152 centerline from PMG - points_icbm152 = centerline_icbm152.points[centerline_icbm152.index_disk[label_ref]:] + points_icbm152 = centerline_icbm152.points[centerline_icbm152.index_disc[label_ref]:] points_icbm152 = points_icbm152[::-1] points_average_centerline = np.concatenate([points_icbm152, points_average_centerline_template]) else: points_average_centerline = points_average_centerline_template + return points_average_centerline, position_template_discs - return points_average_centerline, position_template_disks - - -def generate_initial_template_space(dataset_info, points_average_centerline, position_template_disks): +def generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs, contrast = 't1', algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): ##DONE additional options in nb/generate_initial_template_space_branch """ This function 
generates the initial template space, on which all images will be registered. :param points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline - :param position_template_disks: index of intervertebral disks along the template centerline - :return: + :param position_template_discs: index of intervertebral discs along the template centerline + :return: NIFTI files in RPI orientation (template space, template centerline, template disc positions) & .npz file of template Centerline object ##DONE package updates in nb/generate_initial_template_space_branch """ - # initializing variables - path_template = dataset_info['path_template'] + contrast = dataset_info['contrast'] + path_template = dataset_info['path_data'] + 'derivatives/template/' + if not os.path.exists(path_template): os.makedirs(path_template) + x_size_of_template_space, y_size_of_template_space = 201, 201 spacing = 0.5 @@ -432,7 +477,8 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos template_space.hdr.as_analyze_map()['srow_z'][2] = spacing template_space.hdr.set_sform(template_space.hdr.get_sform()) template_space.hdr.set_qform(template_space.hdr.get_sform()) - template_space.save(path_template + 'template_space.nii.gz', dtype='uint8') + template_space.save(path_template + 'template_space.nii.gz', dtype = 'uint8') + print(f'\nSaving template space in {template_space.orientation} orientation as {path_template}template_space.nii.gz\n') # generate template centerline as an image image_centerline = template_space.copy() @@ -440,140 +486,106 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos coord_pix = image_centerline.transfo_phys2pix([coord])[0] if 0 <= coord_pix[0] < image_centerline.data.shape[0] and 0 <= coord_pix[1] < image_centerline.data.shape[1] and 0 <= coord_pix[2] < image_centerline.data.shape[2]: image_centerline.data[int(coord_pix[0]), int(coord_pix[1]), int(coord_pix[2])] = 1 - image_centerline.save(path_template + 'template_centerline.nii.gz', dtype='float32') + image_centerline.save(path_template + 'template_label-centerline.nii.gz', dtype = 'float32') + print(f'\nSaving template centerline in {template_space.orientation} orientation as {path_template}template_label-centerline.nii.gz\n') - # generate template disks position + # generate template discs position coord_physical = [] - image_disks = template_space.copy() - for disk in position_template_disks: - label = labels_regions[disk] - coord = position_template_disks[disk] - coord_pix = image_disks.transfo_phys2pix([coord])[0] + image_discs = template_space.copy() + for disc in position_template_discs: + label = labels_regions[disc] + coord = position_template_discs[disc] + coord_pix = image_discs.transfo_phys2pix([coord])[0] coord = coord.tolist() coord.append(label) coord_physical.append(coord) - if 0 <= coord_pix[0] < image_disks.data.shape[0] and 0 <= coord_pix[1] < image_disks.data.shape[1] and 0 <= coord_pix[2] < image_disks.data.shape[2]: - image_disks.data[int(coord_pix[0]), int(coord_pix[1]), int(coord_pix[2])] = label + if 0 <= coord_pix[0] < image_discs.data.shape[0] and 0 <= coord_pix[1] < image_discs.data.shape[1] and 0 <= coord_pix[2] < image_discs.data.shape[2]: + image_discs.data[int(coord_pix[0]), int(coord_pix[1]), int(coord_pix[2])] = label else: sct.printv(str(coord_pix)) - sct.printv('ERROR: the disk label ' + str(disk) + ' is not in the template image.') - image_disks.save(path_template + 'template_disks.nii.gz', dtype='uint8') - - 
# generate template centerline as a npz file
-    x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv = smooth_centerline(
-        path_template + 'template_centerline.nii.gz', algo_fitting='nurbs', verbose=0, nurbs_pts_number=4000,
-        all_slices=False, phys_coordinates=True, remove_outliers=True)
-    centerline_template = Centerline(x_centerline_fit, y_centerline_fit, z_centerline,
-                                     x_centerline_deriv, y_centerline_deriv, z_centerline_deriv)
-    centerline_template.compute_vertebral_distribution(coord_physical)
-    centerline_template.save_centerline(fname_output=path_template + 'template_centerline')
-
-
-def straighten_all_subjects(dataset_info, normalized=False, contrast='t1'):
+        sct.printv('ERROR: the disc label ' + str(disc) + ' is not in the template image.')
+    image_discs.save(path_template + 'template_label-disc.nii.gz', dtype = 'uint8')
+    print(f'\nSaving disc positions in {image_discs.orientation} orientation as {path_template}template_label-disc.nii.gz\n') ###NEW
+
+    # generate template centerline as a npz file ##DONE updated template Centerline object creation and saving as .npz file in nb/generate_initial_template_space_branch
+    param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax)
+    # centerline params of original template centerline had options that you cannot just provide `get_centerline` with anymore (algo_fitting = 'nurbs', nurbs_pts_number = 4000, all_slices = False, phys_coordinates = True, remove_outliers = True)
+    _, arr_ctl, arr_ctl_der, _ = get_centerline(image_centerline, param = param_centerline, space = 'phys') ### we don't need to save im_centerline! ### straightening._get_centerline(im_seg,param_centerline, 1) ### What Rohan & Benjamin added!
+    centerline_template = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2])
+    centerline_template.compute_vertebral_distribution(coord_physical)
+    centerline_template.save_centerline(fname_output = path_template + 'template_label-centerline')
+    print(f'\nSaving template centerline as .npz file (saves all Centerline object information, not just coordinates) as {path_template}template_label-centerline.npz\n')
+
+def straighten_all_subjects(dataset_info, normalized = False, contrast = 't1'): ### NOTE: outputs this to "BIDS" dir for this!
""" This function straighten all images based on template centerline :param dataset_info: dictionary containing dataset information :param normalized: True if images were normalized before straightening :param contrast: {'t1', 't2'} """ + contrast = dataset_info['contrast'] path_data = dataset_info['path_data'] - path_template = dataset_info['path_template'] - list_subjects = dataset_info['subjects'] + path_template = dataset_info['path_data'] + 'derivatives/template/' + list_subjects = dataset_info['subjects'].split(', ') - if normalized: - fname_in = contrast + '_norm.nii.gz' - fname_out = contrast + '_straight_norm.nii.gz' - else: - fname_in = contrast + '.nii.gz' - fname_out = contrast + '_straight.nii.gz' + if not os.path.exists(dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord'): os.makedirs(dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord') # straightening of each subject on the new template - tqdm_bar = tqdm(total=len(list_subjects), unit='B', unit_scale=True, desc="Status", ascii=True) + tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) for subject_name in list_subjects: - path_data_subject = path_data + subject_name + '/' + contrast + '/' - + folder_out = dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord/' + subject_name + '/' + dataset_info['data_type'] + if not os.path.exists(folder_out): os.makedirs(folder_out) + + fname_image = path_data + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '.nii.gz' + fname_image_seg = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-SC_seg'] + '.nii.gz' + fname_discs = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-disc'] + fname_image_discs = fname_discs + '-manual.nii.gz' if os.path.isfile(fname_discs + '-manual.nii.gz') else fname_discs + '.nii.gz' + fname_image_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline.nii.gz' + fname_out = subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz' if normalized else subject_name + dataset_info['suffix_image'] + '_straight.nii.gz' + + fname_input_seg = fname_image_seg if os.path.isfile(fname_image_seg) else fname_image_centerline # go to output folder - sct.printv('\nStraightening ' + path_data_subject) - os.chdir(path_data_subject) - sct.run('sct_straighten_spinalcord' - ' -i ' + fname_in + - ' -s ' + contrast + dataset_info['suffix_centerline'] + '.nii.gz' - ' -ldisc_input ' + contrast + dataset_info['suffix_disks'] + '.nii.gz' - ' -dest ' + path_template + 'template_centerline.nii.gz' - ' -ldisc_dest ' + path_template + 'template_disks.nii.gz' - ' -disable-straight2curved' - ' -param threshold_distance=1', verbose=1) - - image_straight = Image(sct.add_suffix(fname_in, '_straight')) - image_straight.save(fname_out, dtype='float32') - - tqdm_bar.update(1) - tqdm_bar.close() - - -def copy_preprocessed_images(dataset_info, contrast='t1'): - path_data = dataset_info['path_data'] - path_template = dataset_info['path_template'] - list_subjects = dataset_info['subjects'] - - fname_in = contrast + '_straight_norm.nii.gz' - - tqdm_bar = tqdm(total=len(list_subjects), unit='B', unit_scale=True, desc="Status", ascii=True) - for subject_name in 
list_subjects: - path_data_subject = path_data + subject_name + '/' + contrast + '/' - os.chdir(path_data_subject) - shutil.copy(fname_in, path_template + subject_name + '_' + contrast + '.nii.gz') + sct.printv('\nStraightening ' + fname_image) + os.chdir(folder_out) + + # straighten centerline + os.system('sct_straighten_spinalcord' + + ' -i ' + fname_image + + ' -s ' + fname_input_seg + + ' -dest ' + path_template + 'template_label-centerline.nii.gz' + + ' -ldisc-input ' + fname_image_discs + + ' -ldisc-dest ' + path_template + 'template_label-disc.nii.gz' + + ' -ofolder ' + folder_out + + ' -o ' + fname_out + + ' -disable-straight2curved' + + ' -param threshold_distance=1') tqdm_bar.update(1) tqdm_bar.close() - -def normalize_intensity_template(dataset_info, fname_template_centerline=None, contrast='t1', verbose=1): +def normalize_intensity_template(dataset_info, contrast = 't1', verbose = 1): ### Removed fname_template_centerline = None -> why would we want this? """ This function normalizes the intensity of the image inside the spinal cord - :param fname_template: path to template image - :param fname_template_centerline: path to template centerline (binary image or npz) :return: """ - - path_data = dataset_info['path_data'] - list_subjects = dataset_info['subjects'] - path_template = dataset_info['path_template'] + contrast = dataset_info['contrast'] + fname_template_centerline = dataset_info['path_data'] + 'derivatives/template/' + 'template_label-centerline.npz' + list_subjects = dataset_info['subjects'].split(', ') ###NEW average_intensity = [] intensity_profiles = {} + - tqdm_bar = tqdm(total=len(list_subjects), unit='B', unit_scale=True, desc="Status", ascii=True) + tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) # computing the intensity profile for each subject for subject_name in list_subjects: - path_data_subject = path_data + subject_name + '/' + contrast + '/' - if fname_template_centerline is None: - fname_image = path_data_subject + contrast + '.nii.gz' - fname_image_centerline = path_data_subject + contrast + dataset_info['suffix_centerline'] + '.nii.gz' - else: - fname_image = path_data_subject + contrast + '_straight.nii.gz' - if fname_template_centerline.endswith('.npz'): - fname_image_centerline = None - else: - fname_image_centerline = fname_template_centerline - + fname_image = dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_straight.nii.gz' + centerline_template = Centerline(fname = fname_template_centerline) image = Image(fname_image) nx, ny, nz, nt, px, py, pz, pt = image.dim + x, y, z, xd, yd, zd = average_coordinates_over_slices(self = centerline_template, image = image) - if fname_image_centerline is not None: - # open centerline from template - number_of_points_in_centerline = 4000 - x_centerline_fit, y_centerline_fit, z_centerline, x_centerline_deriv, y_centerline_deriv, z_centerline_deriv = smooth_centerline( - fname_image_centerline, algo_fitting='nurbs', verbose=0, - nurbs_pts_number=number_of_points_in_centerline, - all_slices=False, phys_coordinates=True, remove_outliers=True) - centerline_template = Centerline(x_centerline_fit, y_centerline_fit, z_centerline, - x_centerline_deriv, y_centerline_deriv, z_centerline_deriv) - else: - centerline_template = Centerline(fname=fname_template_centerline) - - x, y, z, xd, yd, zd = 
centerline_template.average_coordinates_over_slices(image)
 
         # Compute intensity values
         z_values, intensities = [], []
@@ -601,23 +613,23 @@ def normalize_intensity_template(dataset_info, fname_template_centerline=None, c
 
         # Preparing data for smoothing
         arr_int = [[z_values[i], intensities[i]] for i in range(len(z_values))]
-        arr_int.sort(key=lambda x: x[0])  # and make sure it is ordered with z
-
-        def smooth(x, window_len=11, window='hanning'):
+        arr_int.sort(key = lambda x: x[0])  # and make sure it is ordered with z
+
+        def smooth(x, window_len = 11, window = 'hanning'):
             """smooth the data using a window with requested size.
             """
-
+
             if x.ndim != 1:
-                raise ValueError, "smooth only accepts 1 dimension arrays."
+                raise ValueError("smooth only accepts 1 dimension arrays.")
 
             if x.size < window_len:
-                raise ValueError, "Input vector needs to be bigger than window size."
+                raise ValueError("Input vector needs to be bigger than window size.")
 
             if window_len < 3:
                 return x
 
             if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
-                raise ValueError, "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
+                raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
 
             s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
             if window == 'flat':  # moving average
@@ -625,12 +637,13 @@ def smooth(x, window_len=11, window='hanning'):
             else:
                 w = eval('np.' + window + '(window_len)')
 
-            y = np.convolve(w / w.sum(), s, mode='same')
+            y = np.convolve(w / w.sum(), s, mode = 'same')
             return y[window_len - 1:-window_len + 1]
 
+
         # Smoothing
         intensities = [c[1] for c in arr_int]
-        intensity_profile_smooth = smooth(np.array(intensities), window_len=50)
+        intensity_profile_smooth = smooth(np.array(intensities), window_len = 50)
 
         average_intensity.append(np.mean(intensity_profile_smooth))
         intensity_profiles[subject_name] = intensity_profile_smooth
@@ -648,62 +661,72 @@ def smooth(x, window_len=11, window='hanning'):
 
     # normalize the intensity of the image based on spinal cord
     for subject_name in list_subjects:
-        path_data_subject = path_data + subject_name + '/' + contrast + '/'
-        fname_image = path_data_subject + contrast + '_straight.nii.gz'
-
+        fname_image = dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_straight.nii.gz'
+        fname_image_normalized = dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz'
         image = Image(fname_image)
         nx, ny, nz, nt, px, py, pz, pt = image.dim
-        image_image_new = image.copy()
-        image_image_new.change_type(dtype='float32')
+        image_new = image.copy()
+        image_new.change_type(dtype = 'float32')
         for i in range(nz):
-            image_image_new.data[:, :, i] *= average_intensity / intensity_profiles[subject_name][i]
+            if intensity_profiles[subject_name][i] == 0: intensity_profiles[subject_name][i] = 0.001 ### is this how Rohan solved this?
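+            # the 0.001 floor set above only exists to keep the division below from dividing by zero
+            # on slices where the smoothed intensity profile is exactly zero; the specific floor value
+            # is an arbitrary small constant (an assumption, not a validated choice)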
+ image_new.data[:, :, i] *= average_intensity / intensity_profiles[subject_name][i] # Save intensity normalized template - fname_image_normalized = sct.add_suffix(fname_image, '_norm') - image_image_new.save(fname_image_normalized) - + image_new.save(fname_image_normalized) + +def copy_preprocessed_images(dataset_info, contrast = 't1'): + contrast = dataset_info['contrast'] + list_subjects = dataset_info['subjects'].split(', ') + + tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) + + for subject_name in list_subjects: + fname_image = dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz' + shutil.copy(fname_image, dataset_info['path_data'] + 'derivatives/template/' + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz') + tqdm_bar.update(1) + tqdm_bar.close() -def create_mask_template(dataset_info, contrast='t1'): - path_template = dataset_info['path_template'] - subject_name = dataset_info['subjects'][0] +def create_mask_template(dataset_info, contrast = 't1'): + path_template = dataset_info['path_data'] + 'derivatives/template/' + subject_name = dataset_info['subjects'].split(', ')[0] - template_mask = Image(path_template + subject_name + '_' + contrast + '.nii.gz') + template_mask = Image(path_template + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz') template_mask.data *= 0.0 template_mask.data += 1.0 - template_mask.save(path_template + 'template_mask.nii.gz') + template_mask.save(path_template + '/template_mask.nii.gz') # if mask already present, deleting it - if os.path.isfile(path_template + 'template_mask.mnc'): - os.remove(path_template + 'template_mask.mnc') - sct.run('nii2mnc ' + path_template + 'template_mask.nii.gz ' + ' ' + path_template + 'template_mask.mnc') + if os.path.isfile(path_template + '/template_mask.mnc'): os.remove(path_template + '/template_mask.mnc') - return path_template + 'template_mask.mnc' + os.system('nii2mnc ' + path_template + '/template_mask.nii.gz ' + ' ' + path_template + '/template_mask.mnc') + return path_template + '/template_mask.mnc' - -def convert_data2mnc(dataset_info, contrast='t1'): - path_template = dataset_info['path_template'] - list_subjects = dataset_info['subjects'] +def convert_data2mnc(dataset_info, contrast = 't1'): + contrast = dataset_info['contrast'] + path_template = dataset_info['path_data'] + 'derivatives/template/' + list_subjects = dataset_info['subjects'].split(', ') path_template_mask = create_mask_template(dataset_info, contrast) - output_list = open('subjects.csv', "wb") - writer = csv.writer(output_list, delimiter =',', quotechar=',', quoting=csv.QUOTE_MINIMAL) + output_list = open('subjects.csv', "w") + writer = csv.writer(output_list, delimiter = ',', quotechar = ',', quoting = csv.QUOTE_MINIMAL) - tqdm_bar = tqdm(total=len(list_subjects), unit='B', unit_scale=True, desc="Status", ascii=True) + tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) for subject_name in list_subjects: - fname_nii = path_template + subject_name + '_' + contrast + '.nii.gz' - fname_mnc = path_template + subject_name + '_' + contrast + '.mnc' + fname_nii = path_template + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz' + fname_mnc = path_template + subject_name + dataset_info['suffix_image'] + '_straight_norm.mnc' - # if mask 
already present, deleting it
-        if os.path.isfile(fname_mnc):
-            os.remove(fname_mnc)
+        # if file already present, deleting it
+        if os.path.isfile(fname_mnc): os.remove(fname_mnc)
 
-        sct.run('nii2mnc ' + fname_nii + ' ' + fname_mnc)
+        os.system('nii2mnc ' + fname_nii + ' ' + fname_mnc)
+        os.remove(fname_nii) # remove duplicate nifti file!
 
-        writer.writerow([fname_mnc, path_template_mask])
+        writer.writerow([fname_mnc,path_template_mask])
 
         tqdm_bar.update(1)
     tqdm_bar.close()
 
-    output_list.close()
\ No newline at end of file
+    output_list.close()
+

From 9e954a3a17b8a5d900e9935d0ba67e88c20475fe Mon Sep 17 00:00:00 2001
From: NadiaBlostein
Date: Mon, 17 Apr 2023 09:47:57 -0400
Subject: [PATCH 02/54] updating pipeline according to preprocessing fxns

---
 pipeline.py | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/pipeline.py b/pipeline.py
index 899c6e7..3c36cff 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -4,38 +4,32 @@
 """
 
 from preprocessing import *
+import sys
 
-# downloading data and configuration file from OSF
-path_data = download_data_template(path_data='./')
-
-# extracting info from dataset
-dataset_info = read_dataset(path_data + 'configuration.json', path_data=path_data)
+dataset_info = read_dataset(sys.argv[1])
 
 # generating centerlines
-list_centerline = generate_centerline(dataset_info=dataset_info, contrast='t1', regenerate=True)
+list_centerline = generate_centerline(dataset_info = dataset_info, regenerate = True)
 
 # computing average template centerline and vertebral distribution
-points_average_centerline, position_template_disks = average_centerline(list_centerline=list_centerline,
-                                                                        dataset_info=dataset_info,
-                                                                        use_ICBM152=False,
-                                                                        use_label_ref='C1')
+points_average_centerline, position_template_discs = average_centerline(list_centerline = list_centerline,
+                                                                        dataset_info = dataset_info,
+                                                                        use_ICBM152 = False,
+                                                                        use_label_ref = 'C1')
 
 # generating the initial template space
-generate_initial_template_space(dataset_info=dataset_info,
-                                points_average_centerline=points_average_centerline,
-                                position_template_disks=position_template_disks)
+generate_initial_template_space(dataset_info = dataset_info,
+                                points_average_centerline = points_average_centerline,
+                                position_template_discs = position_template_discs)
 
 # straightening of all spinal cord
-straighten_all_subjects(dataset_info=dataset_info, contrast='t1')
+straighten_all_subjects(dataset_info = dataset_info)
 
 # normalize image intensity inside the spinal cord
-normalize_intensity_template(dataset_info=dataset_info,
-                             fname_template_centerline=dataset_info['path_template'] + 'template_centerline.npz',
-                             contrast='t1',
-                             verbose=1)
+normalize_intensity_template(dataset_info = dataset_info)
 
 # copy preprocessed dataset in template folder
-copy_preprocessed_images(dataset_info=dataset_info, contrast='t1')
+copy_preprocessed_images(dataset_info = dataset_info)
 
 # converting results to Minc format
-convert_data2mnc(dataset_info, contrast='t1')
+convert_data2mnc(dataset_info)

From 46c2c7a007610f341f68aa8b120866f804b7ad50 Mon Sep 17 00:00:00 2001
From: NadiaBlostein
Date: Mon, 17 Apr 2023 09:57:44 -0400
Subject: [PATCH 03/54] template for configuration file used by this pipeline,
 according to BIDS convention (previously discussed and agreed upon with
 Rohan)

---
 configuration.json | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100755 configuration.json

diff --git a/configuration.json b/configuration.json
new file mode 100755
index 0000000..da1c84e
--- /dev/null
+++ b/configuration.json
@@ -0,0 +1,9 @@
+{
+    "path_data": "/path/to/data/",
+    "subjects": "sub-101, sub-102", /* subject ID prefix according to BIDS convention */
+    "data_type":"anat", /* see BIDS documentation */
+    "contrast":"t1", /* it is not necessarily related to file name; it is related to the contrast that will be called when you use different SCT fxns */
+    "suffix_image": "_rec-composed_T1w", /* suffix after subject id but before file extensions */
+    "suffix_label-SC_seg": "_rec-composed_T1w_label-SC_seg", /* suffix after subject id + suffix_image but before file extensions */
+    "suffix_label-disc":"_rec-composed_T1w_label-disc"/* suffix after subject id + suffix_image but before file extensions */
+}
\ No newline at end of file

From 75d3190b4c9d1d10ce4b8d083cf0c2cc3bf966a2 Mon Sep 17 00:00:00 2001
From: NadiaBlostein
Date: Mon, 17 Apr 2023 10:01:00 -0400
Subject: [PATCH 04/54] changed description of dataset structure according to
 BIDS convention

---
 README.md | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index f32d67f..fd4bc59 100644
--- a/README.md
+++ b/README.md
@@ -67,25 +67,29 @@ The template generation framework can be configured by the file "configuration.j
 - "suffix_segmentation": optional suffix for the spinal cord segmentation, that can be used to register the segmentation on the template space and generate probabilistic atlases.
 
 ## Dataset structure
-The dataset should be arranged in a structured fashion, as the following:
-- subject_name/
-  - t1/
-    - t1.nii.gz
-    - t1{suffix_centerline}.nii.gz
-    - t1{suffix_disks}.nii.gz
-    - t1{suffix_segmentation}.nii.gz
-  - t2/
-    - t2.nii.gz
-    - t2{suffix_centerline}.nii.gz
-    - t2{suffix_disks}.nii.gz
-    - t2{suffix_segmentation}.nii.gz
-  - dmri/
-    - dmri.nii.gz
-    - dmri{suffix_centerline}.nii.gz
-    - dmri{suffix_disks}.nii.gz
-    - bvecs.txt
-    - bvals.txt
+The dataset should be arranged according to the BIDS convention. Using the two example subjects listed in the `configuration.json` template file, this would be as follows:
+- path_data
+  - sub-101
+    - data_type
+      - 'sub-101' + suffix_image + '.nii.gz'
+      - 'sub-101' + suffix_image + '.json'
+  - sub-102
+    - data_type
+      - 'sub-102' + suffix_image + '.nii.gz'
+      - 'sub-102' + suffix_image + '.json'
   - ...
+  - derivatives
+    - sub-101
+      - data_type
+        - 'sub-101' + suffix_image + suffix_label-SC_seg + '.nii.gz'
+        - 'sub-101' + suffix_image + suffix_label-disc + '.nii.gz'
+        - 'sub-101' + suffix_image + suffix_label-disc + '-manual.nii.gz' # optional, will automatically be prioritized over non-manual if found!
+    - sub-102
+      - data_type
+        - 'sub-102' + suffix_image + suffix_label-SC_seg + '.nii.gz'
+        - 'sub-102' + suffix_image + suffix_label-disc + '.nii.gz'
+        - 'sub-102' + suffix_image + suffix_label-disc + '-manual.nii.gz' # optional
+  - ...
 
 ## Licence
 This repository is under a MIT licence.

From fbe663096f6a70500b26e25abf9191bcf6fb8435 Mon Sep 17 00:00:00 2001
From: NadiaBlostein
Date: Mon, 17 Apr 2023 10:21:19 -0400
Subject: [PATCH 05/54] better description of configuration.json options

---
 README.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index fd4bc59..94c2e2d 100644
--- a/README.md
+++ b/README.md
@@ -59,12 +59,13 @@ python -m scoop -n N -vvv generate_template.py
 
 ## How to generate your own template?
The template generation framework can be configured by the file "configuration.json", which includes the following variables:
-- "path_data": absolute path to the dataset, including all images [correctly structured](#dataset-structure).
-- "path_template": absolute path to the output folder, in which the final template will be placed.
-- "subjects": list of subjects names, that must be the same as folder names in the dataset structure.
-- "suffix_centerline": suffix for binary centerline.
-- "suffix_disks": suffix for binary images of the intervertebral disks labeling.
-- "suffix_segmentation": optional suffix for the spinal cord segmentation, that can be used to register the segmentation on the template space and generate probabilistic atlases.
+- "path_data": absolute path to the dataset, including all images [correctly structured](#dataset-structure); ends with `/`.
+- "subjects": list of subject names, which must be the same as the folder names in the dataset structure (e.g. `sub-101`).
+- "data_type": [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure (e.g. `anat`).
+- "contrast": the contrast option passed to SCT functions (either `t1` or `t2`); it does not necessarily correspond to the actual acquisition contrast (e.g. `t1`).
+- "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`).
+- "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject ID but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`).
+- "suffix_label-disc": suffix for binary images of the intervertebral disc labeling, after subject ID but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`).

## Dataset structure
The dataset should be arranged according to the BIDS convention. 
Using the two examples subjects listed in the `configuration.json` template file, this would be as follows: From 1552a5c1c5f580b857239967083974de964587f8 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 17 Apr 2023 10:21:39 -0400 Subject: [PATCH 06/54] removed comments from configuration.json as the same information can be found in the README --- configuration.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configuration.json b/configuration.json index da1c84e..070ce0f 100755 --- a/configuration.json +++ b/configuration.json @@ -1,9 +1,9 @@ { "path_data": "/path/to/data/", - "subjects": "sub-101, sub-102", /* subject ID prefix according to BIDS convention */ - "data_type":"anat", /* see BIDS documentation */ - "contrast":"t1", /* it is not necessarily related to file name; it is related to the contrast that will be called when you use different SCT fxns */ - "suffix_image": "_rec-composed_T1w", /* suffix after subject id but before file extensions */ - "suffix_label-SC_seg": "_rec-composed_T1w_label-SC_seg", /* suffix after subject id + suffix_image but before file extensions */ - "suffix_label-disc":"_rec-composed_T1w_label-disc"/* suffix after subject id + suffix_image but before file extensions */ + "subjects": "sub-101, sub-102", + "data_type":"anat", + "contrast":"t1", + "suffix_image": "_rec-composed_T1w", + "suffix_label-SC_seg": "_rec-composed_T1w_label-SC_seg", + "suffix_label-disc":"_rec-composed_T1w_label-disc" } \ No newline at end of file From 94a1dd2e9e4cdbde6b3baf7370f9c5f58a33af79 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Wed, 19 Apr 2023 08:53:05 -0400 Subject: [PATCH 07/54] removed unecessary function --- preprocessing.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/preprocessing.py b/preprocessing.py index f6d3b89..f402ad2 100755 --- a/preprocessing.py +++ b/preprocessing.py @@ -108,34 +108,6 @@ def average_coordinates_over_slices(self, image): ### deprecated from latest ver # return coordinate_result -def get_lowest_vert_dataset(dataset_info): ### NEW + FIX - """ - This function generates spinal cord centerline from binary images (either an image of centerline or segmentation) - :param dataset_info: dictionary containing dataset information - :param contrast: {'t1', 't2'} - :return list of centerline objects - """ - path_data = dataset_info['path_data'] - path_derivatives = path_data + 'derivatives' - list_subjects = dataset_info['subjects'].split(', ') - lowest_vert = 100 - - for subject_name in list_subjects: - fname_centerline = path_derivatives + '/' + dataset_info['pipeline_centerline'] + '/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_centerline'] - if os.path.isfile(fname_centerline + '.npz'): - centerline = Centerline(fname = fname_centerline + '.npz') - dist_C1_dict = {} - tmp_list = [] - for key,value in centerline.distance_from_C1label.items(): - if value in tmp_list: continue - else: - tmp_list.append(value) - dist_C1_dict[key] = value - if len(dist_C1_dict) < lowest_vert: lowest_vert = len(dist_C1_dict) - else: - print("Centerline for " + subject_name + " does not exist!") - return(lowest_vert) - def read_dataset(fname_json = 'configuration.json', path_data = './'): ### TO COMPLETE """ This function reads a json file that describes the dataset, including the list of subjects as well as From cbaedf8a88bff258ac8e95392117bfeea64a93d7 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Tue, 2 May 2023 11:35:47 -0400 Subject: 
[PATCH 08/54] added instructions on how to generate template on compute canada --- README.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/README.md b/README.md index 94c2e2d..a08a2f8 100644 --- a/README.md +++ b/README.md @@ -92,5 +92,46 @@ The dataset should be arranged according to the BIDS convention. Using the two e - 'sub-102' + suffix_image + suffix_label-disc + '-manual.nii.gz' # optional - ... +## Setting up on Compute Canada to generate template +Once the preprocessing is complete, you will generate the template with `generate_template.py`. This will require minctoolkit v2, minc2simple and nist-mni-pipelines. The easiest way to set up is to use Compute Canada and set up your environment as follows: + +1. Load the right modules and install packages from pip wheel +``` +module load StdEnv/2020 gcc/9.3.0 minc-toolkit/1.9.18.1 python/3.8.10 +pip install --upgrade pip +pip install scoop +``` + +2. Set up NIST-MNI pipelines +``` +git clone https://github.com/vfonov/nist_mni_pipelines.git +nano ~/.bashrc +``` +add the following: +``` +export PYTHONPATH="${PYTHONPATH}:/path/to/nist_mni_pipelines" +export PYTHONPATH="${PYTHONPATH}:/path/to/nist_mni_pipelines/" +export PYTHONPATH="${PYTHONPATH}:/path/to/nist_mni_pipelines/ipl/" +export PYTHONPATH="${PYTHONPATH}:/path/to/nist_mni_pipelines/ipl" +``` +``` +source ~/.bashrc +``` +3. Minc2simple +``` +pip install "git+https://github.com/NIST-MNI/minc2-simple.git@develop_new_build#subdirectory=python" +``` + +4. Create my_job.sh +``` +#!/bin/bash +python -m scoop -vvv generate_template.py +``` + +5. Batch on Compute Canada +``` +sbatch --time=24:00:00 --mem-per-cpu 4000 my_job.sh # will probably require batching several times, depending on number of subjects +``` + ## Licence This repository is under a MIT licence. From 2d1ed17dfaedd794f006e176e6b88cdcd9ee6a97 Mon Sep 17 00:00:00 2001 From: Julien Cohen-Adad Date: Wed, 3 May 2023 14:09:06 -0400 Subject: [PATCH 09/54] Added info on Alliance --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a08a2f8..6b17654 100644 --- a/README.md +++ b/README.md @@ -92,8 +92,9 @@ The dataset should be arranged according to the BIDS convention. Using the two e - 'sub-102' + suffix_image + suffix_label-disc + '-manual.nii.gz' # optional - ... -## Setting up on Compute Canada to generate template -Once the preprocessing is complete, you will generate the template with `generate_template.py`. This will require minctoolkit v2, minc2simple and nist-mni-pipelines. The easiest way to set up is to use Compute Canada and set up your environment as follows: +## Setting up on Canada's Alliance CPU cluster to generate template + +It is recommended to run the template generation on a large cluster. If you are in Canada, you could make use of [the Alliance](https://alliancecan.ca/en) (formerly Compute Canada), which is a bunch of CPU nodes accessible to researchers in Canada. Once the preprocessing is complete, you will generate the template with `generate_template.py`. This will require minctoolkit v2, minc2simple and nist-mni-pipelines. The easiest way to set up is to use Compute Canada and set up your environment as follows: 1. Load the right modules and install packages from pip wheel ``` @@ -128,7 +129,7 @@ pip install "git+https://github.com/NIST-MNI/minc2-simple.git@develop_new_build# python -m scoop -vvv generate_template.py ``` -5. Batch on Compute Canada +5. 
Batch on Alliance Canada ``` sbatch --time=24:00:00 --mem-per-cpu 4000 my_job.sh # will probably require batching several times, depending on number of subjects ``` From ede823d35624d433dfd46ef4d4924b83a6e06a31 Mon Sep 17 00:00:00 2001 From: NadiaBlostein <33006815+NadiaBlostein@users.noreply.github.com> Date: Wed, 3 May 2023 14:26:33 -0400 Subject: [PATCH 10/54] Update preprocessing.py Explaining param `lowest_disc_quantile` in ` average_centerline` function and updating its default value to 0.25. --- preprocessing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/preprocessing.py b/preprocessing.py index f402ad2..aea838b 100755 --- a/preprocessing.py +++ b/preprocessing.py @@ -228,14 +228,16 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener # centerline.compute_vertebral_distribution(coord_physical, label_reference = 'PMG') # return centerline -def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_label_ref = None, lowest_disc_quantile = 0.75): +def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_label_ref = None, lowest_disc_quantile = 0.25): """ This function compute the average centerline and vertebral distribution, that will be used to create the final template space. :param list_centerline: list of Centerline objects, for all subjects :param dataset_info: dictionary containing dataset information + :param lowest_disc_quantile: quantile in the distribution of lowest disc labels across subjects (will be used to determine + lowest disc label in the template) :return: points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline - position_template_discs: index of intervertebral discs along the template centerline + position_template_discs: index of intervertebral discs along the template centerline """ # extracting centerline from ICBM152 From 03c092da6e9b125a885f39eda0b6968048edc81b Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Tue, 9 May 2023 10:22:34 -0400 Subject: [PATCH 11/54] removing 'manual', issue #17' --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 6b17654..2287fdd 100644 --- a/README.md +++ b/README.md @@ -84,12 +84,10 @@ The dataset should be arranged according to the BIDS convention. Using the two e - data_type - 'sub-101' + suffix_image + suffix_label-SC_seg + '.nii.gz' - 'sub-101' + suffix_image + suffix_label-disc + '.nii.gz' - - 'sub-101' + suffix_image + suffix_label-disc + '-manual.nii.gz' # optional, will automatically be prioritized over non-manual if found! - sub-102 - data_type - 'sub-102' + suffix_image + suffix_label-SC_seg + '.nii.gz' - 'sub-102' + suffix_image + suffix_label-disc + '.nii.gz' - - 'sub-102' + suffix_image + suffix_label-disc + '-manual.nii.gz' # optional - ... 
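
Because every expected filename is assembled as subject ID + suffix, it can be worth verifying the layout before launching the pipeline. Below is a minimal sketch of such a check (a hypothetical helper, not part of the pipeline), assuming the label suffixes already contain the image suffix, as in the example values of `configuration.json`:

```python
import json
import os

def check_dataset(fname_json='configuration.json'):
    """Print every expected file that is missing from the dataset."""
    with open(fname_json) as f:
        config = json.load(f)
    # "subjects" is a single comma-separated string, e.g. "sub-101, sub-102"
    for subject in config['subjects'].split(', '):
        anat = os.path.join(config['path_data'], subject, config['data_type'])
        # assumed derivatives layout, following the structure shown above
        deriv = os.path.join(config['path_data'], 'derivatives', subject, config['data_type'])
        expected = [
            os.path.join(anat, subject + config['suffix_image'] + '.nii.gz'),
            os.path.join(deriv, subject + config['suffix_label-SC_seg'] + '.nii.gz'),
            os.path.join(deriv, subject + config['suffix_label-disc'] + '.nii.gz'),
        ]
        for path in expected:
            if not os.path.isfile(path):
                print('Missing: ' + path)

check_dataset()
```
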
## Setting up on Canada's Alliance CPU cluster to generate template From 77a166b3f7406dd16abff030d5e08fc8edf4f844 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Tue, 9 May 2023 10:30:33 -0400 Subject: [PATCH 12/54] format directory with 'tree' structure according to https://ivadomed.org/data.html, issue #17 --- README.md | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 2287fdd..b0b47f2 100644 --- a/README.md +++ b/README.md @@ -69,26 +69,25 @@ The template generation framework can be configured by the file "configuration.j ## Dataset structure The dataset should be arranged according to the BIDS convention. Using the two examples subjects listed in the `configuration.json` template file, this would be as follows: -- path_data - - sub-101 - - data_type - - 'sub-101' + suffix_image + '.nii.gz' - - 'sub-101' + suffix_image + '.json' - - sub-102 - - data_type - - 'sub-102' + suffix_image + '.nii.gz' - - 'sub-102' + suffix_image + '.json' - - ... - - derivatives - - sub-101 - - data_type - - 'sub-101' + suffix_image + suffix_label-SC_seg + '.nii.gz' - - 'sub-101' + suffix_image + suffix_label-disc + '.nii.gz' - - sub-102 - - data_type - - 'sub-102' + suffix_image + suffix_label-SC_seg + '.nii.gz' - - 'sub-102' + suffix_image + suffix_label-disc + '.nii.gz' - - ... +dataset/ +└── dataset_description.json +└── participants.tsv <-------------------------------- Metadata describing subjects attributes e.g. sex, age, etc. +└── sub-01 <------------------------------------------ Folder enclosing data for subject 1 +└── sub-02 +└── sub-03 + └── anat <----------------------------------------- `anat` can be replaced by the value of `data_type` in configuration.json + └── sub-03_T1w.nii.gz <----------------------- MRI image in NIfTI format; `_T1w` can be replaced by the value of `suffix_image` in configuration.json + └── sub-03_T1w.json <------------------------- Metadata including image parameters, MRI vendor, etc. + └── sub-03_T2w.nii.gz + └── sub-03_T2w.json +└── derivatives + └── labels + └── sub-03 + └── anat + └── sub-03_T1w_label-SC_seg.nii.gz <-- Spinal cord segmentation; `_T1w` can be replaced by the value of `suffix_image` in configuration.json + └── sub-03_T1w_label-disc.nii.gz <---- Disc labels; `_T1w` can be replaced by the value of `suffix_image` in configuration.json + └── sub-03_T2w_label-SC_seg.nii.gz + └── sub-03_T2w_label-disc.nii.gz ## Setting up on Canada's Alliance CPU cluster to generate template From cf6b81be7e8a1a591a159b335c5f19aa14ff6f9d Mon Sep 17 00:00:00 2001 From: NadiaBlostein <33006815+NadiaBlostein@users.noreply.github.com> Date: Tue, 9 May 2023 12:10:08 -0400 Subject: [PATCH 13/54] Update README.md Fixed spacing issue in `tree` BIDS structure --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index b0b47f2..0cf7d0f 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ The template generation framework can be configured by the file "configuration.j ## Dataset structure The dataset should be arranged according to the BIDS convention. Using the two examples subjects listed in the `configuration.json` template file, this would be as follows: +``` dataset/ └── dataset_description.json └── participants.tsv <-------------------------------- Metadata describing subjects attributes e.g. sex, age, etc. 
@@ -88,6 +89,7 @@ dataset/ └── sub-03_T1w_label-disc.nii.gz <---- Disc labels; `_T1w` can be replaced by the value of `suffix_image` in configuration.json └── sub-03_T2w_label-SC_seg.nii.gz └── sub-03_T2w_label-disc.nii.gz +``` ## Setting up on Canada's Alliance CPU cluster to generate template From 5afb977d6ce39c250ceb20dcde5d02b65c704b4e Mon Sep 17 00:00:00 2001 From: Rohan Banerjee Date: Wed, 17 May 2023 13:26:51 -0400 Subject: [PATCH 14/54] removed space parameter for the get_centerline method --- preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing.py b/preprocessing.py index aea838b..51e5166 100755 --- a/preprocessing.py +++ b/preprocessing.py @@ -183,7 +183,7 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener coord_physical.append(c_p) # extracting centerline - im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline, space = 'phys') + im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline) # save centerline as .nii.gz file im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32') @@ -485,7 +485,7 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos # generate template centerline as a npz file ##DONE updated template Centerline object creation and saving as .npz file in nb/generate_initial_template_space_branch param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) # centerline params of original template centerline had options that you cannot just provide `get_centerline` with anymroe (algo_fitting = 'nurbs', nurbs_pts_number = 4000, all_slices = False, phys_coordinates = True, remove_outliers = True) - _, arr_ctl, arr_ctl_der, _ = get_centerline(image_centerline, param = param_centerline, space = 'phys') ### we don't need to save im_centerline! ### straightening._get_centerline(im_seg,param_centerline, 1) ### What Rohan & Benjamin added! + _, arr_ctl, arr_ctl_der, _ = get_centerline(image_centerline, param = param_centerline) ### we don't need to save im_centerline! ### straightening._get_centerline(im_seg,param_centerline, 1) ### What Rohan & Benjamin added! centerline_template = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) centerline_template.compute_vertebral_distribution(coord_physical) centerline_template.save_centerline(fname_output = path_template + 'template_label-centerline') From 004f8d1f29aec60ba3ba662a4c438851c1f6e1ca Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:06:57 -0400 Subject: [PATCH 15/54] updating README --- README.md | 96 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 0cf7d0f..edfbc72 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ Framework for creating unbiased MRI templates of the spinal cord. ## Dependencies -- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) +- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8, `commit 7ead83200d7ad9ee5e0d112e77a1d7b894add738` -SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. 
+SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. The current version of the pipeline uses SCT 5.8 in development mode (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0. - [ANIMAL registration framework, part of the IPL longitudinal pipeline](https://github.com/vfonov/nist_mni_pipelines) @@ -35,41 +35,8 @@ On OSX, you may need to recompile Minc Toolkit from source to make sure all libr Install this python library in SCT python. -## Get started -The script "pipeline.py" contains several functions to preprocess spinal cord MRI data. Preprocessing includes: -1) extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects. -2) computing the average centerline, by averaging the position of each intervertebral disks. The average centerline of the spinal cord is straightened and merged with the ICBM152 template. -3) generating the initial template space, based on the average centerline and positions of intervertebral disks. -4) straightening of all subjects on the initial template space - -A small dataset, containing 5 T1w and T2w images, is available [here](https://osf.io/h73cm/) and is used as example for preprocessing. The dataset is downloaded automatically by the preprocessing script. To use your own dataset and images, follow the section [How to generate your own template?](#how-to-generate-your-own-template). The data preprocessing is performed by running the script `pipeline.py`, after making sure to use SCT python: - -``` -source sct_launcher -python pipeline.py -``` - -One the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each others. - -Now, you can generate the template using the IPL pipeline with the following command, where N has to be replace by the number of subjects: - -``` -python -m scoop -n N -vvv generate_template.py -``` - -## How to generate your own template? -The template generation framework can be configured by the file "configuration.json", that includes the following variables: -- "path_data": absolute path to the dataset, including all images [correctly structured](#dataset-structure); ends with `/`. -- "subjects": list of subjects names, that must be the same as folder names in the dataset structure (e.g. `sub-101`). -- "data_type": [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure (e.g. `anat`). -- "contrast": it is related to the contrast that will be called when you use different SCT functions (either `t1` or `t2`) and may not not necessarily correspond to the actual data acquisition (e.g. `t1`). -- "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`) -– "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject id but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`) -- "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. 
`_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`) - ## Dataset structure The dataset should be arranged according to the BIDS convention. Using the two examples subjects listed in the `configuration.json` template file, this would be as follows: -``` dataset/ └── dataset_description.json └── participants.tsv <-------------------------------- Metadata describing subjects attributes e.g. sex, age, etc. @@ -89,11 +56,66 @@ dataset/ └── sub-03_T1w_label-disc.nii.gz <---- Disc labels; `_T1w` can be replaced by the value of `suffix_image` in configuration.json └── sub-03_T2w_label-SC_seg.nii.gz └── sub-03_T2w_label-disc.nii.gz + +## Getting started: data preprocessing + +### Segment spinal cord and vertebral discs + +Note: +* SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). +* You can therefore still use SCT even if your images are not actually T1w and T2w. + +1. Set up SCT (see [dependencies](#Dependencies)). +1. Update `segment_sc_discs_deepseg.sh`: + * Make sure to modify the suffix names for your T2w-like and T1w-like images (SUFFIX_T2w, SUFFIX_T1w) on lines 28 and 29 according to the naming convention in your dataset. + * If you do not have 2 types of images for each subject, make sure to comment out the appropriate code. +2. Update `segmentation_batching_script.sh`: + * Update the options according to your own dataset. + +3. Run: +``` +./batching_script.sh # this calls segment_sc_discs_deepseg.sh +``` + +4. Quality control (QC): +* Spinal cord masks and disc labels can be displayed by opening: `/PATH/TO/dataset/derivatives/labels/qc/index.html` +* See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix disc labels manually. + +### Preparing data for template-generation + +`template_preprocessing_pipeline.py` contains several functions to preprocess spinal cord MRI data for template generation. Preprocessing includes: +* extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects. +* computing the average centerline, by averaging the position of each intervertebral disks. The average centerline of the spinal cord is straightened and merged with the ICBM152 template. +* generating the initial template space, based on the average centerline and positions of intervertebral disks. +* straightening of all subjects on the initial template space + +1. Create a configuration file according to `configuration_template.json`. +2. Determine the integer value corresponding to the label of the lowest disc until which you want your template to go (depends on the lowest disc available in your images, nomenclature can be found [here](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)). +3. Run the following: +``` +python template_preprocessing_pipeline.py configuration.json LOWEST_DISC +``` +4. One the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each others. + +## How to generate your own template? +The template generation framework can be configured by the file "configuration.json", that includes the following variables: +- "path_data": absolute path to the dataset, including all images [correctly structured](#dataset-structure); ends with `/`. 
+- "subjects": list of subjects names, that must be the same as folder names in the dataset structure (e.g. `sub-101`). +- "data_type": [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure (e.g. `anat`). +- "contrast": it is related to the contrast that will be called when you use different SCT functions (either `t1` or `t2`) and may not not necessarily correspond to the actual data acquisition (e.g. `t1`). +- "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`) +– "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject id but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`) +- "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`) + +Now, you can generate the template using the IPL pipeline with the following command, where N has to be replace by the number of subjects: + +``` +python -m scoop -n N -vvv generate_template.py ``` -## Setting up on Canada's Alliance CPU cluster to generate template +### Setting up on Canada's Alliance CPU cluster to generate template -It is recommended to run the template generation on a large cluster. If you are in Canada, you could make use of [the Alliance](https://alliancecan.ca/en) (formerly Compute Canada), which is a bunch of CPU nodes accessible to researchers in Canada. Once the preprocessing is complete, you will generate the template with `generate_template.py`. This will require minctoolkit v2, minc2simple and nist-mni-pipelines. The easiest way to set up is to use Compute Canada and set up your environment as follows: +It is recommended to run the template generation on a large cluster. If you are in Canada, you could make use of [the Alliance](https://alliancecan.ca/en) (formerly Compute Canada), which is a bunch of CPU nodes accessible to researchers in Canada. **Once the preprocessing is complete**, you will generate the template with `generate_template.py`. This will require minctoolkit v2, minc2simple and nist-mni-pipelines. The easiest way to set up is to use Compute Canada and set up your virtual environment (without spinal cord toolbox, since your data should have already been preprocessed by now) as follows: 1. 
Load the right modules and install packages from pip wheel ``` From 84a5c800176eb7266a8783de6882f2482952f3ec Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:07:40 -0400 Subject: [PATCH 16/54] renaming pipeline.py for nomenclature to be more clear --- pipeline.py => template_preprocessing_pipeline.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pipeline.py => template_preprocessing_pipeline.py (100%) diff --git a/pipeline.py b/template_preprocessing_pipeline.py similarity index 100% rename from pipeline.py rename to template_preprocessing_pipeline.py From 5fdd6d35dd1c3ca7830fd6c92d2b60654fed2766 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:08:34 -0400 Subject: [PATCH 17/54] adding code for segmentation --- segment_sc_discs_deepseg.sh | 145 ++++++++++++++++++++++++++++++++ segmentation_batching_script.sh | 6 ++ 2 files changed, 151 insertions(+) create mode 100755 segment_sc_discs_deepseg.sh create mode 100755 segmentation_batching_script.sh diff --git a/segment_sc_discs_deepseg.sh b/segment_sc_discs_deepseg.sh new file mode 100755 index 0000000..f7b37e7 --- /dev/null +++ b/segment_sc_discs_deepseg.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# +# Process data. This script is designed to be run in the folder for a single subject, however 'sct_run_batch' can be +# used to run this script multiple times in parallel across a multi-subject BIDS dataset. +# +# This script only deals with T2w and MT images for example purpose. For a more comprehensive qMRI analysis, see for +# example this script: https://github.com/spine-generic/spine-generic/blob/master/process_data.sh +# +# Usage: +# ./process_data.sh +# +# Example: +# ./process_data.sh sub-03 +# +# Author: Julien Cohen-Adad (modified by Nadia Blostein) + +# The following global variables are retrieved from the caller sct_run_batch +# but could be overwritten by uncommenting the lines below: +#PATH_DATA_PROCESSED="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels" +#PATH_RESULTS="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/results" +#PATH_LOG="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/log" +#PATH_QC="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/qc" + + +# BASH SETTINGS +# ====================================================================================================================== + +SUFFIX_T2w = "rec-composed_T2w" +SUFFIX_T1w = "rec-composed_T1w" + +# Uncomment for full verbose +# set -v + +# Immediately exit if error +set -e + +# Exit if user presses CTRL+C (Linux) or CMD+C (OSX) +trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT + + +# CONVENIENCE FUNCTIONS +# ====================================================================================================================== + +label_if_does_not_exist() { + ### + # This function checks if a manual label file already exists, then: + # - If it does, copy it locally. + # - If it doesn't, perform automatic labeling. + # This allows you to add manual labels on a subject-by-subject basis without disrupting the pipeline. + ### + local file="$1" + local file_seg="$2" + # Update global variable with segmentation file name + FILELABEL="${file}_labels" + echo "Looking for labels: ${FILELABEL}" + if [[ -e ${FILELABEL}.nii.gz ]]; then + echo "Found! Using vertebral labels that exist." + else + echo "Not found. Proceeding with automatic labeling." 
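+ # Note: sct_label_vertebrae derives the vertebral labeling from the image and its
+ # cord segmentation; '-c t2' is hard-coded below and assumes bright-CSF (T2w-like)
+ # contrast, even when this function is called for the T1w image.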
+ # Generate labeled segmentation + sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c t2 -qc "${PATH_QC}" -qc-subject "${SUBJECT}" + fi +} + +segment_if_does_not_exist() { + ### + # This function checks if a manual spinal cord segmentation file already exists, then: + # - If it does, copy it locally. + # - If it doesn't, perform automatic spinal cord segmentation. + # This allows you to add manual segmentations on a subject-by-subject basis without disrupting the pipeline. + ### + local file="$1" + local contrast="$2" + # Update global variable with segmentation file name + FILESEG="${file}_seg" + echo + echo "Looking for segmentation: ${FILESEG}" + if [[ -e ${FILESEG}.nii.gz ]]; then + echo "Found! Using SC segmentation that exists." + sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT} + else + echo "Not found. Proceeding with automatic segmentation." + # Segment spinal cord + sct_deepseg_sc -i ${file}.nii.gz -c $contrast -qc ${PATH_QC} -qc-subject ${SUBJECT} + fi +} + +# SCRIPT STARTS HERE +# ====================================================================================================================== + +# Retrieve input params +SUBJECT=$1 + +# get starting time: +start=`date +%s` + +# Display useful info for the log, such as SCT version, RAM and CPU cores available +sct_check_dependencies -short + +# Go to folder where data will be copied and processed +cd $PATH_DATA_PROCESSED +# Copy source images +rsync -avzh $PATH_DATA/$SUBJECT . + +# T2w +# ====================================================================================================================== +cd "${SUBJECT}/anat/" +file_t2="${SUBJECT}_${SUFFIX_T2w}" +# Segment spinal cord +segment_if_does_not_exist "${file_t2}" "t2" +file_t2_seg="${FILESEG}" +# Create vertebral labels +label_if_does_not_exist "${file_t2}" "${file_t2_seg}" + +# T1w +# ====================================================================================================================== +file_t1="${SUBJECT}_${SUFFIX_T1w}" +# Segment spinal cord +segment_if_does_not_exist "${file_t1}" "t1" +file_t1_seg="${FILESEG}" +# Create vertebral labels +label_if_does_not_exist "${file_t1}" "${file_t1_seg}" + +# Verify presence of output files and write log file if error +# ====================================================================================================================== +FILES_TO_CHECK=( + "$file_t2_seg.nii.gz" + "$file_t1_seg.nii.gz" +) +for file in "${FILES_TO_CHECK[@]}"; do + if [ ! 
-e "${file}" ]; then + echo "${SUBJECT}/${file} does not exist" >> "${PATH_LOG}/error.log" + fi +done + +# Display useful info for the log +end=`date +%s` +runtime=$((end-start)) +echo +echo "~~~" +echo "SCT version: `sct_version`" +echo "Ran on: `uname -nsr`" +echo "Duration: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" +echo "~~~" + diff --git a/segmentation_batching_script.sh b/segmentation_batching_script.sh new file mode 100755 index 0000000..755d2ba --- /dev/null +++ b/segmentation_batching_script.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +sct_run_batch -jobs 6 -path-data "../philadelphia-pediatric" \ + -subject-prefix "sub-" -exclude-list sub-107 sub-125 -script segment_sc_discs_deepseg.sh \ + -path-output "../philadelphia-pediatric/derivatives/labels" + From 1b0672d86fed22dea021c0ce26351b490247675c Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:09:29 -0400 Subject: [PATCH 18/54] making nomenclature of template preprocessing scripts more clear --- preprocessing.py => template_preprocessing.py | 0 template_preprocessing_pipeline.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename preprocessing.py => template_preprocessing.py (100%) diff --git a/preprocessing.py b/template_preprocessing.py similarity index 100% rename from preprocessing.py rename to template_preprocessing.py diff --git a/template_preprocessing_pipeline.py b/template_preprocessing_pipeline.py index 3c36cff..476fd97 100644 --- a/template_preprocessing_pipeline.py +++ b/template_preprocessing_pipeline.py @@ -3,7 +3,7 @@ The default dataset is an example dataset that is downloaded at the beginning of the script. """ -from preprocessing import * +from template_preprocessing import * import sys dataset_info = read_dataset(sys.argv[1]) From 53bed41e73e15bc955ebc3140494528512385075 Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:10:54 -0400 Subject: [PATCH 19/54] fixing more nomenclature for sc and disc segmentation scripts --- README.md | 4 ++-- segmentation_batching_script.sh => segment_batching_script.sh | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename segmentation_batching_script.sh => segment_batching_script.sh (100%) diff --git a/README.md b/README.md index edfbc72..8909689 100644 --- a/README.md +++ b/README.md @@ -69,12 +69,12 @@ Note: 1. Update `segment_sc_discs_deepseg.sh`: * Make sure to modify the suffix names for your T2w-like and T1w-like images (SUFFIX_T2w, SUFFIX_T1w) on lines 28 and 29 according to the naming convention in your dataset. * If you do not have 2 types of images for each subject, make sure to comment out the appropriate code. -2. Update `segmentation_batching_script.sh`: +2. Update `segmentat_batching_script.sh`: * Update the options according to your own dataset. 3. Run: ``` -./batching_script.sh # this calls segment_sc_discs_deepseg.sh +./segment_batching_script.sh # this calls segment_sc_discs_deepseg.sh ``` 4. 
Quality control (QC): diff --git a/segmentation_batching_script.sh b/segment_batching_script.sh similarity index 100% rename from segmentation_batching_script.sh rename to segment_batching_script.sh From ff6ca9b542aac1f2e9a18c3dc02973333a613c0e Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:15:53 -0400 Subject: [PATCH 20/54] removing redundancy --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8909689..0380c02 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Framework for creating unbiased MRI templates of the spinal cord. ## Dependencies -- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8, `commit 7ead83200d7ad9ee5e0d112e77a1d7b894add738` +- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8 SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. The current version of the pipeline uses SCT 5.8 in development mode (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0. @@ -37,6 +37,7 @@ Install this python library in SCT python. ## Dataset structure The dataset should be arranged according to the BIDS convention. Using the two examples subjects listed in the `configuration.json` template file, this would be as follows: +``` dataset/ └── dataset_description.json └── participants.tsv <-------------------------------- Metadata describing subjects attributes e.g. sex, age, etc. @@ -56,10 +57,10 @@ dataset/ └── sub-03_T1w_label-disc.nii.gz <---- Disc labels; `_T1w` can be replaced by the value of `suffix_image` in configuration.json └── sub-03_T2w_label-SC_seg.nii.gz └── sub-03_T2w_label-disc.nii.gz - +``` ## Getting started: data preprocessing -### Segment spinal cord and vertebral discs +### A. Segment spinal cord and vertebral discs Note: * SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). @@ -81,7 +82,7 @@ Note: * Spinal cord masks and disc labels can be displayed by opening: `/PATH/TO/dataset/derivatives/labels/qc/index.html` * See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix disc labels manually. -### Preparing data for template-generation +### B. Preparing data for template-generation `template_preprocessing_pipeline.py` contains several functions to preprocess spinal cord MRI data for template generation. Preprocessing includes: * extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects. 
From 9b4d4c6fda6c4d68653fc6d6efe1416e17f64b3d Mon Sep 17 00:00:00 2001 From: NadiaBlostein Date: Mon, 26 Jun 2023 15:16:03 -0400 Subject: [PATCH 21/54] making data path name generic --- segment_batching_script.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/segment_batching_script.sh b/segment_batching_script.sh index 755d2ba..9b45a62 100755 --- a/segment_batching_script.sh +++ b/segment_batching_script.sh @@ -1,6 +1,6 @@ #!/bin/bash -sct_run_batch -jobs 6 -path-data "../philadelphia-pediatric" \ +sct_run_batch -jobs 6 -path-data "/PATH/TO/dataset" \ -subject-prefix "sub-" -exclude-list sub-107 sub-125 -script segment_sc_discs_deepseg.sh \ - -path-output "../philadelphia-pediatric/derivatives/labels" + -path-output "/PATH/TO/dataset/derivatives/labels" From 86ed4903dbbe5d830c45a06a238436244e75375a Mon Sep 17 00:00:00 2001 From: NadiaBlostein <33006815+NadiaBlostein@users.noreply.github.com> Date: Mon, 26 Jun 2023 15:29:56 -0400 Subject: [PATCH 22/54] Update README.md anchor points for dependencies --- README.md | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 0380c02..f08c460 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Spinal cord MRI template Framework for creating unbiased MRI templates of the spinal cord. + ## Dependencies - [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8 @@ -60,13 +61,13 @@ dataset/ ``` ## Getting started: data preprocessing +### Dependencies for data preprocessing (see [dependencies](#dependencies_anchor)) +- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8 + ### A. Segment spinal cord and vertebral discs -Note: -* SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). -* You can therefore still use SCT even if your images are not actually T1w and T2w. +Note that SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). You can therefore still use SCT even if your images are not actually T1w and T2w. -1. Set up SCT (see [dependencies](#Dependencies)). 1. Update `segment_sc_discs_deepseg.sh`: * Make sure to modify the suffix names for your T2w-like and T1w-like images (SUFFIX_T2w, SUFFIX_T1w) on lines 28 and 29 according to the naming convention in your dataset. * If you do not have 2 types of images for each subject, make sure to comment out the appropriate code. @@ -82,7 +83,7 @@ Note: * Spinal cord masks and disc labels can be displayed by opening: `/PATH/TO/dataset/derivatives/labels/qc/index.html` * See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix disc labels manually. -### B. Preparing data for template-generation +### B. Prepare data for template generation `template_preprocessing_pipeline.py` contains several functions to preprocess spinal cord MRI data for template generation. Preprocessing includes: * extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects. @@ -90,16 +91,8 @@ Note: * generating the initial template space, based on the average centerline and positions of intervertebral disks. * straightening of all subjects on the initial template space -1. Create a configuration file according to `configuration_template.json`. -2. 
Determine the integer value corresponding to the label of the lowest disc down to which you want your template to extend (this depends on the lowest disc available in your images; the nomenclature can be found [here](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)).
3. Run:
```
python template_preprocessing_pipeline.py configuration.json LOWEST_DISC
```
4. Once the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each other.

## How to generate your own template? 
+### Dependencies for template generation (see [dependencies](#dependencies_anchor)) +- [ANIMAL registration framework, part of the IPL longitudinal pipeline](https://github.com/vfonov/nist_mni_pipelines) +- `scoop` (PyPI) +- [Minc Toolkit v2](http://bic-mni.github.io/) +- [minc2_simple](https://github.com/vfonov/minc2-simple) + +Now, you can generate the template using the IPL pipeline with the following command, where N has to be replace by the number of subjects: ``` python -m scoop -n N -vvv generate_template.py ``` From 43d503e4da4f1d5be5e1f09a21032c9511008a29 Mon Sep 17 00:00:00 2001 From: Julien Cohen-Adad Date: Tue, 27 Jun 2023 14:17:40 -0400 Subject: [PATCH 23/54] minor clarifications --- README.md | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f08c460..e6ad3fc 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,16 @@ # Spinal cord MRI template -Framework for creating unbiased MRI templates of the spinal cord. +Framework for creating MRI templates of the spinal cord. ## Dependencies -- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8 + +### [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8 SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. The current version of the pipeline uses SCT 5.8 in development mode (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0. -- [ANIMAL registration framework, part of the IPL longitudinal pipeline](https://github.com/vfonov/nist_mni_pipelines) +### [ANIMAL registration framework](https://github.com/vfonov/nist_mni_pipelines) -ANIMAL is used for generating the template, using iterative nonlinear deformation. +ANIMAL, part of the IPL longitudinal pipeline, is used for generating the template, using iterative nonlinear deformation. The recommanded pipeline for generating a template of the spinal cord is the [nonlinear symmetrical template model](https://github.com/vfonov/nist_mni_pipelines/blob/master/examples/synthetic_tests/test_model_creation/scoop_test_nl_sym.py). Installation: @@ -24,15 +25,17 @@ export PYTHONPATH="${PYTHONPATH}:path/to/nist_mni_pipelines/ipl/" export PYTHONPATH="${PYTHONPATH}:path/to/nist_mni_pipelines/ipl" ``` -You will also need to install `scoop` with: `pip install scoop` - -- [Minc Toolkit v2](http://bic-mni.github.io/) +### [Minc Toolkit v2](http://bic-mni.github.io/) The Minc Toolkit is a dependency of the template generation process. -On OSX, you may need to recompile Minc Toolkit from source to make sure all libraires are linked correctly. +You will also need to install `scoop` with: `pip install scoop` -- [minc2_simple](https://github.com/vfonov/minc2-simple) +On macOs, you may need to recompile Minc Toolkit from source to make sure all libraires are linked correctly. + +On Linux: TODO + +### [minc2_simple](https://github.com/vfonov/minc2-simple) Install this python library in SCT python. 
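
To verify the minc2_simple installation from SCT's Python before running the template generation, a quick read test along these lines can help; the API calls below follow the minc2-simple README and are assumptions, not verified here:

```python
import numpy as np
from minc2_simple import minc2_file

vol = minc2_file('some_volume.mnc')          # any existing MINC2 file
vol.setup_standard_order()                   # enforce consistent dimension ordering
data = vol.load_complete_volume(np.float64)  # read the volume as a numpy array
print(data.shape)
```
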
From 68b2c570238c96996e1cbfbda1d6082825e5d0b8 Mon Sep 17 00:00:00 2001
From: Julien Cohen-Adad
Date: Tue, 27 Jun 2023 14:52:56 -0400
Subject: [PATCH 24/54] Clarified preprocessing for SC seg and labeling

---
 README.md | 49 ++++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index e6ad3fc..42d793b 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,7 @@
 # Spinal cord MRI template
-Framework for creating MRI templates of the spinal cord.
-
-## Dependencies
+Framework for creating MRI templates of the spinal cord. The framework has two distinct pipelines, which have to be run sequentially: [Data preprocessing](#data-preprocessing) and [Template creation](#template-creation).
 
-### [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8
-
-SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. The current version of the pipeline uses SCT 5.8 in development mode (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0.
 
 ### [ANIMAL registration framework](https://github.com/vfonov/nist_mni_pipelines)
 
@@ -62,36 +57,46 @@ dataset/
         └── sub-03_T2w_label-SC_seg.nii.gz
         └── sub-03_T2w_label-disc.nii.gz
 ```
-## Getting started: data preprocessing
+## Data preprocessing
+
+### Install SCT
+
+SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. The current version of the pipeline uses SCT development version (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0.
 
-### Dependencies for data preprocessing (see [dependencies](#dependencies_anchor))
-- [Spinal Cord Toolbox (SCT)](https://github.com/neuropoly/spinalcordtoolbox) version 5.8
+Once SCT is installed, make sure to activate SCT's virtual environment because the pipeline will use SCT's API functions.
+
+```
+source ${SCT_DIR}/python/etc/profile.d/conda.sh
+conda activate venv_sct
+```
 
-### A. Segment spinal cord and vertebral discs
+### Segment spinal cord and vertebral discs
 
 Note that SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). You can therefore still use SCT even if your images are not actually T1w and T2w.
 
-1. Update `segment_sc_discs_deepseg.sh`:
-   * Make sure to modify the suffix names for your T2w-like and T1w-like images (SUFFIX_T2w, SUFFIX_T1w) on lines 28 and 29 according to the naming convention in your dataset.
-   * If you do not have 2 types of images for each subject, make sure to comment out the appropriate code.
-2. Update `segmentat_batching_script.sh`:
-   * Update the options according to your own dataset.
+#### Update `segment_sc_discs_deepseg.sh`
+   * Make sure to modify the suffix names for your T2w-like and T1w-like images (SUFFIX_T2w, SUFFIX_T1w) according to the naming convention in your dataset.
+   * If you do not have 2 types of images for each subject, make sure to comment out the appropriate code.
 
-3. Run:
+#### Run script
```
-./segment_batching_script.sh # this calls segment_sc_discs_deepseg.sh
+sct_run_batch -job 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_deepseg.sh -path-output "/PATH/TO/dataset/derivatives/labels"
```
-4. Quality control (QC):
+> **Note**
+> Replace values appropriately based on your setup
+
+#### Quality control (QC)
+
 * Spinal cord masks and disc labels can be displayed by opening: `/PATH/TO/dataset/derivatives/labels/qc/index.html`
 * See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix disc labels manually.
 
-### B. Prepare data for template generation
+### Prepare data for template generation
 
 `template_preprocessing_pipeline.py` contains several functions to preprocess spinal cord MRI data for template generation. Preprocessing includes:
 * extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects.
 * computing the average centerline, by averaging the position of each intervertebral disks. The average centerline of the spinal cord is straightened and merged with the ICBM152 template.
-* generating the initial template space, based on the average centerline and positions of intervertebral disks.
+* generating the initial template space, based on the average centerline and positions of intervertebral discs.
 * straightening of all subjects on the initial template space
 
 Here are the steps to go through:
@@ -108,9 +113,11 @@ Here are the steps to go through:
```
 python template_preprocessing_pipeline.py configuration.json LOWEST_DISC
```
+
 4. Once the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each other.
 
-## How to generate your own template?
+
+## Template creation
 
 ### Dependencies for template generation (see [dependencies](#dependencies_anchor))
 - [ANIMAL registration framework, part of the IPL longitudinal pipeline](https://github.com/vfonov/nist_mni_pipelines)

From c68e903e77b0a13f931af53816875a29804b9003 Mon Sep 17 00:00:00 2001
From: Julien Cohen-Adad
Date: Tue, 27 Jun 2023 14:59:11 -0400
Subject: [PATCH 25/54] Update README.md

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 42d793b..8885a7b 100644
--- a/README.md
+++ b/README.md
@@ -101,12 +101,12 @@ sct_run_batch -job 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_dee
 Here are the steps to go through:
 1. The template generation framework can be configured by the file "configuration.json", that includes the following variables:
-- "path_data": absolute path to the dataset, including all images [correctly structured](#dataset-structure); ends with `/`.
-- "subjects": list of subjects names, that must be the same as folder names in the dataset structure (e.g. `sub-101`).
-- "data_type": [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure (e.g. `anat`).
-- "contrast": it is related to the contrast that will be called when you use different SCT functions (either `t1` or `t2`) and may not not necessarily correspond to the actual data acquisition (e.g. `t1`).
+- `path_data`: absolute path to the dataset, including all images [correctly structured](#dataset-structure); ends with `/`.
+- `subjects`: List of subjects to include in the preprocessing, separated with commas.
+- `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure.
+- `contrast`: Contrast to preprocess.
 - "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`)
-- "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject id but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`)
+- "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject ID but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`)
 - "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`)
 2. Determine the integer value corresponding to the label of the lowest disc until which you want your template to go (depends on the lowest disc available in your images, nomenclature can be found [here](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)).
 3. Run:

From 130f8a442a28fc9c4def5527728de4fbc5472b77 Mon Sep 17 00:00:00 2001
From: Julien Cohen-Adad
Date: Thu, 29 Jun 2023 11:43:13 -0400
Subject: [PATCH 26/54] Clarified workflow

---
 README.md | 49 +++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 8885a7b..30b7922 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,9 @@
 # Spinal cord MRI template
 
-Framework for creating MRI templates of the spinal cord. The framework has two distinct pipelines, which have to be run sequentially: [Data preprocessing](#data-preprocessing) and [Template creation](#template-creation).
+Framework for creating MRI templates of the spinal cord. The framework has two distinct pipelines, which have to be run sequentially: [Data preprocessing](#data-preprocessing) and [Template creation](#template-creation).
+
+> **Important**
+> The framework has to be run independently for each contrast. In the end, the generated templates across contrasts should be perfectly aligned.
 
 ### [ANIMAL registration framework](https://github.com/vfonov/nist_mni_pipelines)
 
@@ -57,11 +60,22 @@ dataset/
         └── sub-03_T2w_label-SC_seg.nii.gz
         └── sub-03_T2w_label-disc.nii.gz
 ```
+
+
 ## Data preprocessing
 
+This pipeline includes the following steps:
+
+TODO: remove details below
+1. Segmentation of SC and disc labeling
+2. QC. If seg didn't work, fix SC seg. If disc labeling did not work, fix labeling.
+3. `configuration_default.json`: Copy this file and rename it as `configuration.json`. Edit it and modify the values according to your setup.
+4. Extraction of SC centerline, generation of average centerline in the template space, straightening/registration of all spinal cord images on the initial template space.
+
+
 ### Install SCT
 
-SCT is used for all preprocessing steps, including extraction of centerline, generation of average centerline in the template space, and straightening/registration of all spinal cord images on the initial template space. The current version of the pipeline uses SCT development version (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0.
+SCT is used for all preprocessing steps. The current version of the pipeline uses SCT development version (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0.
 
 Once SCT is installed, make sure to activate SCT's virtual environment because the pipeline will use SCT's API functions.
 
```
 source ${SCT_DIR}/python/etc/profile.d/conda.sh
 conda activate venv_sct
```
 
+### Edit configuration file
+
+Copy the file `configuration_default.json` and rename it as `configuration.json`. Edit it and modify the values according to your setup:
+
+- `path_data`: Absolute path to the input [BIDS dataset](#dataset-structure); the path should end with `/`.
+- `subjects`: List of subjects to include in the preprocessing, separated with commas.
+- `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure. Typically, it should be "anat".
+- `contrast`: Contrast to preprocess.
+- "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`)
+- "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject ID but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`)
+- "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`)
+-
 ### Segment spinal cord and vertebral discs
 
 Note that SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). You can therefore still use SCT even if your images are not actually T1w and T2w.
 
-#### Update `segment_sc_discs_deepseg.sh`
-   * Make sure to modify the suffix names for your T2w-like and T1w-like images (SUFFIX_T2w, SUFFIX_T1w) according to the naming convention in your dataset.
-   * If you do not have 2 types of images for each subject, make sure to comment out the appropriate code.
-
-#### Run script
+Run script:
```
-sct_run_batch -job 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_deepseg.sh -path-output "/PATH/TO/dataset/derivatives/labels"
+sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_deepseg.sh -path-output "/PATH/TO/dataset/derivatives/labels"
```
 > **Note**
-> Replace values appropriately based on your setup
+> Replace values appropriately based on your setup (eg: -jobs 10 means that 10 CPU-cores are used. If you have
 
 #### Quality control (QC)
 
 * Spinal cord masks and disc labels can be displayed by opening: `/PATH/TO/dataset/derivatives/labels/qc/index.html`
 * See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix disc labels manually.
 
 ### Prepare data for template generation
 
 `template_preprocessing_pipeline.py` contains several functions to preprocess spinal cord MRI data for template generation. Preprocessing includes:
 * extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects.
 * computing the average centerline, by averaging the position of each intervertebral disks. The average centerline of the spinal cord is straightened and merged with the ICBM152 template.
 * generating the initial template space, based on the average centerline and positions of intervertebral discs.
 * straightening of all subjects on the initial template space
 
-Here are the steps to go through:
-1. The template generation framework can be configured by the file "configuration.json", that includes the following variables:
-- `path_data`: absolute path to the dataset, including all images [correctly structured](#dataset-structure); ends with `/`.
-- `subjects`: List of subjects to include in the preprocessing, separated with commas.
-- `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure.
-- `contrast`: Contrast to preprocess.
-- "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`)
-- "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject ID but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`)
-- "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`)
 2. Determine the integer value corresponding to the label of the lowest disc until which you want your template to go (depends on the lowest disc available in your images, nomenclature can be found [here](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)).
 3. Run:
```

From 2c4883ac4dbcd474283d6f2b09763f31ad2281bc Mon Sep 17 00:00:00 2001
From: jcohenadad
Date: Thu, 29 Jun 2023 12:01:37 -0400
Subject: [PATCH 27/54] Updated README

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 30b7922..bdce14e 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 Framework for creating MRI templates of the spinal cord. The framework has two distinct pipelines, which have to be run sequentially: [Data preprocessing](#data-preprocessing) and [Template creation](#template-creation).
 
 > **Important**
-> The framework has to be run independently for each contrast. In the end, the generated templates across contrasts should be perfectly aligned.
+> The framework has to be run independently for each contrast. In the end, the generated templates across contrasts should be perfectly aligned. This is what was done for the PAM50 template.
 
 ### [ANIMAL registration framework](https://github.com/vfonov/nist_mni_pipelines)
 
@@ -91,9 +91,9 @@ Copy the file `configuration_default.json` and rename it as `configuration.json`
 - `path_data`: Absolute path to the input [BIDS dataset](#dataset-structure); the path should end with `/`.
 - `subjects`: List of subjects to include in the preprocessing, separated with commas.
 - `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure. Typically, it should be "anat".
-- `contrast`: Contrast to preprocess.
-- "suffix_image": suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`)
-- "suffix_label-SC_seg": suffix for binary images of the spinal cord mask, after subject ID but before file extension (e.g. `_rec-composed_T1w_label-SC_seg` in `sub-101_rec-composed_T1w_label-SC_seg.nii.gz`)
+- `contrast`: Contrast to be used by `sct_deepseg_sc` function.
+- `suffix_image`: Suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`)
+- `suffix_label-SC_seg`: Suffix for binary images of the spinal cord mask, after `suffix_image` (e.g. `_label-SC_seg`).
 - "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`)
 -
 ### Segment spinal cord and vertebral discs

From cecfb6b4b88d33572e53ba06bd4191ab636b4ada Mon Sep 17 00:00:00 2001
From: jcohenadad
Date: Thu, 29 Jun 2023 12:04:02 -0400
Subject: [PATCH 28/54] Completed section on config file

---
 README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index bdce14e..d35c7c5 100644
--- a/README.md
+++ b/README.md
@@ -94,8 +94,12 @@ Copy the file `configuration_default.json` and rename it as `configuration.json`
 - `contrast`: Contrast to be used by `sct_deepseg_sc` function.
 - `suffix_image`: Suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`)
 - `suffix_label-SC_seg`: Suffix for binary images of the spinal cord mask, after `suffix_image` (e.g. `_label-SC_seg`).
-- "suffix_label-disc": suffix for binary images of the intervertebral disks labeling, after subject id but before file extension (e.g. `_rec-composed_T1w_label-disc` in `sub-101_rec-composed_T1w_label-disc.nii.gz`)
--
+- `suffix_label-disc`: Suffix for the images of the intervertebral discs labeling, after `suffix_image` (e.g. `_label-disc`).
+
+> **Note**
+> If you wish to make a template that does not align discs across subjects, please open an [issue](https://github.com/neuropoly/template/issues) and we will follow up with you.
+
+
 ### Segment spinal cord and vertebral discs

From 485a007a178e47b64cdabbe6d74ddb06d25e3fe9 Mon Sep 17 00:00:00 2001
From: jcohenadad
Date: Thu, 29 Jun 2023 12:23:23 -0400
Subject: [PATCH 29/54] Updated clarification

---
 README.md | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index d35c7c5..7e522e3 100644
--- a/README.md
+++ b/README.md
@@ -96,36 +96,40 @@ Copy the file `configuration_default.json` and rename it as `configuration.json`
 - `suffix_label-SC_seg`: Suffix for binary images of the spinal cord mask, after `suffix_image` (e.g. `_label-SC_seg`).
 - `suffix_label-disc`: Suffix for the images of the intervertebral discs labeling, after `suffix_image` (e.g. `_label-disc`).
 
+> **Note**
+> Note that SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). You can therefore still use SCT even if your images are not actually T1w and T2w.
+
 > **Note**
 > If you wish to make a template that does not align discs across subjects, please open an [issue](https://github.com/neuropoly/template/issues) and we will follow up with you.
 
 
 ### Segment spinal cord and vertebral discs
 
-Note that SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). You can therefore still use SCT even if your images are not actually T1w and T2w.
-
 Run script:
```
 sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_deepseg.sh -path-output "/PATH/TO/results"
```
 > **Note**
-> Replace values appropriately based on your setup (eg: -jobs 10 means that 10 CPU-cores are used. If you have
+> Replace values appropriately based on your setup (eg: -jobs 10 means that 10 CPU-cores are used. For more details, run `sct_run_batch -h`).
+
+
+### Quality control (QC)
 
-#### Quality control (QC)
+* Spinal cord segmentation (or centerlines) and disc labels can be displayed by opening: `/PATH/TO/results/qc/index.html`
+* See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix segmentation (or centerline) and/or disc labels manually.
 
-* Spinal cord masks and disc labels can be displayed by opening: `/PATH/TO/dataset/derivatives/labels/qc/index.html`
-* See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix disc labels manually.
 
-### Prepare data for template generation
+### Normalize spinal cord across subjects
 
-`template_preprocessing_pipeline.py` contains several functions to preprocess spinal cord MRI data for template generation. Preprocessing includes:
-* extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects.
-* computing the average centerline, by averaging the position of each intervertebral disks. The average centerline of the spinal cord is straightened and merged with the ICBM152 template.
-* generating the initial template space, based on the average centerline and positions of intervertebral discs.
-* straightening of all subjects on the initial template space
+`template_preprocessing_pipeline.py` contains several functions to normalize the spinal cord across subjects, in preparation for template generation. More specifically:
+* Extracting the spinal cord centerline and computing the vertebral distribution along the spinal cord, for all subjects,
+* Computing the average centerline, by averaging the position of each intervertebral disc. The average centerline of the spinal cord is straightened,
+* Generating the initial template space, based on the average centerline and positions of intervertebral discs,
+* Straightening of all subjects' spinal cords on the initial template space.
 
 2. Determine the integer value corresponding to the label of the lowest disc until which you want your template to go (depends on the lowest disc available in your images, nomenclature can be found [here](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)).
+
 3. Run:
```
@@ -189,5 +193,10 @@
 python -m scoop -vvv generate_template.py
 
 sbatch --time=24:00:00 --mem-per-cpu 4000 my_job.sh # will probably require batching several times, depending on number of subjects
 
+## Additional information
+
+To have the generated template registered to an existing space (eg, ICBM152), please open an [issue](https://github.com/neuropoly/template/issues) and we will follow up with you.
+
+
 ## Licence
 This repository is under an MIT licence.
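For orientation before the `template_preprocessing.py` patches that follow: the README above invokes the preprocessing through `template_preprocessing_pipeline.py`, and the functions patched below chain together roughly as sketched here. This is a minimal, hypothetical driver, not the repository's actual entry point; the function names, argument order, and return values are taken from the `template_preprocessing.py` diffs in this series.

```python
# Hypothetical driver sketch (assumes the signatures shown in the
# template_preprocessing.py patches below; the real
# template_preprocessing_pipeline.py may differ).
import sys

from template_preprocessing import (read_dataset, generate_centerline,
                                    average_centerline,
                                    generate_initial_template_space)

# Usage: python driver.py configuration.json
dataset_info = read_dataset(sys.argv[1])

# One Centerline object per subject (assumes generate_centerline returns the
# list_centerline it accumulates).
list_centerline = generate_centerline(dataset_info)

# Average centerline points and intervertebral disc positions that define the
# initial template space.
points_average_centerline, position_template_discs = average_centerline(list_centerline, dataset_info)

generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs)
```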
From c72955ebe4eb3d29efde2190121c53801b1134d7 Mon Sep 17 00:00:00 2001 From: rohanbanerjee Date: Fri, 30 Jun 2023 15:52:03 -0400 Subject: [PATCH 30/54] Removal of loading the .npz centerline file --- template_preprocessing.py | 60 ++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 51e5166..d235126 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -158,39 +158,35 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener fname_image_discs = fname_discs + '-manual.nii.gz' if os.path.isfile(fname_discs + '-manual.nii.gz') else fname_discs + '.nii.gz' fname_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline' - # if centerline exists, we load it, if not, we compute it - if os.path.isfile(fname_centerline + '.npz') and not regenerate: - print("Centerline for " + subject_name + " exists and will not be recomputed!") - centerline = Centerline(fname = fname_centerline + '.npz') + + if os.path.isfile(fname_image_seg): + print(subject_name + ' SC segmentation exists. Extracting centerline from ' + fname_image_seg) + im_seg = Image(fname_image_seg).change_orientation('RPI') + param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) else: - if os.path.isfile(fname_image_seg): - print(subject_name + ' SC segmentation exists. Extracting centerline from ' + fname_image_seg) - im_seg = Image(fname_image_seg).change_orientation('RPI') - param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) - else: - print(subject_name + ' SC segmentation does not exist. Extracting centerline from ' + fname_image) - im_seg = Image(fname_image).change_orientation('RPI') - param_centerline = ParamCenterline(algo_fitting = 'optic', contrast = contrast, smooth = smooth, degree = 5, minmax = minmax) - - # extracting intervertebral discs - im_discs = Image(fname_image_discs).change_orientation('RPI') - coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) - coord_physical = [] - for c in coord: - if c.value <= lowest_disc or c.value in [48, 49, 50, 51, 52]: - c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) - c_p.append(c.value) - coord_physical.append(c_p) - - # extracting centerline - im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline) - - # save centerline as .nii.gz file - im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32') - centerline = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) - centerline.compute_vertebral_distribution(coord_physical) - # save centerline .npz file - centerline.save_centerline(fname_output = fname_centerline) + print(subject_name + ' SC segmentation does not exist. 
Extracting centerline from ' + fname_image) + im_seg = Image(fname_image).change_orientation('RPI') + param_centerline = ParamCenterline(algo_fitting = 'optic', contrast = contrast, smooth = smooth, degree = 5, minmax = minmax) + + # extracting intervertebral discs + im_discs = Image(fname_image_discs).change_orientation('RPI') + coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) + coord_physical = [] + for c in coord: + if c.value <= lowest_disc or c.value in [48, 49, 50, 51, 52]: + c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) + c_p.append(c.value) + coord_physical.append(c_p) + + # extracting centerline + im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline) + + # save centerline as .nii.gz file + im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32') + centerline = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) + centerline.compute_vertebral_distribution(coord_physical) + # save centerline .npz file + centerline.save_centerline(fname_output = fname_centerline) list_centerline.append(centerline) tqdm_bar.update(1) From d0fe6fc3682401f413296d5e87f6e01f62a77c90 Mon Sep 17 00:00:00 2001 From: rohanbanerjee Date: Fri, 30 Jun 2023 15:54:26 -0400 Subject: [PATCH 31/54] reverting to original --- template_preprocessing.py | 60 +++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index d235126..51e5166 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -158,35 +158,39 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener fname_image_discs = fname_discs + '-manual.nii.gz' if os.path.isfile(fname_discs + '-manual.nii.gz') else fname_discs + '.nii.gz' fname_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline' - - if os.path.isfile(fname_image_seg): - print(subject_name + ' SC segmentation exists. Extracting centerline from ' + fname_image_seg) - im_seg = Image(fname_image_seg).change_orientation('RPI') - param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) + # if centerline exists, we load it, if not, we compute it + if os.path.isfile(fname_centerline + '.npz') and not regenerate: + print("Centerline for " + subject_name + " exists and will not be recomputed!") + centerline = Centerline(fname = fname_centerline + '.npz') else: - print(subject_name + ' SC segmentation does not exist. 
Extracting centerline from ' + fname_image)
+            im_seg = Image(fname_image).change_orientation('RPI')
+            param_centerline = ParamCenterline(algo_fitting = 'optic', contrast = contrast, smooth = smooth, degree = 5, minmax = minmax)
+
+        # extracting intervertebral discs
+        im_discs = Image(fname_image_discs).change_orientation('RPI')
+        coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True)
+        coord_physical = []
+        for c in coord:
+            if c.value <= lowest_disc or c.value in [48, 49, 50, 51, 52]:
+                c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0])
+                c_p.append(c.value)
+                coord_physical.append(c_p)
+
+        # extracting centerline
+        im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline)
+
+        # save centerline as .nii.gz file
+        im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32')
+        centerline = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2])
+        centerline.compute_vertebral_distribution(coord_physical)
+        # save centerline .npz file
+        centerline.save_centerline(fname_output = fname_centerline)
 
         list_centerline.append(centerline)
         tqdm_bar.update(1)

From 81d8d70708d0494c755189a14a00532e722c15be Mon Sep 17 00:00:00 2001
From: Nadia Blostein
Date: Tue, 4 Jul 2023 15:53:48 -0400
Subject: [PATCH 32/54] new configuration file where redundancy has been
 removed, e.g. no more suffix_label-SC_seg and suffix_label-disc keys.
 Addition of min and max disc keys.
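As an illustration of the new keys (a sketch, not code from this commit): `first_disc` and `last_disc` are integer disc labels, so they can be resolved to disc names with the `Centerline` lookup tables used later in this series. The integer-keyed `Centerline.regions_labels` access is an assumption based on those later patches.

```python
# Minimal sketch: resolve the "first_disc"/"last_disc" values from the new
# configuration file to disc names via Centerline.regions_labels.
import json

from spinalcordtoolbox.types import Centerline

with open('configuration.json') as f:
    config = json.load(f)

first_disc = int(config['first_disc'])
last_disc = int(config['last_disc'])

# Prints the disc names corresponding to the configured range.
print(Centerline.regions_labels[first_disc], Centerline.regions_labels[last_disc])
```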
--- configuration_default.json | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 configuration_default.json diff --git a/configuration_default.json b/configuration_default.json new file mode 100644 index 0000000..7cd0e8d --- /dev/null +++ b/configuration_default.json @@ -0,0 +1,9 @@ +{ + "path_data": "/path/to/data/", + "subjects": "sub-001, sub-002, sub-003", + "data_type":"anat", + "contrast": "t1", + "suffix_image": "_T1w", + "first_disc": "1", + "last_disc": "26" +} \ No newline at end of file From 6733b47cdd10a82fbf2f2438287b0bca63e20795 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Tue, 4 Jul 2023 15:54:58 -0400 Subject: [PATCH 33/54] adjusting and to new configuration file, --- template_preprocessing.py | 186 ++++++++++++++++++++++++++------------ 1 file changed, 127 insertions(+), 59 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 51e5166..7b60870 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -16,29 +16,28 @@ list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] -labels_regions = {'PMJ': 50, 'PMG': 49, - 'C1': 1, 'C2': 2, 'C3': 3, 'C4': 4, 'C5': 5, 'C6': 6, 'C7': 7, - 'T1': 8, 'T2': 9, 'T3': 10, 'T4': 11, 'T5': 12, 'T6': 13, 'T7': 14, 'T8': 15, 'T9': 16, 'T10': 17, - 'T11': 18, 'T12': 19, - 'L1': 20, 'L2': 21, 'L3': 22, 'L4': 23, 'L5': 24, - 'S1': 25, 'S2': 26, 'S3': 27, 'S4': 28, 'S5': 29, - 'Co': 30} -regions_labels = {'50': 'PMJ', '49': 'PMG', - '1': 'C1', '2': 'C2', '3': 'C3', '4': 'C4', '5': 'C5', '6': 'C6', '7': 'C7', - '8': 'T1', '9': 'T2', '10': 'T3', '11': 'T4', '12': 'T5', '13': 'T6', '14': 'T7', '15': 'T8', - '16': 'T9', '17': 'T10', '18': 'T11', '19': 'T12', - '20': 'L1', '21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5', - '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5', - '30': 'Co'} -average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0, - 'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056, - 'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905, - 'T1': 16.84466163681078, 'T2': 19.865049296865475, 'T3': 21.550165130933905, - 'T4': 21.761237991438083, 'T5': 22.633281372803687, 'T6': 23.801974227738132, - 'T7': 24.358357813758332, 'T8': 25.200266294477885, 'T9': 25.315272064638506, - 'T10': 25.501856729317133, 'T11': 27.619238824308123, 'T12': 29.465119270009946, - 'L1': 31.89272719870084, 'L2': 33.511890474486449, 'L3': 35.721413718617441} - +# Centerline.labels_regions = {'PMJ': 50, 'PMG': 49, +# 'C1': 1, 'C2': 2, 'C3': 3, 'C4': 4, 'C5': 5, 'C6': 6, 'C7': 7, +# 'T1': 8, 'T2': 9, 'T3': 10, 'T4': 11, 'T5': 12, 'T6': 13, 'T7': 14, 'T8': 15, 'T9': 16, 'T10': 17, +# 'T11': 18, 'T12': 19, +# 'L1': 20, 'L2': 21, 'L3': 22, 'L4': 23, 'L5': 24, +# 'S1': 25, 'S2': 26, 'S3': 27, 'S4': 28, 'S5': 29, +# 'Co': 30} +# Centerline.regions_labels = {'50': 'PMJ', '49': 'PMG', +# '1': 'C1', '2': 'C2', '3': 'C3', '4': 'C4', '5': 'C5', '6': 'C6', '7': 'C7', +# '8': 'T1', '9': 'T2', '10': 'T3', '11': 'T4', '12': 'T5', '13': 'T6', '14': 'T7', '15': 'T8', +# '16': 'T9', '17': 'T10', '18': 'T11', '19': 'T12', +# '20': 'L1', '21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5', +# '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5', +# '30': 'Co'} +# average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0, +# 'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056, +# 'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905, 
+# 'T1': 16.84466163681078, 'T2': 19.865049296865475, 'T3': 21.550165130933905, +# 'T4': 21.761237991438083, 'T5': 22.633281372803687, 'T6': 23.801974227738132, +# 'T7': 24.358357813758332, 'T8': 25.200266294477885, 'T9': 25.315272064638506, +# 'T10': 25.501856729317133, 'T11': 27.619238824308123, 'T12': 29.465119270009946, +# 'L1': 31.89272719870084, 'L2': 33.511890474486449, 'L3': 35.721413718617441} def average_coordinates_over_slices(self, image): ### deprecated from latest version of spinalcordtoolbox # extracting points information for each coordinates @@ -108,7 +107,7 @@ def average_coordinates_over_slices(self, image): ### deprecated from latest ver # return coordinate_result -def read_dataset(fname_json = 'configuration.json', path_data = './'): ### TO COMPLETE +def read_dataset(fname_json = 'configuration.json', path_data = './'): """ This function reads a json file that describes the dataset, including the list of subjects as well as suffix for filenames (centerline, discs, segmentations, etc.). @@ -119,11 +118,11 @@ def read_dataset(fname_json = 'configuration.json', path_data = './'): ### TO CO if not os.path.isfile(fname_json): raise ValueError('File ' + fname_json + ' doesn\'t seem to exist. Please check your data.') - with open(fname_json) as data_file: - dataset_info = json.load(data_file) + with open(fname_json) as data_file: dataset_info = json.load(data_file) error = '' - key_list = ["path_data", "subjects", "data_type", "contrast", "suffix_image", "suffix_label-SC_seg", "suffix_label-disc"] + key_list = ["path_data", "subjects", "data_type", "contrast", "suffix_image", "first_disc", "last_disc"] + for key in key_list: if key not in dataset_info.keys(): error += 'Dataset configuration file ' + fname_json + ' must contain the field ' + key + '.\n' @@ -135,16 +134,16 @@ def read_dataset(fname_json = 'configuration.json', path_data = './'): ### TO CO return dataset_info -def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regenerate = False, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): +def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): """ This function generates spinal cord centerline from binary images (either an image of centerline or segmentation) :param dataset_info: dictionary containing dataset information :param contrast: {'t1', 't2'} :return list of centerline objects """ - contrast = dataset_info['contrast'] path_data = dataset_info['path_data'] - list_subjects = dataset_info['subjects'].split(', ') + list_subjects = dataset_info['subjects'].split(', ') + last_disc = int(dataset_info['last_disc']) list_centerline = [] current_path = os.getcwd() @@ -153,9 +152,8 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener # obtaining centerline of each subject for subject_name in list_subjects: fname_image = path_data + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '.nii.gz' - fname_image_seg = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-SC_seg'] + '.nii.gz' - fname_discs = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-disc'] - fname_image_discs = fname_discs + '-manual.nii.gz' if os.path.isfile(fname_discs + '-manual.nii.gz') else fname_discs + '.nii.gz' + fname_image_seg = path_data + 
'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-SC_seg.nii.gz' + fname_image_discs = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-disc.nii.gz' fname_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline' # if centerline exists, we load it, if not, we compute it @@ -166,18 +164,18 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener if os.path.isfile(fname_image_seg): print(subject_name + ' SC segmentation exists. Extracting centerline from ' + fname_image_seg) im_seg = Image(fname_image_seg).change_orientation('RPI') - param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) + param_centerline = ParamCenterline(algo_fitting = algo_fitting, smooth = smooth, degree = degree, minmax = minmax) else: print(subject_name + ' SC segmentation does not exist. Extracting centerline from ' + fname_image) im_seg = Image(fname_image).change_orientation('RPI') - param_centerline = ParamCenterline(algo_fitting = 'optic', contrast = contrast, smooth = smooth, degree = 5, minmax = minmax) + param_centerline = ParamCenterline(algo_fitting = 'optic', smooth = smooth, degree = 5, minmax = minmax) # extracting intervertebral discs im_discs = Image(fname_image_discs).change_orientation('RPI') - coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) + coord = im_discs.getNonZeroCoordinates(sorting = 'z') #, reverse_coord = True) coord_physical = [] for c in coord: - if c.value <= lowest_disc or c.value in [48, 49, 50, 51, 52]: + if c.value <= last_disc or c.value in [48, 49, 50, 51, 52]: c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) c_p.append(c.value) coord_physical.append(c_p) @@ -192,6 +190,75 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener # save centerline .npz file centerline.save_centerline(fname_output = fname_centerline) + print(f'Centerline.regions_labels: {Centerline.regions_labels}\n') + print(f'Centerline.labels_regions: {Centerline.labels_regions}\n') + + ###### TESTS ###### + # from operator import itemgetter + # label_reference = 'C1' + # discs_levels = coord_physical + # print(f'centerline.number_of_points: {centerline.number_of_points}\n') + # progress_length = np.zeros(centerline.number_of_points) + # for i in range(centerline.number_of_points - 1): + # # print(f'{i}: progressive_length = {centerline.progressive_length[i]}\nprogress_length = {progress_length[i] + centerline.progressive_length[i]}\n\n') + # progress_length[i + 1] = progress_length[i] + centerline.progressive_length[i] + # centerline.label_reference = label_reference + # print(f'centerline.label_reference: {centerline.label_reference}\n') + # # special case for C2, which might not be present because it is difficult to identify + # is_C2_here = False + # C1, C3 = None, None + # for level in discs_levels: + # if level[3] == 2: + # is_C2_here = True + # elif level[3] == 1: + # C1 = level + # elif level[3] == 3: + # C3 = level + # if not is_C2_here and C1 is not None and C3 is not None: + # discs_levels.append([(C1[0] + C3[0]) / 2.0, (C1[1] + C3[1]) / 2.0, (C1[2] + C3[2]) / 2.0, 2]) + + # centerline.l_points = [0] * centerline.number_of_points + # 
centerline.dist_points = [0] * centerline.number_of_points + # centerline.dist_points_rel = [0] * centerline.number_of_points + # centerline.index_disc, index_disc_inv = {}, [] + + # # extracting each level based on position and computing its nearest point along the centerline + # index_first_label, index_last_label = None, None + # for level in discs_levels: + # if level[3] in centerline.list_labels: + # coord_level = [level[0], level[1], level[2]] + # disc = centerline.Centerline.regions_labels[int(level[3])] + # nearest_index = centerline.find_nearest_index(coord_level) + # centerline.index_disc[disc] = nearest_index + # index_disc_inv.append([nearest_index, disc]) + + # # Finding minimum and maximum label, based on list_labels, which is ordered from top to bottom. + # index_label = centerline.list_labels.index(int(level[3])) + # if index_first_label is None or index_label < index_first_label: + # index_first_label = index_label + # if index_last_label is None or index_label > index_last_label: + # index_last_label = index_label + + # if index_first_label is not None: + # centerline.first_label = centerline.list_labels[index_first_label] + # if index_last_label is not None: + # centerline.last_label = centerline.list_labels[index_last_label] + + # index_disc_inv.append([0, 'bottom']) + # index_disc_inv.sort(key=itemgetter(0)) + + # print(f'centerline.index_disc: {centerline.index_disc}\n') + + # if self.label_reference not in self.index_disc: + # upper = 31 + # label_reference = '' + # for label in self.index_disc: + # if self.Centerline.labels_regions[label] < upper: + # label_reference = label + # upper = self.Centerline.labels_regions[label] + # self.label_reference = label_reference + ###### TESTS ###### + list_centerline.append(centerline) tqdm_bar.update(1) tqdm_bar.close() @@ -228,7 +295,7 @@ def generate_centerline(dataset_info, lowest_disc = 25, contrast = 't1', regener # centerline.compute_vertebral_distribution(coord_physical, label_reference = 'PMG') # return centerline -def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_label_ref = None, lowest_disc_quantile = 0.25): +def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_label_ref = None): """ This function compute the average centerline and vertebral distribution, that will be used to create the final template space. 
@@ -240,35 +307,37 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l position_template_discs: index of intervertebral discs along the template centerline """ + first_disc = int(dataset_info['first_disc']) + last_disc = int(dataset_info['last_disc']) + # extracting centerline from ICBM152 # if use_ICBM152: centerline_icbm152 = compute_ICBM152_centerline(dataset_info) ###### FIX list_dist_discs = [] - for centerline in list_centerline: list_dist_discs.append(centerline.distance_from_C1label) - - # finding lowest disc of the template, according to the dataset being used - list_lowest_disc = [] - for val in list_dist_discs: list_lowest_disc.append(len(val)) - lowest_disc = int(np.quantile((np.array(list_lowest_disc)), lowest_disc_quantile)) - + for centerline in list_centerline: + list_dist_discs.append(centerline.distance_from_C1label) + # generating custom list of average vertebral lengths new_vert_length = {} for dist_discs in list_dist_discs: for i, disc_label in enumerate(dist_discs): - if i < lowest_disc: + if (i + 1) >= first_disc and (i + 1) <= last_disc: if disc_label == 'PMJ': length = abs(dist_discs[disc_label] - dist_discs['PMG']) elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - index_current_label = list_labels.index(labels_regions[disc_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] + index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) + next_label = Centerline.regions_labels[str(list_labels[index_current_label + 1])] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) if disc_label in new_vert_length: new_vert_length[disc_label].append(length) else: new_vert_length[disc_label] = [length] + print(f'next_label: {next_label}\n') + print(f'disc_label: {disc_label}\n') + print(f'length: {length}\n\n') new_average_vert_length = {} for disc_label in new_vert_length: new_average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) @@ -282,8 +351,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - index_current_label = list_labels.index(labels_regions[disc_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] + index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) + next_label = Centerline.regions_labels[str(list_labels[index_current_label + 1])] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: @@ -309,8 +378,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if 'PMJ' in average_length: distances_discs_from_C1['PMJ'] = -average_length['PMG'][1] - average_length['PMJ'][1] for disc_number in list_labels: - if disc_number not in [50, 49, 1] and regions_labels[str(disc_number)] in average_length: - distances_discs_from_C1[regions_labels[str(disc_number)]] = distances_discs_from_C1[regions_labels[str(disc_number - 1)]] + average_length[regions_labels[str(disc_number)]][1] + if disc_number not in [50, 49, 1] and Centerline.regions_labels[str(disc_number)] in average_length: + distances_discs_from_C1[Centerline.regions_labels[str(disc_number)]] = distances_discs_from_C1[Centerline.regions_labels[str(disc_number - 1)]] + average_length[Centerline.regions_labels[str(disc_number)]][1] # calculating discs average distances from C1 average_distances = [] @@ -352,7 
+421,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if j == 0: disc_average_coordinates[disc_label] = average_coord disc_position_in_centerline[disc_label] = i * number_of_points_between_levels - + # create final template space if use_label_ref is not None: label_ref = use_label_ref @@ -399,8 +468,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l temp_point = np.copy(coord_ref) if i >= index_straight: - index_current_label = list_labels.index(labels_regions[current_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] + index_current_label = list_labels.index(Centerline.labels_regions[current_label]) + next_label = Centerline.regions_labels[str(list_labels[index_current_label + 1])] if next_label not in average_positions_from_C1: temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - relative_position_from_disc * length_current_label else: @@ -416,7 +485,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l points_average_centerline = points_average_centerline_template return points_average_centerline, position_template_discs -def generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs, contrast = 't1', algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): ##DONE additional options in nb/generate_initial_template_space_branch +def generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): ##DONE additional options in nb/generate_initial_template_space_branch """ This function generates the initial template space, on which all images will be registered. 
:param points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline @@ -424,7 +493,6 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos :return: NIFTI files in RPI orientation (template space, template centerline, template disc positions) & .npz file of template Centerline object ##DONE package updates in nb/generate_initial_template_space_branch """ # initializing variables - contrast = dataset_info['contrast'] path_template = dataset_info['path_data'] + 'derivatives/template/' if not os.path.exists(path_template): os.makedirs(path_template) @@ -467,7 +535,7 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos coord_physical = [] image_discs = template_space.copy() for disc in position_template_discs: - label = labels_regions[disc] + label = Centerline.labels_regions[disc] coord = position_template_discs[disc] coord_pix = image_discs.transfo_phys2pix([coord])[0] @@ -483,7 +551,7 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos print(f'\nSaving disc positions in {image_discs.orientation} orientation as {path_template}template_label-disc.nii.gz\n') ###NEW # generate template centerline as a npz file ##DONE updated template Centerline object creation and saving as .npz file in nb/generate_initial_template_space_branch - param_centerline = ParamCenterline(algo_fitting = algo_fitting, contrast = contrast, smooth = smooth, degree = degree, minmax = minmax) + param_centerline = ParamCenterline(algo_fitting = algo_fitting, smooth = smooth, degree = degree, minmax = minmax) # centerline params of original template centerline had options that you cannot just provide `get_centerline` with anymroe (algo_fitting = 'nurbs', nurbs_pts_number = 4000, all_slices = False, phys_coordinates = True, remove_outliers = True) _, arr_ctl, arr_ctl_der, _ = get_centerline(image_centerline, param = param_centerline) ### we don't need to save im_centerline! ### straightening._get_centerline(im_seg,param_centerline, 1) ### What Rohan & Benjamin added! 
centerline_template = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) From 4aeb324e8c2fd809eaf3dcde5da37db95a3a3363 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Tue, 4 Jul 2023 15:56:20 -0400 Subject: [PATCH 34/54] making sure that disc indices are stored in chronological order instead of reverse; otherwise, discs indices go to 0 before we have iterated through all the discs available --- template_preprocessing.py | 71 +-------------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 7b60870..5925c9c 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -172,7 +172,7 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # extracting intervertebral discs im_discs = Image(fname_image_discs).change_orientation('RPI') - coord = im_discs.getNonZeroCoordinates(sorting = 'z') #, reverse_coord = True) + coord = im_discs.getNonZeroCoordinates(sorting = 'z') coord_physical = [] for c in coord: if c.value <= last_disc or c.value in [48, 49, 50, 51, 52]: @@ -190,75 +190,6 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # save centerline .npz file centerline.save_centerline(fname_output = fname_centerline) - print(f'Centerline.regions_labels: {Centerline.regions_labels}\n') - print(f'Centerline.labels_regions: {Centerline.labels_regions}\n') - - ###### TESTS ###### - # from operator import itemgetter - # label_reference = 'C1' - # discs_levels = coord_physical - # print(f'centerline.number_of_points: {centerline.number_of_points}\n') - # progress_length = np.zeros(centerline.number_of_points) - # for i in range(centerline.number_of_points - 1): - # # print(f'{i}: progressive_length = {centerline.progressive_length[i]}\nprogress_length = {progress_length[i] + centerline.progressive_length[i]}\n\n') - # progress_length[i + 1] = progress_length[i] + centerline.progressive_length[i] - # centerline.label_reference = label_reference - # print(f'centerline.label_reference: {centerline.label_reference}\n') - # # special case for C2, which might not be present because it is difficult to identify - # is_C2_here = False - # C1, C3 = None, None - # for level in discs_levels: - # if level[3] == 2: - # is_C2_here = True - # elif level[3] == 1: - # C1 = level - # elif level[3] == 3: - # C3 = level - # if not is_C2_here and C1 is not None and C3 is not None: - # discs_levels.append([(C1[0] + C3[0]) / 2.0, (C1[1] + C3[1]) / 2.0, (C1[2] + C3[2]) / 2.0, 2]) - - # centerline.l_points = [0] * centerline.number_of_points - # centerline.dist_points = [0] * centerline.number_of_points - # centerline.dist_points_rel = [0] * centerline.number_of_points - # centerline.index_disc, index_disc_inv = {}, [] - - # # extracting each level based on position and computing its nearest point along the centerline - # index_first_label, index_last_label = None, None - # for level in discs_levels: - # if level[3] in centerline.list_labels: - # coord_level = [level[0], level[1], level[2]] - # disc = centerline.Centerline.regions_labels[int(level[3])] - # nearest_index = centerline.find_nearest_index(coord_level) - # centerline.index_disc[disc] = nearest_index - # index_disc_inv.append([nearest_index, disc]) - - # # Finding minimum and maximum label, based on list_labels, which is ordered from top to bottom. 
- # index_label = centerline.list_labels.index(int(level[3])) - # if index_first_label is None or index_label < index_first_label: - # index_first_label = index_label - # if index_last_label is None or index_label > index_last_label: - # index_last_label = index_label - - # if index_first_label is not None: - # centerline.first_label = centerline.list_labels[index_first_label] - # if index_last_label is not None: - # centerline.last_label = centerline.list_labels[index_last_label] - - # index_disc_inv.append([0, 'bottom']) - # index_disc_inv.sort(key=itemgetter(0)) - - # print(f'centerline.index_disc: {centerline.index_disc}\n') - - # if self.label_reference not in self.index_disc: - # upper = 31 - # label_reference = '' - # for label in self.index_disc: - # if self.Centerline.labels_regions[label] < upper: - # label_reference = label - # upper = self.Centerline.labels_regions[label] - # self.label_reference = label_reference - ###### TESTS ###### - list_centerline.append(centerline) tqdm_bar.update(1) tqdm_bar.close() From 3fe9e23f8a7c9ea7fd95929160ab1fa3d0f1d268 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 09:18:08 -0400 Subject: [PATCH 35/54] computing the vertebral distribution should be done with coordinates in pixel space, NOT physical space --- template_preprocessing.py | 55 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 5925c9c..bc45af2 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -172,13 +172,15 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # extracting intervertebral discs im_discs = Image(fname_image_discs).change_orientation('RPI') - coord = im_discs.getNonZeroCoordinates(sorting = 'z') + coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) coord_physical = [] + coord_pix = [] ############ for c in coord: if c.value <= last_disc or c.value in [48, 49, 50, 51, 52]: c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) c_p.append(c.value) coord_physical.append(c_p) + coord_pix.append(list(c)) ############ # extracting centerline im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline) @@ -186,10 +188,59 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # save centerline as .nii.gz file im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32') centerline = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) - centerline.compute_vertebral_distribution(coord_physical) + centerline.compute_vertebral_distribution(coord_pix) + #centerline.compute_vertebral_distribution(coord_physical) + # save centerline .npz file centerline.save_centerline(fname_output = fname_centerline) + #################################### TESTS + # label_reference = 'C1' + # discs_levels = coord_physical + # centerline.discs_levels = discs_levels + # centerline.label_reference = label_reference + + # # special case for C2, which might not be present because it is difficult to identify + # is_C2_here = False + # C1, C3 = None, None + # for level in discs_levels: + # if level[3] == 2: + # is_C2_here = True + # elif level[3] == 1: + # C1 = level + # elif level[3] == 3: + # C3 = level + # if not is_C2_here and C1 is not None and C3 is not None: + # discs_levels.append([(C1[0] + C3[0]) / 2.0, (C1[1] + C3[1]) / 2.0, (C1[2] 
+ C3[2]) / 2.0, 2]) + + # centerline.l_points = [0] * centerline.number_of_points + # centerline.dist_points = [0] * centerline.number_of_points + # centerline.dist_points_rel = [0] * centerline.number_of_points + # centerline.index_disc, index_disc_inv = {}, [] + + # # extracting each level based on position and computing its nearest point along the centerline + # index_first_label, index_last_label = None, None + # for level in discs_levels: + # if level[3] in centerline.list_labels: + # coord_level = [level[0], level[1], level[2]] + # print(f'\ncoord_level: {coord_level}') + # disc = centerline.regions_labels[int(level[3])] + # print(f'\ndisc: {disc}') + # nearest_index = centerline.find_nearest_index(coord_level) + # print(f'\nnearest_index: {nearest_index}\n') + # centerline.index_disc[disc] = nearest_index + # index_disc_inv.append([nearest_index, disc]) + + # # Finding minimum and maximum label, based on list_labels, which is ordered from top to bottom. + # index_label = centerline.list_labels.index(int(level[3])) + # if index_first_label is None or index_label < index_first_label: + # index_first_label = index_label + # if index_last_label is None or index_label > index_last_label: + # index_last_label = index_label + print(f'\n\ncenterline.index_disc: {centerline.index_disc}\n') + print(f'centerline.distance_from_C1label: {centerline.distance_from_C1label}') + # #################################### TESTS + list_centerline.append(centerline) tqdm_bar.update(1) tqdm_bar.close() From 289b199355db4e9e6f425c159fefc4e7653a5f3c Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 10:11:36 -0400 Subject: [PATCH 36/54] updating average_centerline function according to new configuration file format --- template_preprocessing.py | 81 +++++++-------------------------------- 1 file changed, 13 insertions(+), 68 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index bc45af2..b499593 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -138,7 +138,6 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear """ This function generates spinal cord centerline from binary images (either an image of centerline or segmentation) :param dataset_info: dictionary containing dataset information - :param contrast: {'t1', 't2'} :return list of centerline objects """ path_data = dataset_info['path_data'] @@ -173,14 +172,13 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # extracting intervertebral discs im_discs = Image(fname_image_discs).change_orientation('RPI') coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) - coord_physical = [] - coord_pix = [] ############ + coord_pix = [] # coord_physical = [] for c in coord: - if c.value <= last_disc or c.value in [48, 49, 50, 51, 52]: - c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) - c_p.append(c.value) - coord_physical.append(c_p) - coord_pix.append(list(c)) ############ + if c.value <= (last_disc + 1) or c.value in [48, 49, 50, 51, 52]: + # c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) + # c_p.append(c.value) + # coord_physical.append(c_p) + coord_pix.append(list(c)) # extracting centerline im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline) @@ -194,53 +192,6 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # save centerline .npz file centerline.save_centerline(fname_output = fname_centerline) - 
#################################### TESTS - # label_reference = 'C1' - # discs_levels = coord_physical - # centerline.discs_levels = discs_levels - # centerline.label_reference = label_reference - - # # special case for C2, which might not be present because it is difficult to identify - # is_C2_here = False - # C1, C3 = None, None - # for level in discs_levels: - # if level[3] == 2: - # is_C2_here = True - # elif level[3] == 1: - # C1 = level - # elif level[3] == 3: - # C3 = level - # if not is_C2_here and C1 is not None and C3 is not None: - # discs_levels.append([(C1[0] + C3[0]) / 2.0, (C1[1] + C3[1]) / 2.0, (C1[2] + C3[2]) / 2.0, 2]) - - # centerline.l_points = [0] * centerline.number_of_points - # centerline.dist_points = [0] * centerline.number_of_points - # centerline.dist_points_rel = [0] * centerline.number_of_points - # centerline.index_disc, index_disc_inv = {}, [] - - # # extracting each level based on position and computing its nearest point along the centerline - # index_first_label, index_last_label = None, None - # for level in discs_levels: - # if level[3] in centerline.list_labels: - # coord_level = [level[0], level[1], level[2]] - # print(f'\ncoord_level: {coord_level}') - # disc = centerline.regions_labels[int(level[3])] - # print(f'\ndisc: {disc}') - # nearest_index = centerline.find_nearest_index(coord_level) - # print(f'\nnearest_index: {nearest_index}\n') - # centerline.index_disc[disc] = nearest_index - # index_disc_inv.append([nearest_index, disc]) - - # # Finding minimum and maximum label, based on list_labels, which is ordered from top to bottom. - # index_label = centerline.list_labels.index(int(level[3])) - # if index_first_label is None or index_label < index_first_label: - # index_first_label = index_label - # if index_last_label is None or index_label > index_last_label: - # index_last_label = index_label - print(f'\n\ncenterline.index_disc: {centerline.index_disc}\n') - print(f'centerline.distance_from_C1label: {centerline.distance_from_C1label}') - # #################################### TESTS - list_centerline.append(centerline) tqdm_bar.update(1) tqdm_bar.close() @@ -283,13 +234,11 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l final template space. 
:param list_centerline: list of Centerline objects, for all subjects :param dataset_info: dictionary containing dataset information - :param lowest_disc_quantile: quantile in the distribution of lowest disc labels across subjects (will be used to determine - lowest disc label in the template) :return: points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline position_template_discs: index of intervertebral discs along the template centerline """ - first_disc = int(dataset_info['first_disc']) + # first_disc = int(dataset_info['first_disc']) last_disc = int(dataset_info['last_disc']) # extracting centerline from ICBM152 @@ -303,23 +252,20 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l new_vert_length = {} for dist_discs in list_dist_discs: for i, disc_label in enumerate(dist_discs): - if (i + 1) >= first_disc and (i + 1) <= last_disc: + if (i + 1) <= last_disc: # and (i + 1) >= first_disc if disc_label == 'PMJ': length = abs(dist_discs[disc_label] - dist_discs['PMG']) elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) - next_label = Centerline.regions_labels[str(list_labels[index_current_label + 1])] + next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) if disc_label in new_vert_length: new_vert_length[disc_label].append(length) else: new_vert_length[disc_label] = [length] - print(f'next_label: {next_label}\n') - print(f'disc_label: {disc_label}\n') - print(f'length: {length}\n\n') new_average_vert_length = {} for disc_label in new_vert_length: new_average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) @@ -334,7 +280,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l length = abs(dist_discs[disc_label] - dist_discs['C1']) else: index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) - next_label = Centerline.regions_labels[str(list_labels[index_current_label + 1])] + next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: @@ -345,7 +291,6 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l length_vertebral_levels[disc_label].append(length) else: length_vertebral_levels[disc_label] = [length] - # averaging the length of vertebral levels average_length = {} for disc_label in length_vertebral_levels: @@ -360,8 +305,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if 'PMJ' in average_length: distances_discs_from_C1['PMJ'] = -average_length['PMG'][1] - average_length['PMJ'][1] for disc_number in list_labels: - if disc_number not in [50, 49, 1] and Centerline.regions_labels[str(disc_number)] in average_length: - distances_discs_from_C1[Centerline.regions_labels[str(disc_number)]] = distances_discs_from_C1[Centerline.regions_labels[str(disc_number - 1)]] + average_length[Centerline.regions_labels[str(disc_number)]][1] + if disc_number not in [50, 49, 1] and Centerline.regions_labels[disc_number] in average_length: + distances_discs_from_C1[Centerline.regions_labels[disc_number]] = distances_discs_from_C1[Centerline.regions_labels[disc_number - 1]] + average_length[Centerline.regions_labels[disc_number]][1] # calculating discs average 
distances from C1
    average_distances = []
@@ -451,7 +396,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l

         if i >= index_straight:
             index_current_label = list_labels.index(Centerline.labels_regions[current_label])
-            next_label = Centerline.regions_labels[str(list_labels[index_current_label + 1])]
+            next_label = Centerline.regions_labels[list_labels[index_current_label + 1]]
             if next_label not in average_positions_from_C1:
                 temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - relative_position_from_disc * length_current_label
             else:

From dd27a6fec10659bbed7f73df2d329a7a49885f85 Mon Sep 17 00:00:00 2001
From: Nadia Blostein
Date: Wed, 5 Jul 2023 10:13:24 -0400
Subject: [PATCH 37/54] removing 'regions_labels' and 'labels_regions' lists,
 since the Centerline class already contains those lists

---
 template_preprocessing.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/template_preprocessing.py b/template_preprocessing.py
index b499593..0c496a6 100755
--- a/template_preprocessing.py
+++ b/template_preprocessing.py
@@ -16,20 +16,6 @@
 list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30]
 
-# Centerline.labels_regions = {'PMJ': 50, 'PMG': 49,
-#                     'C1': 1, 'C2': 2, 'C3': 3, 'C4': 4, 'C5': 5, 'C6': 6, 'C7': 7,
-#                     'T1': 8, 'T2': 9, 'T3': 10, 'T4': 11, 'T5': 12, 'T6': 13, 'T7': 14, 'T8': 15, 'T9': 16, 'T10': 17,
-#                     'T11': 18, 'T12': 19,
-#                     'L1': 20, 'L2': 21, 'L3': 22, 'L4': 23, 'L5': 24,
-#                     'S1': 25, 'S2': 26, 'S3': 27, 'S4': 28, 'S5': 29,
-#                     'Co': 30}
-# Centerline.regions_labels = {'50': 'PMJ', '49': 'PMG',
-#                     '1': 'C1', '2': 'C2', '3': 'C3', '4': 'C4', '5': 'C5', '6': 'C6', '7': 'C7',
-#                     '8': 'T1', '9': 'T2', '10': 'T3', '11': 'T4', '12': 'T5', '13': 'T6', '14': 'T7', '15': 'T8',
-#                     '16': 'T9', '17': 'T10', '18': 'T11', '19': 'T12',
-#                     '20': 'L1', '21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5',
-#                     '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5',
-#                     '30': 'Co'}
 # average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0,
 #                        'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056,
 #                        'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905,

From d124a70b013499588325a15e64e67bfbbb3d81a0 Mon Sep 17 00:00:00 2001
From: Nadia Blostein
Date: Wed, 5 Jul 2023 10:15:18 -0400
Subject: [PATCH 38/54] removing `average_vert_length` list since the function
 generates one adapted to your dataset

---
 template_preprocessing.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/template_preprocessing.py b/template_preprocessing.py
index 0c496a6..7accb50 100755
--- a/template_preprocessing.py
+++ b/template_preprocessing.py
@@ -16,14 +16,6 @@
 list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30]
 
-# average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0,
-#                        'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056,
-#                        'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905,
-#                        'T1': 16.84466163681078, 'T2': 19.865049296865475, 'T3': 21.550165130933905,
-#                        'T4': 21.761237991438083, 'T5': 22.633281372803687, 'T6': 23.801974227738132,
-#                        'T7': 24.358357813758332, 'T8': 25.200266294477885, 'T9': 25.315272064638506,
-#                        'T10': 25.501856729317133, 'T11': 27.619238824308123, 'T12': 29.465119270009946,
-#                        'L1': 31.89272719870084, 'L2': 33.511890474486449, 'L3': 35.721413718617441}
 
def 
average_coordinates_over_slices(self, image): ### deprecated from latest version of spinalcordtoolbox # extracting points information for each coordinates @@ -252,8 +244,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l new_vert_length[disc_label].append(length) else: new_vert_length[disc_label] = [length] - new_average_vert_length = {} - for disc_label in new_vert_length: new_average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) + average_vert_length = {} + for disc_label in new_vert_length: average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) # computing length of each vertebral level length_vertebral_levels = {} @@ -270,8 +262,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: - length = new_average_vert_length[disc_label] - else: length = new_average_vert_length[disc_label] + length = average_vert_length[disc_label] + else: length = average_vert_length[disc_label] if disc_label in length_vertebral_levels: length_vertebral_levels[disc_label].append(length) From 2b33bba4d71090fd14a9cb0165bbde2d432df319 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 10:34:17 -0400 Subject: [PATCH 39/54] updating and according to format --- template_preprocessing.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 7accb50..bdaf6d6 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -264,7 +264,6 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l else: length = average_vert_length[disc_label] else: length = average_vert_length[disc_label] - if disc_label in length_vertebral_levels: length_vertebral_levels[disc_label].append(length) else: @@ -303,18 +302,18 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l disc_position_in_centerline = {} # iterate over each disc level - for i in range(len(average_distances)): ###### C1, C2, C3, C4, ... 
+ for i in range(len(average_distances)): disc_label = average_distances[i][0] average_positions_from_C1[disc_label] = average_distances[i][1] - for j in range(number_of_points_between_levels): ###### C1: {0, 1, 2, 3, ...} - relative_position = float(j) / float(number_of_points_between_levels) ###### C1: {0/100, 1/100, 2/100, 3/100, ...} + for j in range(number_of_points_between_levels): + relative_position = float(j) / float(number_of_points_between_levels) if disc_label in ['PMJ', 'PMG']: relative_position = 1.0 - relative_position list_coordinates = [[]] * len(list_centerline) - for k, centerline in enumerate(list_centerline): ###### iterate through each centerline and get actual absolute coordinate - #if disc_label in centerline.distance_from_C1label: - list_coordinates[k] = centerline.get_closest_to_relative_position(disc_label, relative_position) ### centerline.get_coordinate_interpolated(disc_label, relative_position) + for k, centerline in enumerate(list_centerline): + # if disc_label in centerline.distance_from_C1label: + list_coordinates[k] = centerline.get_closest_to_relative_position(disc_label, relative_position) # centerline.get_coordinate_interpolated(disc_label, relative_position) # average all coordinates get_avg = [] for item in list_coordinates: @@ -390,7 +389,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l points_average_centerline = points_average_centerline_template return points_average_centerline, position_template_discs -def generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): ##DONE additional options in nb/generate_initial_template_space_branch +def generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): """ This function generates the initial template space, on which all images will be registered. :param points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline @@ -464,14 +463,12 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos centerline_template.save_centerline(fname_output = path_template + 'template_label-centerline') print(f'\nSaving template centerline as .npz file (saves all Centerline object information, not just coordinates) as {path_template}template_label-centerline.npz\n') -def straighten_all_subjects(dataset_info, normalized = False, contrast = 't1'): ### NOTE: outputs this to "BIDS" dir for this! 
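The loop above samples `number_of_points_between_levels` relative positions per disc interval and averages, across subjects, the centerline point found at each position. A standalone sketch of that idea follows; it is illustrative only and assumes SCT's `Centerline.get_closest_to_relative_position(disc_label, relative_position)`, which is the call used in the patch.

```python
import numpy as np

# Illustrative only: average one disc interval across subjects.
def average_level_points(list_centerline, disc_label, n_points=100):
    averaged = []
    for j in range(n_points):
        rel = j / n_points  # fraction of the way through the vertebral level
        coords = [ctl.get_closest_to_relative_position(disc_label, rel)
                  for ctl in list_centerline]
        coords = [c for c in coords if c is not None and np.all(np.isfinite(c))]
        if coords:  # skip positions with no valid point in any subject
            averaged.append(np.mean(coords, axis=0))
    return averaged
```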
+def straighten_all_subjects(dataset_info, normalized = False): """ This function straighten all images based on template centerline :param dataset_info: dictionary containing dataset information :param normalized: True if images were normalized before straightening - :param contrast: {'t1', 't2'} """ - contrast = dataset_info['contrast'] path_data = dataset_info['path_data'] path_template = dataset_info['path_data'] + 'derivatives/template/' list_subjects = dataset_info['subjects'].split(', ') @@ -485,10 +482,9 @@ def straighten_all_subjects(dataset_info, normalized = False, contrast = 't1'): if not os.path.exists(folder_out): os.makedirs(folder_out) fname_image = path_data + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '.nii.gz' - fname_image_seg = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-SC_seg'] + '.nii.gz' - fname_discs = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_label-disc'] - fname_image_discs = fname_discs + '-manual.nii.gz' if os.path.isfile(fname_discs + '-manual.nii.gz') else fname_discs + '.nii.gz' - fname_image_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline.nii.gz' + fname_image_seg = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-SC_seg.nii.gz' + fname_image_discs = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-disc.nii.gz' + fname_image_centerline = path_data + 'derivatives/labels/' + subject_name + '/' + dataset_info['data_type'] + '/' + subject_name + dataset_info['suffix_image'] + '_label-centerline.nii.gz' fname_out = subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz' if normalized else subject_name + dataset_info['suffix_image'] + '_straight.nii.gz' fname_input_seg = fname_image_seg if os.path.isfile(fname_image_seg) else fname_image_centerline From d64111db3801decb03e3748811684e0da195f9c1 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 17:10:58 -0400 Subject: [PATCH 40/54] adapted to new config file --- template_preprocessing.py | 118 +++++++++++++++++------------ template_preprocessing_pipeline.py | 8 +- 2 files changed, 74 insertions(+), 52 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index bdaf6d6..0008f17 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -16,6 +16,29 @@ list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] +labels_regions = {'PMJ': 50, 'PMG': 49, + 'C1': 1, 'C2': 2, 'C3': 3, 'C4': 4, 'C5': 5, 'C6': 6, 'C7': 7, + 'T1': 8, 'T2': 9, 'T3': 10, 'T4': 11, 'T5': 12, 'T6': 13, 'T7': 14, 'T8': 15, 'T9': 16, 'T10': 17, + 'T11': 18, 'T12': 19, + 'L1': 20, 'L2': 21, 'L3': 22, 'L4': 23, 'L5': 24, + 'S1': 25, 'S2': 26, 'S3': 27, 'S4': 28, 'S5': 29, + 'Co': 30} +regions_labels = {'50': 'PMJ', '49': 'PMG', + '1': 'C1', '2': 'C2', '3': 'C3', '4': 'C4', '5': 'C5', '6': 'C6', '7': 'C7', + '8': 'T1', '9': 'T2', '10': 'T3', '11': 'T4', '12': 'T5', '13': 'T6', '14': 'T7', '15': 'T8', + '16': 'T9', '17': 'T10', '18': 'T11', '19': 'T12', + '20': 'L1', 
'21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5', + '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5', + '30': 'Co'} +average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0, + 'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056, + 'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905, + 'T1': 16.84466163681078, 'T2': 19.865049296865475, 'T3': 21.550165130933905, + 'T4': 21.761237991438083, 'T5': 22.633281372803687, 'T6': 23.801974227738132, + 'T7': 24.358357813758332, 'T8': 25.200266294477885, 'T9': 25.315272064638506, + 'T10': 25.501856729317133, 'T11': 27.619238824308123, 'T12': 29.465119270009946, + 'L1': 31.89272719870084, 'L2': 33.511890474486449, 'L3': 35.721413718617441} + def average_coordinates_over_slices(self, image): ### deprecated from latest version of spinalcordtoolbox # extracting points information for each coordinates @@ -116,6 +139,7 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear """ This function generates spinal cord centerline from binary images (either an image of centerline or segmentation) :param dataset_info: dictionary containing dataset information + :param contrast: {'t1', 't2'} :return list of centerline objects """ path_data = dataset_info['path_data'] @@ -150,23 +174,20 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear # extracting intervertebral discs im_discs = Image(fname_image_discs).change_orientation('RPI') coord = im_discs.getNonZeroCoordinates(sorting = 'z', reverse_coord = True) - coord_pix = [] # coord_physical = [] + coord_physical = [] for c in coord: - if c.value <= (last_disc + 1) or c.value in [48, 49, 50, 51, 52]: - # c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) - # c_p.append(c.value) - # coord_physical.append(c_p) - coord_pix.append(list(c)) + if c.value <= last_disc or c.value in [48, 49, 50, 51, 52]: + c_p = list(im_discs.transfo_pix2phys([[c.x, c.y, c.z]])[0]) + c_p.append(c.value) + coord_physical.append(c_p) # extracting centerline - im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline) + im_ctl, arr_ctl, arr_ctl_der, _ = get_centerline(im_seg, param = param_centerline, space = 'phys') # save centerline as .nii.gz file im_ctl.save(fname_centerline + '.nii.gz', dtype = 'float32') centerline = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) - centerline.compute_vertebral_distribution(coord_pix) - #centerline.compute_vertebral_distribution(coord_physical) - + centerline.compute_vertebral_distribution(coord_physical) # save centerline .npz file centerline.save_centerline(fname_output = fname_centerline) @@ -212,40 +233,38 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l final template space. 
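As a reading aid for the hunks below: once each level's average length is known, the discs' average distances from C1 are obtained by walking down the ordered label list and accumulating lengths. A minimal sketch, assuming `average_length` maps each disc label to a `(label, mean_length)` tuple as built in this function:

```python
# Illustrative only: accumulate mean level lengths into distances from C1.
# `regions_labels` maps label numbers (as strings) to disc names, and
# `list_labels` is ordered from PMJ (50) down to Co (30), as in this file.
def disc_distances_from_C1(list_labels, regions_labels, average_length):
    dist = {'C1': 0.0}
    for disc_number in list_labels:
        if disc_number in [50, 49, 1]:
            continue  # PMJ/PMG are handled separately; C1 is the origin
        disc = regions_labels[str(disc_number)]
        prev_disc = regions_labels[str(disc_number - 1)]
        if disc in average_length and prev_disc in dist:
            dist[disc] = dist[prev_disc] + average_length[disc][1]
    return dist
```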
:param list_centerline: list of Centerline objects, for all subjects :param dataset_info: dictionary containing dataset information + :param lowest_disc: integer value containing the lowest disc until which the template will go :return: points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline - position_template_discs: index of intervertebral discs along the template centerline + position_template_discs: index of intervertebral discs along the template centerline """ - # first_disc = int(dataset_info['first_disc']) last_disc = int(dataset_info['last_disc']) - # extracting centerline from ICBM152 # if use_ICBM152: centerline_icbm152 = compute_ICBM152_centerline(dataset_info) ###### FIX list_dist_discs = [] - for centerline in list_centerline: - list_dist_discs.append(centerline.distance_from_C1label) - + for centerline in list_centerline: list_dist_discs.append(centerline.distance_from_C1label) + # generating custom list of average vertebral lengths new_vert_length = {} for dist_discs in list_dist_discs: for i, disc_label in enumerate(dist_discs): - if (i + 1) <= last_disc: # and (i + 1) >= first_disc + if i <= last_disc: if disc_label == 'PMJ': length = abs(dist_discs[disc_label] - dist_discs['PMG']) elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) - next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] + index_current_label = list_labels.index(labels_regions[disc_label]) + next_label = regions_labels[str(list_labels[index_current_label + 1])] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) if disc_label in new_vert_length: new_vert_length[disc_label].append(length) else: new_vert_length[disc_label] = [length] - average_vert_length = {} - for disc_label in new_vert_length: average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) + new_average_vert_length = {} + for disc_label in new_vert_length: new_average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) # computing length of each vertebral level length_vertebral_levels = {} @@ -257,17 +276,19 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) - next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] + index_current_label = list_labels.index(labels_regions[disc_label]) + next_label = regions_labels[str(list_labels[index_current_label + 1])] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: - length = average_vert_length[disc_label] - else: length = average_vert_length[disc_label] + length = new_average_vert_length[disc_label] + else: length = new_average_vert_length[disc_label] + if disc_label in length_vertebral_levels: length_vertebral_levels[disc_label].append(length) else: length_vertebral_levels[disc_label] = [length] + # averaging the length of vertebral levels average_length = {} for disc_label in length_vertebral_levels: @@ -282,8 +303,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if 'PMJ' in average_length: distances_discs_from_C1['PMJ'] = -average_length['PMG'][1] - average_length['PMJ'][1] for disc_number in list_labels: - if disc_number not in [50, 49, 1] and 
Centerline.regions_labels[disc_number] in average_length: - distances_discs_from_C1[Centerline.regions_labels[disc_number]] = distances_discs_from_C1[Centerline.regions_labels[disc_number - 1]] + average_length[Centerline.regions_labels[disc_number]][1] + if disc_number not in [50, 49, 1] and regions_labels[str(disc_number)] in average_length: + distances_discs_from_C1[regions_labels[str(disc_number)]] = distances_discs_from_C1[regions_labels[str(disc_number - 1)]] + average_length[regions_labels[str(disc_number)]][1] # calculating discs average distances from C1 average_distances = [] @@ -302,18 +323,18 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l disc_position_in_centerline = {} # iterate over each disc level - for i in range(len(average_distances)): + for i in range(len(average_distances)): ###### C1, C2, C3, C4, ... disc_label = average_distances[i][0] average_positions_from_C1[disc_label] = average_distances[i][1] - for j in range(number_of_points_between_levels): - relative_position = float(j) / float(number_of_points_between_levels) + for j in range(number_of_points_between_levels): ###### C1: {0, 1, 2, 3, ...} + relative_position = float(j) / float(number_of_points_between_levels) ###### C1: {0/100, 1/100, 2/100, 3/100, ...} if disc_label in ['PMJ', 'PMG']: relative_position = 1.0 - relative_position list_coordinates = [[]] * len(list_centerline) - for k, centerline in enumerate(list_centerline): - # if disc_label in centerline.distance_from_C1label: - list_coordinates[k] = centerline.get_closest_to_relative_position(disc_label, relative_position) # centerline.get_coordinate_interpolated(disc_label, relative_position) + for k, centerline in enumerate(list_centerline): ###### iterate through each centerline and get actual absolute coordinate + #if disc_label in centerline.distance_from_C1label: + list_coordinates[k] = centerline.get_closest_to_relative_position(disc_label, relative_position) ### centerline.get_coordinate_interpolated(disc_label, relative_position) # average all coordinates get_avg = [] for item in list_coordinates: @@ -325,7 +346,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if j == 0: disc_average_coordinates[disc_label] = average_coord disc_position_in_centerline[disc_label] = i * number_of_points_between_levels - + # create final template space if use_label_ref is not None: label_ref = use_label_ref @@ -372,8 +393,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l temp_point = np.copy(coord_ref) if i >= index_straight: - index_current_label = list_labels.index(Centerline.labels_regions[current_label]) - next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] + index_current_label = list_labels.index(labels_regions[current_label]) + next_label = regions_labels[str(list_labels[index_current_label + 1])] if next_label not in average_positions_from_C1: temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - relative_position_from_disc * length_current_label else: @@ -389,7 +410,7 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l points_average_centerline = points_average_centerline_template return points_average_centerline, position_template_discs -def generate_initial_template_space(dataset_info, points_average_centerline, position_template_discs, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): +def generate_initial_template_space(dataset_info, 
points_average_centerline, position_template_discs, algo_fitting = 'linear', smooth = 50, degree = None, minmax = None): ##DONE additional options in nb/generate_initial_template_space_branch """ This function generates the initial template space, on which all images will be registered. :param points_average_centerline: list of points (x, y, z) of the average spinal cord and brainstem centerline @@ -439,7 +460,7 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos coord_physical = [] image_discs = template_space.copy() for disc in position_template_discs: - label = Centerline.labels_regions[disc] + label = labels_regions[disc] coord = position_template_discs[disc] coord_pix = image_discs.transfo_phys2pix([coord])[0] @@ -457,17 +478,18 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos # generate template centerline as a npz file ##DONE updated template Centerline object creation and saving as .npz file in nb/generate_initial_template_space_branch param_centerline = ParamCenterline(algo_fitting = algo_fitting, smooth = smooth, degree = degree, minmax = minmax) # centerline params of original template centerline had options that you cannot just provide `get_centerline` with anymroe (algo_fitting = 'nurbs', nurbs_pts_number = 4000, all_slices = False, phys_coordinates = True, remove_outliers = True) - _, arr_ctl, arr_ctl_der, _ = get_centerline(image_centerline, param = param_centerline) ### we don't need to save im_centerline! ### straightening._get_centerline(im_seg,param_centerline, 1) ### What Rohan & Benjamin added! + _, arr_ctl, arr_ctl_der, _ = get_centerline(image_centerline, param = param_centerline, space = 'phys') ### we don't need to save im_centerline! ### straightening._get_centerline(im_seg,param_centerline, 1) ### What Rohan & Benjamin added! centerline_template = Centerline(points_x = arr_ctl[0], points_y = arr_ctl[1], points_z = arr_ctl[2], deriv_x = arr_ctl_der[0], deriv_y = arr_ctl_der[1], deriv_z = arr_ctl_der[2]) centerline_template.compute_vertebral_distribution(coord_physical) centerline_template.save_centerline(fname_output = path_template + 'template_label-centerline') print(f'\nSaving template centerline as .npz file (saves all Centerline object information, not just coordinates) as {path_template}template_label-centerline.npz\n') -def straighten_all_subjects(dataset_info, normalized = False): +def straighten_all_subjects(dataset_info, normalized = False): ### NOTE: outputs this to "BIDS" dir for this! """ This function straighten all images based on template centerline :param dataset_info: dictionary containing dataset information :param normalized: True if images were normalized before straightening + :param contrast: {'t1', 't2'} """ path_data = dataset_info['path_data'] path_template = dataset_info['path_data'] + 'derivatives/template/' @@ -506,12 +528,11 @@ def straighten_all_subjects(dataset_info, normalized = False): tqdm_bar.update(1) tqdm_bar.close() -def normalize_intensity_template(dataset_info, contrast = 't1', verbose = 1): ### Removed fname_template_centerline = None -> why would we want this? +def normalize_intensity_template(dataset_info, verbose = 1): ### Removed fname_template_centerline = None -> why would we want this? 
""" This function normalizes the intensity of the image inside the spinal cord :return: """ - contrast = dataset_info['contrast'] fname_template_centerline = dataset_info['path_data'] + 'derivatives/template/' + 'template_label-centerline.npz' list_subjects = dataset_info['subjects'].split(', ') ###NEW @@ -582,7 +603,6 @@ def smooth(x, window_len = 11, window = 'hanning'): y = np.convolve(w / w.sum(), s, mode = 'same') return y[window_len - 1:-window_len + 1] - # Smoothing intensities = [c[1] for c in arr_int] @@ -598,6 +618,8 @@ def smooth(x, window_len = 11, window = 'hanning'): plt.plot(intensities) plt.plot(intensity_profile_smooth) plt.show() + tqdm_bar.update(1) + tqdm_bar.close() # set the average image intensity over the entire dataset average_intensity = 1000.0 @@ -618,8 +640,7 @@ def smooth(x, window_len = 11, window = 'hanning'): # Save intensity normalized template image_new.save(fname_image_normalized) -def copy_preprocessed_images(dataset_info, contrast = 't1'): - contrast = dataset_info['contrast'] +def copy_preprocessed_images(dataset_info): list_subjects = dataset_info['subjects'].split(', ') tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) @@ -630,7 +651,7 @@ def copy_preprocessed_images(dataset_info, contrast = 't1'): tqdm_bar.update(1) tqdm_bar.close() -def create_mask_template(dataset_info, contrast = 't1'): +def create_mask_template(dataset_info): path_template = dataset_info['path_data'] + 'derivatives/template/' subject_name = dataset_info['subjects'].split(', ')[0] @@ -645,12 +666,11 @@ def create_mask_template(dataset_info, contrast = 't1'): os.system('nii2mnc ' + path_template + '/template_mask.nii.gz ' + ' ' + path_template + '/template_mask.mnc') return path_template + '/template_mask.mnc' -def convert_data2mnc(dataset_info, contrast = 't1'): - contrast = dataset_info['contrast'] +def convert_data2mnc(dataset_info): path_template = dataset_info['path_data'] + 'derivatives/template/' list_subjects = dataset_info['subjects'].split(', ') - path_template_mask = create_mask_template(dataset_info, contrast) + path_template_mask = create_mask_template(dataset_info) output_list = open('subjects.csv', "w") writer = csv.writer(output_list, delimiter = ',', quotechar = ',', quoting = csv.QUOTE_MINIMAL) diff --git a/template_preprocessing_pipeline.py b/template_preprocessing_pipeline.py index 476fd97..9b4eeb6 100644 --- a/template_preprocessing_pipeline.py +++ b/template_preprocessing_pipeline.py @@ -1,6 +1,5 @@ """ This script calls all the methods required for the data preprocessing. -The default dataset is an example dataset that is downloaded at the beginning of the script. 
""" from template_preprocessing import * @@ -9,13 +8,15 @@ dataset_info = read_dataset(sys.argv[1]) # generating centerlines -list_centerline = generate_centerline(dataset_info = dataset_info, regenerate = True) +list_centerline = generate_centerline(dataset_info = dataset_info, regenerate = False) # computing average template centerline and vertebral distribution points_average_centerline, position_template_discs = average_centerline(list_centerline = list_centerline, dataset_info = dataset_info, use_ICBM152 = False, - use_label_ref = 'C1') + use_label_ref = 'C1', + #lowest_disc = int(sys.argv[2]) + ) # generating the initial template space generate_initial_template_space(dataset_info = dataset_info, @@ -33,3 +34,4 @@ # converting results to Minc format convert_data2mnc(dataset_info) + From 80ed5cc6bb9b90ce4823b0be19b0ad7c386560da Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 17:23:43 -0400 Subject: [PATCH 41/54] updating commit version of SCT used --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e522e3..2db424a 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ TODO: remove detials below ### Install SCT -SCT is used for all preprocessing steps. The current version of the pipeline uses SCT development version (commit `7ead83200d7ad9ee5e0d112e77a1d7b894add738`) as we prepare for the release of SCT 6.0. +SCT is used for all preprocessing steps. The current version of the pipeline uses SCT development version (commit `e740edf4c8408ffa44ef7ba23ad068c6d07e4b87`) as we prepare for the release of SCT 6.0. Once SCT is installed, make sure to activate SCT's virtual environment because the pipeline will use SCT's API functions. From d55817820946f5f442fe088d63c125139ecac2ae Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 18:00:52 -0400 Subject: [PATCH 42/54] removing list (redundant, as generates it according to the specific dataset used --- template_preprocessing.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 0008f17..6363f6d 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -30,15 +30,6 @@ '20': 'L1', '21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5', '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5', '30': 'Co'} -average_vert_length = {'PMJ': 30.0, 'PMG': 15.0, 'C1': 0.0, - 'C2': 20.176514191661337, 'C3': 17.022090519403065, 'C4': 17.842111671016056, - 'C5': 16.800356992319429, 'C6': 16.019212889311383, 'C7': 15.715854192723905, - 'T1': 16.84466163681078, 'T2': 19.865049296865475, 'T3': 21.550165130933905, - 'T4': 21.761237991438083, 'T5': 22.633281372803687, 'T6': 23.801974227738132, - 'T7': 24.358357813758332, 'T8': 25.200266294477885, 'T9': 25.315272064638506, - 'T10': 25.501856729317133, 'T11': 27.619238824308123, 'T12': 29.465119270009946, - 'L1': 31.89272719870084, 'L2': 33.511890474486449, 'L3': 35.721413718617441} - def average_coordinates_over_slices(self, image): ### deprecated from latest version of spinalcordtoolbox # extracting points information for each coordinates @@ -263,8 +254,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l new_vert_length[disc_label].append(length) else: new_vert_length[disc_label] = [length] - new_average_vert_length = {} - for disc_label in new_vert_length: new_average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) + average_vert_length = {} + for disc_label in 
new_vert_length: average_vert_length[disc_label] = np.mean(new_vert_length[disc_label]) # computing length of each vertebral level length_vertebral_levels = {} @@ -281,8 +272,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: - length = new_average_vert_length[disc_label] - else: length = new_average_vert_length[disc_label] + length = average_vert_length[disc_label] + else: length = average_vert_length[disc_label] if disc_label in length_vertebral_levels: length_vertebral_levels[disc_label].append(length) From 12c51d9c338376cc3a070adbf29651737e045338 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 18:15:46 -0400 Subject: [PATCH 43/54] resolving issue #47: removing redundant lists and --- template_preprocessing.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/template_preprocessing.py b/template_preprocessing.py index 6363f6d..42d4077 100755 --- a/template_preprocessing.py +++ b/template_preprocessing.py @@ -16,20 +16,6 @@ list_labels = [50, 49, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] -labels_regions = {'PMJ': 50, 'PMG': 49, - 'C1': 1, 'C2': 2, 'C3': 3, 'C4': 4, 'C5': 5, 'C6': 6, 'C7': 7, - 'T1': 8, 'T2': 9, 'T3': 10, 'T4': 11, 'T5': 12, 'T6': 13, 'T7': 14, 'T8': 15, 'T9': 16, 'T10': 17, - 'T11': 18, 'T12': 19, - 'L1': 20, 'L2': 21, 'L3': 22, 'L4': 23, 'L5': 24, - 'S1': 25, 'S2': 26, 'S3': 27, 'S4': 28, 'S5': 29, - 'Co': 30} -regions_labels = {'50': 'PMJ', '49': 'PMG', - '1': 'C1', '2': 'C2', '3': 'C3', '4': 'C4', '5': 'C5', '6': 'C6', '7': 'C7', - '8': 'T1', '9': 'T2', '10': 'T3', '11': 'T4', '12': 'T5', '13': 'T6', '14': 'T7', '15': 'T8', - '16': 'T9', '17': 'T10', '18': 'T11', '19': 'T12', - '20': 'L1', '21': 'L2', '22': 'L3', '23': 'L4', '24': 'L5', - '25': 'S1', '26': 'S2', '27': 'S3', '28': 'S4', '29': 'S5', - '30': 'Co'} def average_coordinates_over_slices(self, image): ### deprecated from latest version of spinalcordtoolbox # extracting points information for each coordinates @@ -246,8 +232,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - index_current_label = list_labels.index(labels_regions[disc_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] + index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) + next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) if disc_label in new_vert_length: @@ -267,8 +253,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l elif disc_label == 'PMG': length = abs(dist_discs[disc_label] - dist_discs['C1']) else: - index_current_label = list_labels.index(labels_regions[disc_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] + index_current_label = list_labels.index(Centerline.labels_regions[disc_label]) + next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] if next_label in dist_discs: length = abs(dist_discs[disc_label] - dist_discs[next_label]) else: @@ -294,8 +280,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l if 'PMJ' in average_length: 
distances_discs_from_C1['PMJ'] = -average_length['PMG'][1] - average_length['PMJ'][1] for disc_number in list_labels: - if disc_number not in [50, 49, 1] and regions_labels[str(disc_number)] in average_length: - distances_discs_from_C1[regions_labels[str(disc_number)]] = distances_discs_from_C1[regions_labels[str(disc_number - 1)]] + average_length[regions_labels[str(disc_number)]][1] + if disc_number not in [50, 49, 1] and Centerline.regions_labels[disc_number] in average_length: + distances_discs_from_C1[Centerline.regions_labels[disc_number]] = distances_discs_from_C1[Centerline.regions_labels[disc_number - 1]] + average_length[Centerline.regions_labels[disc_number]][1] # calculating discs average distances from C1 average_distances = [] @@ -384,8 +370,8 @@ def average_centerline(list_centerline, dataset_info, use_ICBM152 = False, use_l temp_point = np.copy(coord_ref) if i >= index_straight: - index_current_label = list_labels.index(labels_regions[current_label]) - next_label = regions_labels[str(list_labels[index_current_label + 1])] + index_current_label = list_labels.index(Centerline.labels_regions[current_label]) + next_label = Centerline.regions_labels[list_labels[index_current_label + 1]] if next_label not in average_positions_from_C1: temp_point[2] = coord_ref[2] - average_positions_from_C1[current_label] - relative_position_from_disc * length_current_label else: @@ -451,7 +437,7 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos coord_physical = [] image_discs = template_space.copy() for disc in position_template_discs: - label = labels_regions[disc] + label = Centerline.labels_regions[disc] coord = position_template_discs[disc] coord_pix = image_discs.transfo_phys2pix([coord])[0] From 7448830817abd0d92b575c4e5ee4e41ec51698bf Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 21:40:35 -0400 Subject: [PATCH 44/54] resolving issues 39 and 40 --- ...ssing.py => normalize_SC_across_dataset.py | 44 +++++++++++++++++-- template_preprocessing_pipeline.py | 37 ---------------- 2 files changed, 41 insertions(+), 40 deletions(-) rename template_preprocessing.py => normalize_SC_across_dataset.py (95%) mode change 100755 => 100644 delete mode 100644 template_preprocessing_pipeline.py diff --git a/template_preprocessing.py b/normalize_SC_across_dataset.py old mode 100755 new mode 100644 similarity index 95% rename from template_preprocessing.py rename to normalize_SC_across_dataset.py index 42d4077..07497bd --- a/template_preprocessing.py +++ b/normalize_SC_across_dataset.py @@ -5,6 +5,7 @@ import csv from copy import copy from tqdm import tqdm +import sys from spinalcordtoolbox import utils as sct from spinalcordtoolbox.types import Centerline @@ -116,7 +117,6 @@ def generate_centerline(dataset_info, regenerate = False, algo_fitting = 'linear """ This function generates spinal cord centerline from binary images (either an image of centerline or segmentation) :param dataset_info: dictionary containing dataset information - :param contrast: {'t1', 't2'} :return list of centerline objects """ path_data = dataset_info['path_data'] @@ -461,12 +461,11 @@ def generate_initial_template_space(dataset_info, points_average_centerline, pos centerline_template.save_centerline(fname_output = path_template + 'template_label-centerline') print(f'\nSaving template centerline as .npz file (saves all Centerline object information, not just coordinates) as {path_template}template_label-centerline.npz\n') -def straighten_all_subjects(dataset_info, normalized = 
False): ### NOTE: outputs this to "BIDS" dir for this! +def straighten_all_subjects(dataset_info, normalized = False): """ This function straighten all images based on template centerline :param dataset_info: dictionary containing dataset information :param normalized: True if images were normalized before straightening - :param contrast: {'t1', 't2'} """ path_data = dataset_info['path_data'] path_template = dataset_info['path_data'] + 'derivatives/template/' @@ -670,3 +669,42 @@ def convert_data2mnc(dataset_info): output_list.close() +# main +# ======================================================================================================================= +def main(configuration_file): + """ + Pipeline for data processing. + """ + dataset_info = read_dataset(configuration_file) + + # generating centerlines + list_centerline = generate_centerline(dataset_info = dataset_info, regenerate = False) + + # computing average template centerline and vertebral distribution + points_average_centerline, position_template_discs = average_centerline(list_centerline = list_centerline, + dataset_info = dataset_info, + use_ICBM152 = False, + use_label_ref = 'C1') + + # generating the initial template space + generate_initial_template_space(dataset_info = dataset_info, + points_average_centerline = points_average_centerline, + position_template_discs = position_template_discs) + + # straightening of all spinal cord + straighten_all_subjects(dataset_info = dataset_info) + + # normalize image intensity inside the spinal cord + normalize_intensity_template(dataset_info = dataset_info) + + # copy preprocessed dataset in template folder + copy_preprocessed_images(dataset_info = dataset_info) + + # converting results to Minc format + convert_data2mnc(dataset_info) + +# ======================================================================================================================= +# Start program +# ======================================================================================================================= +if __name__ == "__main__": + main(sys.argv[1]) diff --git a/template_preprocessing_pipeline.py b/template_preprocessing_pipeline.py deleted file mode 100644 index 9b4eeb6..0000000 --- a/template_preprocessing_pipeline.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -This script calls all the methods required for the data preprocessing. 
-""" - -from template_preprocessing import * -import sys - -dataset_info = read_dataset(sys.argv[1]) - -# generating centerlines -list_centerline = generate_centerline(dataset_info = dataset_info, regenerate = False) - -# computing average template centerline and vertebral distribution -points_average_centerline, position_template_discs = average_centerline(list_centerline = list_centerline, - dataset_info = dataset_info, - use_ICBM152 = False, - use_label_ref = 'C1', - #lowest_disc = int(sys.argv[2]) - ) - -# generating the initial template space -generate_initial_template_space(dataset_info = dataset_info, - points_average_centerline = points_average_centerline, - position_template_discs = position_template_discs) - -# straightening of all spinal cord -straighten_all_subjects(dataset_info = dataset_info) - -# normalize image intensity inside the spinal cord -normalize_intensity_template(dataset_info = dataset_info) - -# copy preprocessed dataset in template folder -copy_preprocessed_images(dataset_info = dataset_info) - -# converting results to Minc format -convert_data2mnc(dataset_info) - From c7cf01f4ec464a3b57c26ea7b2a1a99ab9bae817 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Wed, 5 Jul 2023 21:43:17 -0400 Subject: [PATCH 45/54] solving issue 39 --- ...ross_dataset.py => preprocess_normalize.py | 0 segment_sc_discs_deepseg.sh | 145 ------------------ 2 files changed, 145 deletions(-) rename normalize_SC_across_dataset.py => preprocess_normalize.py (100%) delete mode 100755 segment_sc_discs_deepseg.sh diff --git a/normalize_SC_across_dataset.py b/preprocess_normalize.py similarity index 100% rename from normalize_SC_across_dataset.py rename to preprocess_normalize.py diff --git a/segment_sc_discs_deepseg.sh b/segment_sc_discs_deepseg.sh deleted file mode 100755 index f7b37e7..0000000 --- a/segment_sc_discs_deepseg.sh +++ /dev/null @@ -1,145 +0,0 @@ -#!/bin/bash -# -# Process data. This script is designed to be run in the folder for a single subject, however 'sct_run_batch' can be -# used to run this script multiple times in parallel across a multi-subject BIDS dataset. -# -# This script only deals with T2w and MT images for example purpose. For a more comprehensive qMRI analysis, see for -# example this script: https://github.com/spine-generic/spine-generic/blob/master/process_data.sh -# -# Usage: -# ./process_data.sh -# -# Example: -# ./process_data.sh sub-03 -# -# Author: Julien Cohen-Adad (modified by Nadia Blostein) - -# The following global variables are retrieved from the caller sct_run_batch -# but could be overwritten by uncommenting the lines below: -#PATH_DATA_PROCESSED="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels" -#PATH_RESULTS="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/results" -#PATH_LOG="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/log" -#PATH_QC="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/qc" - - -# BASH SETTINGS -# ====================================================================================================================== - -SUFFIX_T2w = "rec-composed_T2w" -SUFFIX_T1w = "rec-composed_T1w" - -# Uncomment for full verbose -# set -v - -# Immediately exit if error -set -e - -# Exit if user presses CTRL+C (Linux) or CMD+C (OSX) -trap "echo Caught Keyboard Interrupt within script. 
Exiting now.; exit" INT - - -# CONVENIENCE FUNCTIONS -# ====================================================================================================================== - -label_if_does_not_exist() { - ### - # This function checks if a manual label file already exists, then: - # - If it does, copy it locally. - # - If it doesn't, perform automatic labeling. - # This allows you to add manual labels on a subject-by-subject basis without disrupting the pipeline. - ### - local file="$1" - local file_seg="$2" - # Update global variable with segmentation file name - FILELABEL="${file}_labels" - echo "Looking for labels: ${FILELABEL}" - if [[ -e ${FILELABEL}.nii.gz ]]; then - echo "Found! Using vertebral labels that exist." - else - echo "Not found. Proceeding with automatic labeling." - # Generate labeled segmentation - sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c t2 -qc "${PATH_QC}" -qc-subject "${SUBJECT}" - fi -} - -segment_if_does_not_exist() { - ### - # This function checks if a manual spinal cord segmentation file already exists, then: - # - If it does, copy it locally. - # - If it doesn't, perform automatic spinal cord segmentation. - # This allows you to add manual segmentations on a subject-by-subject basis without disrupting the pipeline. - ### - local file="$1" - local contrast="$2" - # Update global variable with segmentation file name - FILESEG="${file}_seg" - echo - echo "Looking for segmentation: ${FILESEG}" - if [[ -e ${FILESEG}.nii.gz ]]; then - echo "Found! Using SC segmentation that exists." - sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT} - else - echo "Not found. Proceeding with automatic segmentation." - # Segment spinal cord - sct_deepseg_sc -i ${file}.nii.gz -c $contrast -qc ${PATH_QC} -qc-subject ${SUBJECT} - fi -} - -# SCRIPT STARTS HERE -# ====================================================================================================================== - -# Retrieve input params -SUBJECT=$1 - -# get starting time: -start=`date +%s` - -# Display useful info for the log, such as SCT version, RAM and CPU cores available -sct_check_dependencies -short - -# Go to folder where data will be copied and processed -cd $PATH_DATA_PROCESSED -# Copy source images -rsync -avzh $PATH_DATA/$SUBJECT . - -# T2w -# ====================================================================================================================== -cd "${SUBJECT}/anat/" -file_t2="${SUBJECT}_${SUFFIX_T2w}" -# Segment spinal cord -segment_if_does_not_exist "${file_t2}" "t2" -file_t2_seg="${FILESEG}" -# Create vertebral labels -label_if_does_not_exist "${file_t2}" "${file_t2_seg}" - -# T1w -# ====================================================================================================================== -file_t1="${SUBJECT}_${SUFFIX_T1w}" -# Segment spinal cord -segment_if_does_not_exist "${file_t1}" "t1" -file_t1_seg="${FILESEG}" -# Create vertebral labels -label_if_does_not_exist "${file_t1}" "${file_t1_seg}" - -# Verify presence of output files and write log file if error -# ====================================================================================================================== -FILES_TO_CHECK=( - "$file_t2_seg.nii.gz" - "$file_t1_seg.nii.gz" -) -for file in "${FILES_TO_CHECK[@]}"; do - if [ ! 
-e "${file}" ]; then - echo "${SUBJECT}/${file} does not exist" >> "${PATH_LOG}/error.log" - fi -done - -# Display useful info for the log -end=`date +%s` -runtime=$((end-start)) -echo -echo "~~~" -echo "SCT version: `sct_version`" -echo "Ran on: `uname -nsr`" -echo "Duration: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" -echo "~~~" - From 5a2e4e75a5b1ccd9e8216bb8e0f74a00b10ab05e Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Thu, 6 Jul 2023 00:19:02 -0400 Subject: [PATCH 46/54] naming files more clearly and updating README --- ..._batching_script.sh => preprocess_label.sh | 2 +- segment_subjects.sh | 145 ++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) rename segment_batching_script.sh => preprocess_label.sh (56%) mode change 100755 => 100644 create mode 100644 segment_subjects.sh diff --git a/segment_batching_script.sh b/preprocess_label.sh old mode 100755 new mode 100644 similarity index 56% rename from segment_batching_script.sh rename to preprocess_label.sh index 9b45a62..981596b --- a/segment_batching_script.sh +++ b/preprocess_label.sh @@ -1,6 +1,6 @@ #!/bin/bash sct_run_batch -jobs 6 -path-data "/PATH/TO/dataset" \ - -subject-prefix "sub-" -exclude-list sub-107 sub-125 -script segment_sc_discs_deepseg.sh \ + -subject-prefix "sub-" -exclude-list sub-107 sub-125 -script segment_subjects.sh \ -path-output "/PATH/TO/dataset/derivatives/labels" diff --git a/segment_subjects.sh b/segment_subjects.sh new file mode 100644 index 0000000..f7b37e7 --- /dev/null +++ b/segment_subjects.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# +# Process data. This script is designed to be run in the folder for a single subject, however 'sct_run_batch' can be +# used to run this script multiple times in parallel across a multi-subject BIDS dataset. +# +# This script only deals with T2w and MT images for example purpose. For a more comprehensive qMRI analysis, see for +# example this script: https://github.com/spine-generic/spine-generic/blob/master/process_data.sh +# +# Usage: +# ./process_data.sh +# +# Example: +# ./process_data.sh sub-03 +# +# Author: Julien Cohen-Adad (modified by Nadia Blostein) + +# The following global variables are retrieved from the caller sct_run_batch +# but could be overwritten by uncommenting the lines below: +#PATH_DATA_PROCESSED="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels" +#PATH_RESULTS="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/results" +#PATH_LOG="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/log" +#PATH_QC="/Users/nadiablostein/Desktop/multi_subject/test/derivatives/sct_deepseg_labels/qc" + + +# BASH SETTINGS +# ====================================================================================================================== + +SUFFIX_T2w = "rec-composed_T2w" +SUFFIX_T1w = "rec-composed_T1w" + +# Uncomment for full verbose +# set -v + +# Immediately exit if error +set -e + +# Exit if user presses CTRL+C (Linux) or CMD+C (OSX) +trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT + + +# CONVENIENCE FUNCTIONS +# ====================================================================================================================== + +label_if_does_not_exist() { + ### + # This function checks if a manual label file already exists, then: + # - If it does, copy it locally. + # - If it doesn't, perform automatic labeling. 
+ # This allows you to add manual labels on a subject-by-subject basis without disrupting the pipeline. + ### + local file="$1" + local file_seg="$2" + # Update global variable with segmentation file name + FILELABEL="${file}_labels" + echo "Looking for labels: ${FILELABEL}" + if [[ -e ${FILELABEL}.nii.gz ]]; then + echo "Found! Using vertebral labels that exist." + else + echo "Not found. Proceeding with automatic labeling." + # Generate labeled segmentation + sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c t2 -qc "${PATH_QC}" -qc-subject "${SUBJECT}" + fi +} + +segment_if_does_not_exist() { + ### + # This function checks if a manual spinal cord segmentation file already exists, then: + # - If it does, copy it locally. + # - If it doesn't, perform automatic spinal cord segmentation. + # This allows you to add manual segmentations on a subject-by-subject basis without disrupting the pipeline. + ### + local file="$1" + local contrast="$2" + # Update global variable with segmentation file name + FILESEG="${file}_seg" + echo + echo "Looking for segmentation: ${FILESEG}" + if [[ -e ${FILESEG}.nii.gz ]]; then + echo "Found! Using SC segmentation that exists." + sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT} + else + echo "Not found. Proceeding with automatic segmentation." + # Segment spinal cord + sct_deepseg_sc -i ${file}.nii.gz -c $contrast -qc ${PATH_QC} -qc-subject ${SUBJECT} + fi +} + +# SCRIPT STARTS HERE +# ====================================================================================================================== + +# Retrieve input params +SUBJECT=$1 + +# get starting time: +start=`date +%s` + +# Display useful info for the log, such as SCT version, RAM and CPU cores available +sct_check_dependencies -short + +# Go to folder where data will be copied and processed +cd $PATH_DATA_PROCESSED +# Copy source images +rsync -avzh $PATH_DATA/$SUBJECT . + +# T2w +# ====================================================================================================================== +cd "${SUBJECT}/anat/" +file_t2="${SUBJECT}_${SUFFIX_T2w}" +# Segment spinal cord +segment_if_does_not_exist "${file_t2}" "t2" +file_t2_seg="${FILESEG}" +# Create vertebral labels +label_if_does_not_exist "${file_t2}" "${file_t2_seg}" + +# T1w +# ====================================================================================================================== +file_t1="${SUBJECT}_${SUFFIX_T1w}" +# Segment spinal cord +segment_if_does_not_exist "${file_t1}" "t1" +file_t1_seg="${FILESEG}" +# Create vertebral labels +label_if_does_not_exist "${file_t1}" "${file_t1_seg}" + +# Verify presence of output files and write log file if error +# ====================================================================================================================== +FILES_TO_CHECK=( + "$file_t2_seg.nii.gz" + "$file_t1_seg.nii.gz" +) +for file in "${FILES_TO_CHECK[@]}"; do + if [ ! 
-e "${file}" ]; then + echo "${SUBJECT}/${file} does not exist" >> "${PATH_LOG}/error.log" + fi +done + +# Display useful info for the log +end=`date +%s` +runtime=$((end-start)) +echo +echo "~~~" +echo "SCT version: `sct_version`" +echo "Ran on: `uname -nsr`" +echo "Duration: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" +echo "~~~" + From 66192764ba6116e65c625f58c31fb30d4c34be34 Mon Sep 17 00:00:00 2001 From: Nadia Blostein Date: Thu, 6 Jul 2023 00:19:25 -0400 Subject: [PATCH 47/54] updated README --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2db424a..dc78543 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,9 @@ Copy the file `configuration_default.json` and rename it as `configuration.json` - `subjects`: List of subjects to include in the preprocessing, separated with comma. - `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure. Typically, it should be "anat". - `contrast`: Contrast to be used by `sct_deepseg_sc` function. -- `suffix_image`: Suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`) -– `suffix_label-SC_seg`: Suffix for binary images of the spinal cord mask, after `suffix_image` (e.g. `_label-SC_seg`). -- `suffix_label-disc`: Suffix for the images of the intervertebral discs labeling, after `suffix_image` (e.g. `_label-disc`). +- `suffix_image`: Suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`). +- `first_disc`: Integer value corresponding to the label of the first vertebral disc you want present in the template (see [spinalcordtoolbox labeling conventions](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)). +- `last_disc`: Integer value corresponding to the label of the last vertebral disc you want present in the template. > **Note** > Note that SCT functions treat your images with bright CSF as "T2w" (i.e. `t2` option) and dark CSF as "T1w" (i.e. `t1` option). You can therefore still use SCT even if your images are not actually T1w and T2w. @@ -107,7 +107,7 @@ Copy the file `configuration_default.json` and rename it as `configuration.json` Run script: ``` -sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_deepseg.sh -path-output "/PATH/TO/results" +sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script preprocess_label.sh -path-output "/PATH/TO/results" ``` > **Note** @@ -122,7 +122,7 @@ sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_de ### Normalize spinal cord across subjects -`template_preprocessing_pipeline.py` contains several functions to normalize the spinal cord across subjects, in preparation for template generation. More specifically: +`preprocess_normalize.py` contains several functions to normalize the spinal cord across subjects, in preparation for template generation. More specifically: * Extracting the spinal cord centerline and compute the vertebral distribution along the spinal cord, for all subjects, * Computing the average centerline, by averaging the position of each intervertebral discs. 
@@ -122,7 +122,7 @@ sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_de
 
 ### Normalize spinal cord across subjects
 
-`template_preprocessing_pipeline.py` contains several functions to normalize the spinal cord across subjects, in preparation for template generation. More specifically:
+`preprocess_normalize.py` contains several functions to normalize the spinal cord across subjects, in preparation for template generation. More specifically:
 * Extracting the spinal cord centerline and computing the vertebral distribution along the spinal cord, for all subjects,
 * Computing the average centerline, by averaging the position of each intervertebral disc. The average centerline of the spinal cord is straightened,
 * Generating the initial template space, based on the average centerline and positions of intervertebral discs,
 * Straightening of all subjects' spinal cord on the initial template space.
@@ -132,7 +132,7 @@ sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script segment_sc_discs_de
 
 3. Run:
 ```
-python template_preprocessing_pipeline.py configuration.json LOWEST_DISC
+python preprocess_normalize.py configuration.json
 ```
 
 4. Once the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each other.

From 0f684d6cf89f4a760c6949f85573fb1bd1ac2e8d Mon Sep 17 00:00:00 2001
From: Julien Cohen-Adad
Date: Thu, 6 Jul 2023 09:38:05 -0400
Subject: [PATCH 48/54] Delete configuration.json

Related to #28
---
 configuration.json | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100755 configuration.json

diff --git a/configuration.json b/configuration.json
deleted file mode 100755
index 070ce0f..0000000
--- a/configuration.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-    "path_data": "/path/to/data/",
-    "subjects": "sub-101, sub-102",
-    "data_type":"anat",
-    "contrast":"t1",
-    "suffix_image": "_rec-composed_T1w",
-    "suffix_label-SC_seg": "_rec-composed_T1w_label-SC_seg",
-    "suffix_label-disc":"_rec-composed_T1w_label-disc"
-}
\ No newline at end of file

From b8dbd7320397cb1813c3c413cb8960623709f367 Mon Sep 17 00:00:00 2001
From: Julien Cohen-Adad
Date: Thu, 6 Jul 2023 09:40:57 -0400
Subject: [PATCH 49/54] Update configuration_default.json

Fixed for consistency
---
 configuration_default.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configuration_default.json b/configuration_default.json
index 7cd0e8d..2e9c66b 100644
--- a/configuration_default.json
+++ b/configuration_default.json
@@ -1,9 +1,9 @@
 {
     "path_data": "/path/to/data/",
     "subjects": "sub-001, sub-002, sub-003",
-    "data_type":"anat",
+    "data_type": "anat",
     "contrast": "t1",
     "suffix_image": "_T1w",
     "first_disc": "1",
     "last_disc": "26"
-}
\ No newline at end of file
+}

From a3c3dbf483f570914e74d1a2e090964398f223a1 Mon Sep 17 00:00:00 2001
From: jcohenadad
Date: Thu, 6 Jul 2023 09:43:40 -0400
Subject: [PATCH 50/54] Renamed according to #39

---
 segment_subjects.sh => preprocess_segment.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename segment_subjects.sh => preprocess_segment.sh (100%)

diff --git a/segment_subjects.sh b/preprocess_segment.sh
similarity index 100%
rename from segment_subjects.sh
rename to preprocess_segment.sh

From c1c3295c1d379e33aac1f81e54a5b214605702ef Mon Sep 17 00:00:00 2001
From: jcohenadad
Date: Thu, 6 Jul 2023 09:46:56 -0400
Subject: [PATCH 51/54] Updated README with correct sct_run_batch syntax

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index dc78543..7d7fd04 100644
--- a/README.md
+++ b/README.md
@@ -107,11 +107,12 @@ Copy the file `configuration_default.json` and rename it as `configuration.json`
 
 Run script:
 ```
-sct_run_batch -jobs 10 -path-data "/PATH/TO/dataset" -script preprocess_label.sh -path-output "/PATH/TO/results"
+sct_run_batch -jobs 6 -path-data "/PATH/TO/dataset" -script preprocess_segment.sh -path-output "/PATH/TO/results"
 ```
 
 > **Note**
-> Replace values appropriately based on your setup (eg: -jobs 10 means that 10 CPU-cores are used. For more details, run `sct_run_batch -h`).
+> Replace values appropriately based on your setup (eg: -jobs 6 means that 6 CPU-cores are used. For more details, run `sct_run_batch -h`).
+> If you wish to exclude subjects, add flag "-exclude-list". Example: `-exclude-list sub-107 sub-125`
 
 ### Quality control (QC)
 
From e37563517e05196f5927de4612f38476891264ea Mon Sep 17 00:00:00 2001
From: jcohenadad
Date: Thu, 6 Jul 2023 09:47:26 -0400
Subject: [PATCH 52/54] Removed as per #39

---
 preprocess_label.sh | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 preprocess_label.sh

diff --git a/preprocess_label.sh b/preprocess_label.sh
deleted file mode 100644
index 981596b..0000000
--- a/preprocess_label.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-sct_run_batch -jobs 6 -path-data "/PATH/TO/dataset" \
-    -subject-prefix "sub-" -exclude-list sub-107 sub-125 -script segment_subjects.sh \
-    -path-output "/PATH/TO/dataset/derivatives/labels"
-

From 90d0df6845fc88ebff30782da80c0a853bcc92e8 Mon Sep 17 00:00:00 2001
From: NadiaBlostein <33006815+NadiaBlostein@users.noreply.github.com>
Date: Thu, 6 Jul 2023 10:02:17 -0400
Subject: [PATCH 53/54] Update README.md

---
 README.md | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 7d7fd04..ef89af2 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,13 @@ Framework for creating MRI templates of the spinal cord. The framework has two d
 
 > The framework has to be run independently for each contrast. In the end, the generated templates across contrasts should be perfectly aligned. This is what was done for the PAM50 template.
 
+## Dependencies
+
+### [Spinal Cord Toolbox (SCT)](https://spinalcordtoolbox.com/)
+
+Installation instructions can be found [here](https://spinalcordtoolbox.com/user_section/installation.html).
+For this repository, we used SCT in developer mode (commit `e740edf4c8408ffa44ef7ba23ad068c6d07e4b87`).
+
 ### [ANIMAL registration framework](https://github.com/vfonov/nist_mni_pipelines)
 
 ANIMAL, part of the IPL longitudinal pipeline, is used for generating the template, using iterative nonlinear deformation.
@@ -62,7 +69,7 @@ dataset/
 ```
 
-## Data preprocessing
+## 1. Data preprocessing
 
 This pipeline includes the following steps:
 
 TODO: remove details below
 3. `configuration_default.json`: Copy this file and rename it as `configuration.json`. Edit it and modify according to your setup.
 4. Extraction of SC centerline, generation of average centerline in the template space, straightening/registration of all spinal cord images on the initial template space.
 
-
-### Install SCT
+### 1.1 Install SCT
 
 SCT is used for all preprocessing steps. The current version of the pipeline uses SCT development version (commit `e740edf4c8408ffa44ef7ba23ad068c6d07e4b87`) as we prepare for the release of SCT 6.0.
 
@@ -84,7 +90,7 @@ source ${SCT_DIR}/python/etc/profile.d/conda.sh
 conda activate venv_sct
 ```
 
-### Edit configuration file
+### 1.2 Edit configuration file
 
 Copy the file `configuration_default.json` and rename it as `configuration.json`. Edit it and modify according to your setup:
 
@@ -102,8 +108,7 @@ Copy the file `configuration_default.json` and rename it as `configuration.json`
 > **Note**
 > If you wish to make a template that does not align discs across subjects, please open an [issue](https://github.com/neuropoly/template/issues) and we will follow-up with you.
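To make section 1.2 concrete, an edited `configuration.json` might look like this (a sketch assembled from the documented fields; the subject IDs and the `_rec-composed_T1w` suffix are placeholders to adapt to your dataset):

```
# Hypothetical configuration.json for two subjects with composed T1w images
# (all values below are illustrative, not project defaults):
cat > configuration.json <<'EOF'
{
    "path_data": "/path/to/data/",
    "subjects": "sub-101, sub-102",
    "data_type": "anat",
    "contrast": "t1",
    "suffix_image": "_rec-composed_T1w",
    "first_disc": "1",
    "last_disc": "26"
}
EOF
```
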
-
-### Segment spinal cord and vertebral discs
+### 1.3 Segment spinal cord and vertebral discs
 
 Run script:
 ```
@@ -114,14 +119,13 @@ sct_run_batch -jobs 6 -path-data "/PATH/TO/dataset" -script preprocess_segment.s
 > Replace values appropriately based on your setup (eg: -jobs 6 means that 6 CPU-cores are used. For more details, run `sct_run_batch -h`).
 > If you wish to exclude subjects, add flag "-exclude-list". Example: `-exclude-list sub-107 sub-125`
 
-### Quality control (QC)
+### 1.4 Quality control (QC) labels
 
 * Spinal cord segmentation (or centerlines) and disc labels can be displayed by opening: `/PATH/TO/results/qc/index.html`
 * See [tutorial](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling.html) for tips on how to QC and fix segmentation (or centerline) and/or disc labels manually.
 
-### Normalize spinal cord across subjects
+### 1.5 Normalize spinal cord across subjects
 
 `preprocess_normalize.py` contains several functions to normalize the spinal cord across subjects, in preparation for template generation. More specifically:
 * Extracting the spinal cord centerline and computing the vertebral distribution along the spinal cord, for all subjects,
 * Computing the average centerline, by averaging the position of each intervertebral disc.
 * Generating the initial template space, based on the average centerline and positions of intervertebral discs,
 * Straightening of all subjects' spinal cord on the initial template space.
 
-2. Determine the integer value corresponding to the label of the lowest disc until which you want your template to go (depends on the lowest disc available in your images, nomenclature can be found [here](https://spinalcordtoolbox.com/user_section/tutorials/registration-to-template/vertebral-labeling/labeling-conventions.html)).
-
-3. Run:
+Run:
 ```
 python preprocess_normalize.py configuration.json
 ```
 
-4. Once the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each other.
+### 1.6 Quality control (QC) spinal cord normalization across subjects
+
+Once the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each other.
 
 
-## Template creation
+## 2. Template creation
 
 ### Dependencies for template generation (see [dependencies](#dependencies_anchor))
 - [ANIMAL registration framework, part of the IPL longitudinal pipeline](https://github.com/vfonov/nist_mni_pipelines)
 - [scoop](https://github.com/soravux/scoop/)
 
 python -m scoop -n N -vvv generate_template.py
 ```
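Regarding the `scoop` invocation above: `-n N` sets the number of SCOOP worker processes and `-vvv` increases logging verbosity. For example, on an 8-core machine one might run (an illustrative setting, not a requirement):

```
# Launch the template generation with 8 SCOOP workers and verbose logging
# (8 is an assumed core count; match -n to the CPUs you actually have):
python -m scoop -n 8 -vvv generate_template.py
```
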
 
 It is recommended to run the template generation on a large cluster. If you are in Canada, you could make use of [the Alliance](https://alliancecan.ca/en) (formerly Compute Canada), a set of CPU clusters accessible to researchers in Canada. **Once the preprocessing is complete**, you will generate the template with `generate_template.py`. This will require minc-toolkit v2, minc2simple and nist-mni-pipelines. The easiest way to set up is to use Compute Canada and set up your virtual environment (without Spinal Cord Toolbox, since your data should have already been preprocessed by now) as follows:
 
-1. Load the right modules and install packages from pip wheel
+a) Load the right modules and install packages from pip wheel
 ```
 module load StdEnv/2020 gcc/9.3.0 minc-toolkit/1.9.18.1 python/3.8.10
 pip install --upgrade pip
 pip install scoop
 ```
-2. Set up NIST-MNI pipelines
+b) Set up NIST-MNI pipelines
 ```
 git clone https://github.com/vfonov/nist_mni_pipelines.git
 nano ~/.bashrc
 ```
 
 export PYTHONPATH="${PYTHONPATH}:/path/to/nist_mni_pipelines/ipl"
 ```
 ```
 source ~/.bashrc
 ```
-3. Minc2simple
+c) Minc2simple
 ```
 pip install "git+https://github.com/NIST-MNI/minc2-simple.git@develop_new_build#subdirectory=python"
 ```
-4. Create my_job.sh
+d) Create `my_job.sh`
 ```
 #!/bin/bash
 python -m scoop -vvv generate_template.py
 ```
-5. Batch on Alliance Canada
+e) Batch on Alliance Canada
 ```
 sbatch --time=24:00:00 --mem-per-cpu 4000 my_job.sh # will probably require batching several times, depending on number of subjects
 ```
+
 ## Additional information
 
 To have the generated template registered to an existing space (eg, ICBM152), please open an [issue](https://github.com/neuropoly/template/issues) and we will follow-up with you.

From f642ca62c67b90514badeb867725926336d3c367 Mon Sep 17 00:00:00 2001
From: NadiaBlostein <33006815+NadiaBlostein@users.noreply.github.com>
Date: Thu, 6 Jul 2023 10:09:23 -0400
Subject: [PATCH 54/54] Update README.md

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index ef89af2..9e82aed 100644
--- a/README.md
+++ b/README.md
@@ -69,15 +69,15 @@ dataset/
 ```
 
-## 1. Data preprocessing
+## Step 1. Data preprocessing
 
-This pipeline includes the following steps:
-
-TODO: remove details below
-1. Segmentation of SC and disc labeling
-2. QC. If seg didn't work, fix SC seg. If disc labeling did not work, fix labeling.
-3. `configuration_default.json`: Copy this file and rename it as `configuration.json`. Edit it and modify according to your setup.
-4. Extraction of SC centerline, generation of average centerline in the template space, straightening/registration of all spinal cord images on the initial template space.
+This pipeline includes the following steps:\
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**1.1** Install SCT;\
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**1.2** Edit configuration file;\
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**1.3** Segment spinal cord and vertebral discs;\
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**1.4** Quality control (QC) labels;\
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**1.5** Normalize spinal cord across subjects;\
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**1.6** Quality control (QC) spinal cord normalization across subjects.
 
 ### 1.1 Install SCT
 
@@ -143,7 +143,7 @@ python preprocess_normalize.py configuration.json
 
 Once the preprocessing is performed, please check your data. The preprocessing results should be a series of straight images registered in the same space, with all the vertebral levels aligned with each other.
 
 
-## 2. Template creation
+## Step 2. Template creation
 
 ### Dependencies for template generation (see [dependencies](#dependencies_anchor))
 - [ANIMAL registration framework, part of the IPL longitudinal pipeline](https://github.com/vfonov/nist_mni_pipelines)