Commit
Merge pull request #75 from neuropoly/rb/preprocess_segment
Removal of hard-coded `configuration` file in `preprocess_segment.sh` script and extraction of `include_list` from the `configuration` file directly
rohanbanerjee authored Aug 28, 2023
2 parents abf3df0 + a5d1402 commit a7915f4
Showing 4 changed files with 43 additions and 42 deletions.
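In practice, the per-subject script no longer opens a hard-coded `configuration.json`: the path is handed to it by `sct_run_batch` as a second positional argument (via `-script-args`), while `include_list` lives in the same file that is passed to `sct_run_batch -config`, so the separate `-include-list` flag is no longer needed. A minimal sketch of the new script interface (illustrative, not a verbatim excerpt of the diff):

```
#!/bin/bash
# Arguments injected by sct_run_batch for each subject:
SUBJECT=$1    # e.g. sub-001
CONFIG=$2     # path to configuration.json, forwarded through -script-args

# The configuration is then read once and individual keys are pulled out with sed:
json_file=$CONFIG
json_data=$(cat "$json_file")
PATH_DATA=$(echo "$json_data" | sed -n 's/.*"path_data": "\(.*\)".*/\1/p')
```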
9 changes: 4 additions & 5 deletions README.md
@@ -98,7 +98,7 @@ conda activate venv_sct
Copy the file `configuration_default.json` and rename it as `configuration.json`. Edit it and modify according to your setup:

- `path_data`: Absolute path to the input [BIDS dataset](#dataset-structure); the path should end with `/`.
- `include-list`: List of subjects to include in the preprocessing, separated with a space.
- `include_list`: List of subjects to include in the preprocessing, separated with a space.
- `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure. Typically, it should be "anat".
- `contrast`: Contrast to be used by `sct_deepseg_sc` function.
- `suffix_image`: Suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`).
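A quick way to set this file up from the shipped template (subject IDs below are placeholders):

```
cp configuration_default.json configuration.json
# Edit the copy for your dataset; note that the key is now "include_list"
# (underscore), not "include-list":
grep '"include_list"' configuration.json
```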
@@ -115,16 +115,15 @@ Copy the file `configuration_default.json` and rename it as `configuration.json`

Run script:
```
sct_run_batch -script preprocess_segment.sh -config configuration.json -include-list sub-001 sub-002 sub-003 -path-output PATH_OUT -jobs N_CPU
sct_run_batch -script preprocess_segment.sh -config configuration.json -path-output PATH_OUT -jobs N_CPU -script-args configuration.json
```
> **Note**
> The same `configuration.json` file should be given to both the `-config` and `-script-args` flags.
With:
- `PATH_OUT`: The location where the processed data, results, logs and QC information will be output. Example: `/scratch/template_preproc_YYYYMMDD-HHMMSS`. This is a temporary directory in that it is only needed to QC your labels. It therefore cannot be stored inside `path_data`.
- `N_CPU`: The number of CPU cores to dedicate to this task (one subject will be processed per core).
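For example, with placeholder values substituted:

```
sct_run_batch -script preprocess_segment.sh -config configuration.json \
              -path-output /scratch/template_preproc_20230828-120000 -jobs 8 \
              -script-args configuration.json
```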

> **Note**
> Copy-paste the values to the `include-list` key from `configuration.json` to go after `-include-list` option here.
### 1.4 Quality control (QC) labels

* Spinal cord segmentation (or centerlines) and disc labels can be displayed by opening: `PATH_OUT/qc/index.html`;
2 changes: 1 addition & 1 deletion configuration_default.json
@@ -1,6 +1,6 @@
{
"path_data": "/path/to/data/",
"include-list": "sub-001 sub-002 sub-003",
"include_list": "sub-001 sub-002 sub-003",
"data_type": "anat",
"contrast": "t1",
"suffix_image": "_T1w",
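For reference, the renamed key can be consumed from the shell with the same sed pattern that `preprocess_segment.sh` uses for its other fields; this is a sketch, not code from the commit, and it assumes the pretty-printed one-key-per-line layout of the default file:

```
# Extract the space-separated subject list and iterate over it, mirroring the
# Python side's dataset_info['include_list'].split(' ')
INCLUDE_LIST=$(sed -n 's/.*"include_list": "\(.*\)".*/\1/p' configuration.json)
for subject in $INCLUDE_LIST; do
    echo "would process: $subject"
done
```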
14 changes: 7 additions & 7 deletions preprocess_normalize.py
@@ -101,7 +101,7 @@ def read_dataset(fname_json = 'configuration.json', path_data = './'):
with open(fname_json) as data_file: dataset_info = json.load(data_file)

error = ''
key_list = ["path_data", "include-list", "data_type", "contrast", "suffix_image", "last_disc"]
key_list = ["path_data", "include_list", "data_type", "contrast", "suffix_image", "last_disc"]

for key in key_list:
if key not in dataset_info.keys(): error += 'Dataset configuration file ' + fname_json + ' must contain the field ' + key + '.\n'
@@ -121,7 +121,7 @@ def generate_centerline(dataset_info, algo_fitting = 'linear', smooth = 50, degr
:return list of centerline objects
"""
path_data = dataset_info['path_data']
list_subjects = dataset_info['include-list'].split(' ')
list_subjects = dataset_info['include_list'].split(' ')
last_disc = int(dataset_info['last_disc'])
list_centerline = []
current_path = os.getcwd()
@@ -468,7 +468,7 @@ def straighten_all_subjects(dataset_info, normalized = False):
"""
path_data = dataset_info['path_data']
path_template = dataset_info['path_data'] + 'derivatives/template/'
list_subjects = dataset_info['include-list'].split(' ')
list_subjects = dataset_info['include_list'].split(' ')

if not os.path.exists(dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord'): os.makedirs(dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord')

@@ -509,7 +509,7 @@ def normalize_intensity_template(dataset_info, verbose = 1):
:return:
"""
fname_template_centerline = dataset_info['path_data'] + 'derivatives/template/' + 'template_label-centerline.npz'
list_subjects = dataset_info['include-list'].split(' ')
list_subjects = dataset_info['include_list'].split(' ')

average_intensity = []
intensity_profiles = {}
@@ -616,7 +616,7 @@ def smooth(x, window_len = 11, window = 'hanning'):
image_new.save(fname_image_normalized)

def copy_preprocessed_images(dataset_info):
list_subjects = dataset_info['include-list'].split(' ')
list_subjects = dataset_info['include_list'].split(' ')

tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True)

@@ -628,7 +628,7 @@ def copy_preprocessed_images(dataset_info):

def create_mask_template(dataset_info):
path_template = dataset_info['path_data'] + 'derivatives/template/'
subject_name = dataset_info['include-list'].split(' ')[0]
subject_name = dataset_info['include_list'].split(' ')[0]

template_mask = Image(path_template + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz')
template_mask.data *= 0.0
@@ -643,7 +643,7 @@ def create_mask_template(dataset_info):

def convert_data2mnc(dataset_info):
path_template = dataset_info['path_data'] + 'derivatives/template/'
list_subjects = dataset_info['include-list'].split(' ')
list_subjects = dataset_info['include_list'].split(' ')

path_template_mask = create_mask_template(dataset_info)

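Because the rename from `include-list` to `include_list` touches the README, the default configuration and the Python pipeline, a repository-wide check for leftovers can be handy (a convenience check, not part of the commit):

```
# Lists any surviving hyphenated references; prints a confirmation if none remain
grep -rn '"include-list"' . || echo "no stale include-list references found"
```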
60 changes: 31 additions & 29 deletions preprocess_segment.sh
@@ -9,13 +9,36 @@
# Example:
# ./preprocess_segment.sh sub-03 configuration.json
#
# Author: Julien Cohen-Adad (modified by Nadia Blostein)
# Author: Julien Cohen-Adad (modified by Nadia Blostein and Rohan Banerjee)


# Uncomment for full verbose
set -x

# Immediately exit if error
set -e -o pipefail

# Exit if user presses CTRL+C (Linux) or CMD+C (OSX)
trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT

SUBJECT=$1
CONFIG=$2

# Print retrieved variables from the sct_run_batch caller to the log (to allow easier debugging)
echo "Retrieved variables from the caller sct_run_batch:"
echo "PATH_DATA: ${PATH_DATA}"
echo "PATH_DATA_PROCESSED: ${PATH_DATA_PROCESSED}"
echo "PATH_RESULTS: ${PATH_RESULTS}"
echo "PATH_LOG: ${PATH_LOG}"
echo "PATH_QC: ${PATH_QC}"
echo "SUBJECT: ${SUBJECT}"
echo "CONFIG FILE PATH: ${CONFIG}"


# Parsing .json file (`configuration.json`)
# ======================================================================================================================
json_file=$CONFIG

json_file="configuration.json"
# Check if the JSON file exists
if [ ! -f "$json_file" ]; then
echo "JSON file not found: $json_file"
@@ -29,7 +52,6 @@ json_data=$(cat "$json_file")
# Global parameters & Bash settings
# ======================================================================================================================

SUBJECT=$1
PATH_DATA=$(echo "$json_data" | sed -n 's/.*"path_data": "\(.*\)".*/\1/p')
DATA_TYPE=$(echo "$json_data" | sed -n 's/.*"data_type": "\(.*\)".*/\1/p')
IMAGE_SUFFIX=$(echo "$json_data" | sed -n 's/.*"suffix_image": "\(.*\)".*/\1/p')
@@ -68,41 +90,21 @@ rsync -avzh $PATH_DATA/$SUBJECT/$DATA_TYPE/${SUBJECT}${IMAGE_SUFFIX}.nii.gz $PAT
# ======================================================================================================================

FILESEG="${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask.nii.gz"

echo "Looking for segmentation: ${FILESEG}"
if [[ -e "${FILESEG}" ]]; then
echo "Found! Using SC segmentation that exists."
sct_qc -i ${FILE} -s "${FILESEG}" -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT}
else
echo "Not found. Proceeding with automatic segmentation."
# Segment spinal cord
sct_deepseg_sc -i ${FILE} -o ${FILESEG} -c ${CONTRAST} -qc ${PATH_QC} -qc-subject ${SUBJECT}
fi

sct_deepseg_sc -i ${FILE} -o ${FILESEG} -c ${CONTRAST} -qc ${PATH_QC} -qc-subject ${SUBJECT}

# Label discs if do not exist
# ======================================================================================================================

FILELABEL="${SUBJECT}${IMAGE_SUFFIX}_labels-disc.nii.gz"

echo "Looking for disc labels: ${FILELABEL}"
if [[ -e "${FILELABEL}" ]]; then
echo "Found! Using vertebral labels that exist."
sct_qc -i ${FILE} -s "${FILELABEL}" -p sct_label_vertebrae -qc ${PATH_QC} -qc-subject ${SUBJECT}
else
echo "Not found. Proceeding with automatic labeling."
# Generate labeled segmentation
sct_label_vertebrae -i ${FILE} -s "${FILESEG}" -c ${CONTRAST} -qc "${PATH_QC}" -qc-subject "${SUBJECT}"
mv "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled_discs.nii.gz" "${FILELABEL}"
rm "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled.nii.gz"
fi

FILELABEL="${SUBJECT}${IMAGE_SUFFIX}_labeled-discs.nii.gz"
sct_label_vertebrae -i ${FILE} -s "${FILESEG}" -c ${CONTRAST} -qc "${PATH_QC}" -qc-subject "${SUBJECT}"
mv "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled_discs.nii.gz" "${FILELABEL}"
rm "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled.nii.gz"

# Verify presence of output files and write log file if error
# ======================================================================================================================
FILES_TO_CHECK=(
"$FILESEG"
"$FILELABEL"
"$FILELABEL"
)
for file in "${FILES_TO_CHECK[@]}"; do
if [ ! -e "${file}" ]; then
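One possible follow-up hardening, not included in this commit, would be to fail early when the configuration path is missing, since the existing `$json_file` check would otherwise print a message with an empty filename:

```
# Hypothetical guard, placed right after SUBJECT=$1 / CONFIG=$2:
if [ -z "$CONFIG" ]; then
    echo "Missing configuration file argument; pass it with -script-args configuration.json"
    exit 1
fi
```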
