From a5d140273d1581fc8dad46878e3df574f73b33c3 Mon Sep 17 00:00:00 2001 From: Rohan Banerjee Date: Thu, 17 Aug 2023 12:36:39 -0400 Subject: [PATCH] updated preprocess_segment.sh for easier use --- README.md | 9 +++--- configuration_default.json | 2 +- preprocess_normalize.py | 14 ++++----- preprocess_segment.sh | 60 ++++++++++++++++++++------------------ 4 files changed, 43 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 5e85c17..d0efacd 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ conda activate venv_sct Copy the file `configuration_default.json` and rename it as `configuration.json`. Edit it and modify according to your setup: - `path_data`: Absolute path to the input [BIDS dataset](#dataset-structure); the path should end with `/`. -- `include-list`: List of subjects to include in the preprocessing, separated with a space. +- `include_list`: List of subjects to include in the preprocessing, separated with a space. - `data_type`: [BIDS data type](https://bids-standard.github.io/bids-starter-kit/folders_and_files/folders.html#datatype), same as subfolder name in dataset structure. Typically, it should be "anat". - `contrast`: Contrast to be used by `sct_deepseg_sc` function. - `suffix_image`: Suffix for image data, after subject ID but before file extension (e.g. `_rec-composed_T1w` in `sub-101_rec-composed_T1w.nii.gz`). @@ -115,16 +115,15 @@ Copy the file `configuration_default.json` and rename it as `configuration.json` Run script: ``` -sct_run_batch -script preprocess_segment.sh -config configuration.json -include-list sub-001 sub-002 sub-003 -path-output PATH_OUT -jobs N_CPU +sct_run_batch -script preprocess_segment.sh -config configuration.json -path-output PATH_OUT -jobs N_CPU -script-args configuration.json ``` +> **Note** +> The same `configuration.json` path must be passed to both the `-config` and `-script-args` flags. 
With: - `PATH_OUT`: The location where to output the processed data, results, the logs and the QC information. Example: `/scratch/template_preproc_YYYYMMDD-HHMMSS`. This is a temporary directory in that it is only needed to QC your labels. It therefore cannot be stored inside `path_data`. - `N_CPU`: The number of CPU cores to dedicate to this task (one subject will be process per core). -> **Note** -> Copy-paste the values to the `include-list` key from `configuration.json` to go after `-include-list` option here. - ### 1.4 Quality control (QC) labels * Spinal cord segmentation (or centerlines) and disc labels can be displayed by opening: `PATH_OUT/qc/index.html`; diff --git a/configuration_default.json b/configuration_default.json index 6322978..7d3b7bc 100644 --- a/configuration_default.json +++ b/configuration_default.json @@ -1,6 +1,6 @@ { "path_data": "/path/to/data/", - "include-list": "sub-001 sub-002 sub-003", + "include_list": "sub-001 sub-002 sub-003", "data_type": "anat", "contrast": "t1", "suffix_image": "_T1w", diff --git a/preprocess_normalize.py b/preprocess_normalize.py index 86195e9..557b4de 100644 --- a/preprocess_normalize.py +++ b/preprocess_normalize.py @@ -101,7 +101,7 @@ def read_dataset(fname_json = 'configuration.json', path_data = './'): with open(fname_json) as data_file: dataset_info = json.load(data_file) error = '' - key_list = ["path_data", "include-list", "data_type", "contrast", "suffix_image", "last_disc"] + key_list = ["path_data", "include_list", "data_type", "contrast", "suffix_image", "last_disc"] for key in key_list: if key not in dataset_info.keys(): error += 'Dataset configuration file ' + fname_json + ' must contain the field ' + key + '.\n' @@ -121,7 +121,7 @@ def generate_centerline(dataset_info, algo_fitting = 'linear', smooth = 50, degr :return list of centerline objects """ path_data = dataset_info['path_data'] - list_subjects = dataset_info['include-list'].split(' ') + list_subjects = 
dataset_info['include_list'].split(' ') last_disc = int(dataset_info['last_disc']) list_centerline = [] current_path = os.getcwd() @@ -468,7 +468,7 @@ def straighten_all_subjects(dataset_info, normalized = False): """ path_data = dataset_info['path_data'] path_template = dataset_info['path_data'] + 'derivatives/template/' - list_subjects = dataset_info['include-list'].split(' ') + list_subjects = dataset_info['include_list'].split(' ') if not os.path.exists(dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord'): os.makedirs(dataset_info['path_data'] + 'derivatives/sct_straighten_spinalcord') @@ -509,7 +509,7 @@ def normalize_intensity_template(dataset_info, verbose = 1): :return: """ fname_template_centerline = dataset_info['path_data'] + 'derivatives/template/' + 'template_label-centerline.npz' - list_subjects = dataset_info['include-list'].split(' ') + list_subjects = dataset_info['include_list'].split(' ') average_intensity = [] intensity_profiles = {} @@ -616,7 +616,7 @@ def smooth(x, window_len = 11, window = 'hanning'): image_new.save(fname_image_normalized) def copy_preprocessed_images(dataset_info): - list_subjects = dataset_info['include-list'].split(' ') + list_subjects = dataset_info['include_list'].split(' ') tqdm_bar = tqdm(total = len(list_subjects), unit = 'B', unit_scale = True, desc = "Status", ascii = True) @@ -628,7 +628,7 @@ def copy_preprocessed_images(dataset_info): def create_mask_template(dataset_info): path_template = dataset_info['path_data'] + 'derivatives/template/' - subject_name = dataset_info['include-list'].split(' ')[0] + subject_name = dataset_info['include_list'].split(' ')[0] template_mask = Image(path_template + subject_name + dataset_info['suffix_image'] + '_straight_norm.nii.gz') template_mask.data *= 0.0 @@ -643,7 +643,7 @@ def create_mask_template(dataset_info): def convert_data2mnc(dataset_info): path_template = dataset_info['path_data'] + 'derivatives/template/' - list_subjects = 
dataset_info['include-list'].split(' ') + list_subjects = dataset_info['include_list'].split(' ') path_template_mask = create_mask_template(dataset_info) diff --git a/preprocess_segment.sh b/preprocess_segment.sh index 6f4c974..8f98363 100755 --- a/preprocess_segment.sh +++ b/preprocess_segment.sh @@ -9,13 +9,36 @@ # Example: # ./process_data.sh sub-03 # -# Author: Julien Cohen-Adad (modified by Nadia Blostein) +# Author: Julien Cohen-Adad (modified by Nadia Blostein and Rohan Banerjee) + + +# Full verbose: print each command before it is executed (comment out to disable) +set -x + +# Immediately exit if error +set -e -o pipefail + +# Exit if user presses CTRL+C (keyboard interrupt, SIGINT) +trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT + +SUBJECT=$1 +CONFIG=$2 + +# Print retrieved variables from the sct_run_batch script to the log (to allow easier debug) +echo "Retrieved variables from from the caller sct_run_batch:" +echo "PATH_DATA: ${PATH_DATA}" +echo "PATH_DATA_PROCESSED: ${PATH_DATA_PROCESSED}" +echo "PATH_RESULTS: ${PATH_RESULTS}" +echo "PATH_LOG: ${PATH_LOG}" +echo "PATH_QC: ${PATH_QC}" +echo "SUBJECT: ${SUBJECT}" +echo "CONFIG FILE PATH: ${CONFIG}" # Parsing .json file (`configuration.json`) # ====================================================================================================================== +json_file=$CONFIG -json_file="configuration.json" # Check if the JSON file exists if [ ! 
-f "$json_file" ]; then echo "JSON file not found: $json_file" @@ -29,7 +52,6 @@ json_data=$(cat "$json_file") # Global parameters & Bash settings # ====================================================================================================================== -SUBJECT=$1 PATH_DATA=$(echo "$json_data" | sed -n 's/.*"path_data": "\(.*\)".*/\1/p') DATA_TYPE=$(echo "$json_data" | sed -n 's/.*"data_type": "\(.*\)".*/\1/p') IMAGE_SUFFIX=$(echo "$json_data" | sed -n 's/.*"suffix_image": "\(.*\)".*/\1/p') @@ -68,41 +90,21 @@ rsync -avzh $PATH_DATA/$SUBJECT/$DATA_TYPE/${SUBJECT}${IMAGE_SUFFIX}.nii.gz $PAT # ====================================================================================================================== FILESEG="${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask.nii.gz" - -echo "Looking for segmentation: ${FILESEG}" -if [[ -e "${FILESEG}" ]]; then - echo "Found! Using SC segmentation that exists." - sct_qc -i ${FILE} -s "${FILESEG}" -p sct_deepseg_sc -qc ${PATH_QC} -qc-subject ${SUBJECT} -else - echo "Not found. Proceeding with automatic segmentation." - # Segment spinal cord - sct_deepseg_sc -i ${FILE} -o ${FILESEG} -c ${CONTRAST} -qc ${PATH_QC} -qc-subject ${SUBJECT} -fi - +sct_deepseg_sc -i ${FILE} -o ${FILESEG} -c ${CONTRAST} -qc ${PATH_QC} -qc-subject ${SUBJECT} # Label discs if do not exist # ====================================================================================================================== -FILELABEL="${SUBJECT}${IMAGE_SUFFIX}_labels-disc.nii.gz" - -echo "Looking for disc labels: ${FILELABEL}" -if [[ -e "${FILELABEL}" ]]; then - echo "Found! Using vertebral labels that exist." - sct_qc -i ${FILE} -s "${FILELABEL}" -p sct_label_vertebrae -qc ${PATH_QC} -qc-subject ${SUBJECT} -else - echo "Not found. Proceeding with automatic labeling." 
- # Generate labeled segmentation - sct_label_vertebrae -i ${FILE} -s "${FILESEG}" -c ${CONTRAST} -qc "${PATH_QC}" -qc-subject "${SUBJECT}" - mv "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled_discs.nii.gz" "${FILELABEL}" - rm "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled.nii.gz" -fi - +FILELABEL="${SUBJECT}${IMAGE_SUFFIX}_labeled-discs.nii.gz" +sct_label_vertebrae -i ${FILE} -s "${FILESEG}" -c ${CONTRAST} -qc "${PATH_QC}" -qc-subject "${SUBJECT}" +mv "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled_discs.nii.gz" "${FILELABEL}" +rm "${SUBJECT}${IMAGE_SUFFIX}_label-SC_mask_labeled.nii.gz" # Verify presence of output files and write log file if error # ====================================================================================================================== FILES_TO_CHECK=( "$FILESEG" - "$FILELABEL" + "$FILELABEL" ) for file in "${FILES_TO_CHECK[@]}"; do if [ ! -e "${file}" ]; then