diff --git a/bin/wm_append_clusters_to_anatomical_tracts.py b/bin/wm_append_clusters_to_anatomical_tracts.py index c95c8287..e5851dab 100755 --- a/bin/wm_append_clusters_to_anatomical_tracts.py +++ b/bin/wm_append_clusters_to_anatomical_tracts.py @@ -10,177 +10,182 @@ print(f"<{os.path.basename(__file__)}> Error importing white matter analysis package\n") raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Append multiple fiber clusters into anatomical tracts based on the ORG atlas.", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") - -parser.add_argument( - 'inputDirectory', - help='Contains fiber clusters as vtkPolyData file(s).') -parser.add_argument( - 'atlasDirectory', - help='The ORG atlas folder that contains the anatomical tract MRML files.') -parser.add_argument( - 'outputDirectory', - help='The output directory should be a new empty directory. It will be created if needed.') - -args = parser.parse_args() - -inputdir = os.path.abspath(args.inputDirectory) -if not os.path.isdir(args.inputDirectory): - print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist.") - exit() - -inputdir_left = os.path.join(inputdir, 'tracts_left_hemisphere') -inputdir_right = os.path.join(inputdir, 'tracts_right_hemisphere') -inputdir_comm = os.path.join(inputdir, 'tracts_commissural') - -if not os.path.isdir(inputdir_left): - print(f"<{os.path.basename(__file__)}> Error: Input directory", inputdir_left, "does not exist.") - exit() -if not os.path.isdir(inputdir_right): - print(f"<{os.path.basename(__file__)}> Error: Input directory", inputdir_right, "does not exist.") - exit() -if not os.path.isdir(inputdir_comm): - print(f"<{os.path.basename(__file__)}> Error: Input directory", inputdir_comm, "does not exist.") - exit() - -atlasdir = os.path.abspath(args.atlasDirectory) -if not os.path.isdir(args.atlasDirectory): - print(f"<{os.path.basename(__file__)}> Error: Atlas directory", args.atlasDirectory, "does not exist.") - exit() - -def list_mrml_files(input_dir): - # Find input files - input_mask = f"{input_dir}/T*.mrml" - input_mrml_fnames = glob.glob(input_mask) - return (input_mrml_fnames) - -mrml_files = list_mrml_files(atlasdir) - -if len(mrml_files) == 0: - print(f"<{os.path.basename(__file__)}> Error: There is no mrml files in the input atlas folder.") -else: - print(f"<{os.path.basename(__file__)}>", len(mrml_files)-1, "mrml files are detected.") - -outdir = os.path.abspath(args.outputDirectory) -if not os.path.exists(args.outputDirectory): - print(f"<{os.path.basename(__file__)}> Output directory", args.outputDirectory, "does not exist, creating it.") - os.makedirs(outdir) - -def output_appended_tract(cluster_vtp_list, outputfile): - appender = vtk.vtkAppendPolyData() - for c_idx in range(len(cluster_vtp_list)): - cluster_vtp = cluster_vtp_list[c_idx] - pd_cluster = wma.io.read_polydata(cluster_vtp) - - vtk_array = vtk.vtkIntArray() - vtk_array.SetName('cluster_idx') - for p_idx in range(0, pd_cluster.GetNumberOfPoints()): - vtk_array.InsertNextTuple1(int(c_idx)) - - pd_cluster.GetPointData().AddArray(vtk_array) - - #print '', cluster_vtp, ', number of fibers', pd_cluster.GetNumberOfLines() - - if (vtk.vtkVersion().GetVTKMajorVersion() >= 6.0): - appender.AddInputData(pd_cluster) - else: - appender.AddInput(pd_cluster) - - appender.Update() - pd_appended_cluster = appender.GetOutput() - - wma.io.write_polydata(pd_appended_cluster, outputfile) - -hemispheric_tracts = ["T_AF", "T_CB", 
"T_CPC", "T_MdLF", "T_PLIC", "T_SLF-I", "T_SLF-II", "T_SLF-III", "T_EC", "T_EmC", "T_ICP", "T_ILF", "T_IOFF", "T_UF", - "T_Intra-CBLM-I&P", "T_Intra-CBLM-PaT", "T_CR-F", "T_CR-P", "T_CST", "T_SF", "T_SO", "T_SP", "T_TF", "T_TO", "T_TP", - "T_Sup-F", "T_Sup-FP", "T_Sup-O", "T_Sup-OT", "T_Sup-P", "T_Sup-PO", "T_Sup-PT", "T_Sup-T"] - -commissural_tracts = ["T_CC1", "T_CC2", "T_CC3", "T_CC4", "T_CC5", "T_CC6", "T_CC7", "T_MCP"] - - -print(f"<{os.path.basename(__file__)}> hemispheric tracts (left and right): ") -tract_idx = 1 -for tract in hemispheric_tracts: - - print(" *", tract_idx, "-", tract) - tract_idx = tract_idx + 1 - mrml = os.path.join(atlasdir, tract+".mrml") - - if not os.path.exists(mrml): - print(f"<{os.path.basename(__file__)}> Error: Cannot locate", mrml) + +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Append multiple fiber clusters into anatomical tracts based on the ORG atlas.", + epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") + + parser.add_argument( + 'inputDirectory', + help='Contains fiber clusters as vtkPolyData file(s).') + parser.add_argument( + 'atlasDirectory', + help='The ORG atlas folder that contains the anatomical tract MRML files.') + parser.add_argument( + 'outputDirectory', + help='The output directory should be a new empty directory. It will be created if needed.') + + args = parser.parse_args() + + inputdir = os.path.abspath(args.inputDirectory) + if not os.path.isdir(args.inputDirectory): + print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist.") exit() - cluster_vtp_list_left = list() - cluster_vtp_list_right = list() - f = open(mrml) - for line in f: - idx = line.find('.vtp') - if idx > 0: - cluster_vtp_filename = line[idx-13:idx+4] - - cluster_vtp_filename_left = os.path.join(inputdir_left, cluster_vtp_filename); - if not os.path.exists(cluster_vtp_filename_left): - print(f"<{os.path.basename(__file__)}> Error:", cluster_vtp_filename_left, "does not exist.") - exit() - cluster_vtp_list_left.append(cluster_vtp_filename_left) - - cluster_vtp_filename_right = os.path.join(inputdir_right, cluster_vtp_filename); - if not os.path.exists(cluster_vtp_filename_right): - print(f"<{os.path.basename(__file__)}> Error:", cluster_vtp_filename_right, "does not exist.") - exit() - cluster_vtp_list_right.append(cluster_vtp_filename_right) - - output_tract_left = os.path.join(outdir, tract + '_left.vtp') - output_tract_right = os.path.join(outdir, tract + '_right.vtp') - - output_appended_tract(cluster_vtp_list_left, output_tract_left) - output_appended_tract(cluster_vtp_list_right, output_tract_right) - -print(f"<{os.path.basename(__file__)}> commissural tracts: ") -for tract in commissural_tracts: - - print(" *", tract_idx, "-", tract) - tract_idx = tract_idx + 1 - - mrml = os.path.join(atlasdir, tract+".mrml") - - if not os.path.exists(mrml): - print(f"<{os.path.basename(__file__)}> Error: Cannot locate", mrml) + inputdir_left = os.path.join(inputdir, 'tracts_left_hemisphere') + inputdir_right = os.path.join(inputdir, 'tracts_right_hemisphere') + inputdir_comm = os.path.join(inputdir, 'tracts_commissural') + + if not os.path.isdir(inputdir_left): + print(f"<{os.path.basename(__file__)}> Error: Input directory", inputdir_left, "does not exist.") + exit() + if not os.path.isdir(inputdir_right): + print(f"<{os.path.basename(__file__)}> Error: Input directory", inputdir_right, "does not exist.") exit() + if not os.path.isdir(inputdir_comm): 
+ print(f"<{os.path.basename(__file__)}> Error: Input directory", inputdir_comm, "does not exist.") + exit() + + atlasdir = os.path.abspath(args.atlasDirectory) + if not os.path.isdir(args.atlasDirectory): + print(f"<{os.path.basename(__file__)}> Error: Atlas directory", args.atlasDirectory, "does not exist.") + exit() + + def list_mrml_files(input_dir): + # Find input files + input_mask = f"{input_dir}/T*.mrml" + input_mrml_fnames = glob.glob(input_mask) + return (input_mrml_fnames) + + mrml_files = list_mrml_files(atlasdir) + + if len(mrml_files) == 0: + print(f"<{os.path.basename(__file__)}> Error: There is no mrml files in the input atlas folder.") + else: + print(f"<{os.path.basename(__file__)}>", len(mrml_files)-1, "mrml files are detected.") + + outdir = os.path.abspath(args.outputDirectory) + if not os.path.exists(args.outputDirectory): + print(f"<{os.path.basename(__file__)}> Output directory", args.outputDirectory, "does not exist, creating it.") + os.makedirs(outdir) + + def output_appended_tract(cluster_vtp_list, outputfile): + appender = vtk.vtkAppendPolyData() + for c_idx in range(len(cluster_vtp_list)): + cluster_vtp = cluster_vtp_list[c_idx] + pd_cluster = wma.io.read_polydata(cluster_vtp) + + vtk_array = vtk.vtkIntArray() + vtk_array.SetName('cluster_idx') + for p_idx in range(0, pd_cluster.GetNumberOfPoints()): + vtk_array.InsertNextTuple1(int(c_idx)) + + pd_cluster.GetPointData().AddArray(vtk_array) + + #print '', cluster_vtp, ', number of fibers', pd_cluster.GetNumberOfLines() + + if (vtk.vtkVersion().GetVTKMajorVersion() >= 6.0): + appender.AddInputData(pd_cluster) + else: + appender.AddInput(pd_cluster) + + appender.Update() + pd_appended_cluster = appender.GetOutput() + + wma.io.write_polydata(pd_appended_cluster, outputfile) + + hemispheric_tracts = ["T_AF", "T_CB", "T_CPC", "T_MdLF", "T_PLIC", "T_SLF-I", "T_SLF-II", "T_SLF-III", "T_EC", "T_EmC", "T_ICP", "T_ILF", "T_IOFF", "T_UF", + "T_Intra-CBLM-I&P", "T_Intra-CBLM-PaT", "T_CR-F", "T_CR-P", "T_CST", "T_SF", "T_SO", "T_SP", "T_TF", "T_TO", "T_TP", + "T_Sup-F", "T_Sup-FP", "T_Sup-O", "T_Sup-OT", "T_Sup-P", "T_Sup-PO", "T_Sup-PT", "T_Sup-T"] + + commissural_tracts = ["T_CC1", "T_CC2", "T_CC3", "T_CC4", "T_CC5", "T_CC6", "T_CC7", "T_MCP"] + + + print(f"<{os.path.basename(__file__)}> hemispheric tracts (left and right): ") + tract_idx = 1 + for tract in hemispheric_tracts: + + print(" *", tract_idx, "-", tract) + tract_idx = tract_idx + 1 + mrml = os.path.join(atlasdir, tract+".mrml") + + if not os.path.exists(mrml): + print(f"<{os.path.basename(__file__)}> Error: Cannot locate", mrml) + exit() + + cluster_vtp_list_left = list() + cluster_vtp_list_right = list() + f = open(mrml) + for line in f: + idx = line.find('.vtp') + if idx > 0: + cluster_vtp_filename = line[idx-13:idx+4] + + cluster_vtp_filename_left = os.path.join(inputdir_left, cluster_vtp_filename); + if not os.path.exists(cluster_vtp_filename_left): + print(f"<{os.path.basename(__file__)}> Error:", cluster_vtp_filename_left, "does not exist.") + exit() + cluster_vtp_list_left.append(cluster_vtp_filename_left) + + cluster_vtp_filename_right = os.path.join(inputdir_right, cluster_vtp_filename); + if not os.path.exists(cluster_vtp_filename_right): + print(f"<{os.path.basename(__file__)}> Error:", cluster_vtp_filename_right, "does not exist.") + exit() + cluster_vtp_list_right.append(cluster_vtp_filename_right) + + output_tract_left = os.path.join(outdir, tract + '_left.vtp') + output_tract_right = os.path.join(outdir, tract + '_right.vtp') + + 
output_appended_tract(cluster_vtp_list_left, output_tract_left) + output_appended_tract(cluster_vtp_list_right, output_tract_right) + + print(f"<{os.path.basename(__file__)}> commissural tracts: ") + for tract in commissural_tracts: + + print(" *", tract_idx, "-", tract) + tract_idx = tract_idx + 1 + + mrml = os.path.join(atlasdir, tract+".mrml") + + if not os.path.exists(mrml): + print(f"<{os.path.basename(__file__)}> Error: Cannot locate", mrml) + exit() + + cluster_vtp_list_comm = list() + f = open(mrml) + for line in f: + idx = line.find('.vtp') + if idx > 0: + cluster_vtp_filename = line[idx-13:idx+4] - cluster_vtp_list_comm = list() - f = open(mrml) - for line in f: - idx = line.find('.vtp') - if idx > 0: - cluster_vtp_filename = line[idx-13:idx+4] + cluster_vtp_filename_comm = os.path.join(inputdir_comm, cluster_vtp_filename); + if not os.path.exists(cluster_vtp_filename_comm): + print(f"<{os.path.basename(__file__)}> Error:", cluster_vtp_filename_comm, "does not exist.") + exit() + cluster_vtp_list_comm.append(cluster_vtp_filename_comm) - cluster_vtp_filename_comm = os.path.join(inputdir_comm, cluster_vtp_filename); - if not os.path.exists(cluster_vtp_filename_comm): - print(f"<{os.path.basename(__file__)}> Error:", cluster_vtp_filename_comm, "does not exist.") - exit() - cluster_vtp_list_comm.append(cluster_vtp_filename_comm) + output_tract_comm = os.path.join(outdir, tract + '.vtp') - output_tract_comm = os.path.join(outdir, tract + '.vtp') + output_appended_tract(cluster_vtp_list_comm, output_tract_comm) - output_appended_tract(cluster_vtp_list_comm, output_tract_comm) + def list_cluster_files(input_dir): + # Find input files + input_mask = f"{input_dir}/T_*.vtk" + input_mask2 = f"{input_dir}/T_*.vtp" + input_pd_fnames = glob.glob(input_mask) + glob.glob(input_mask2) + input_pd_fnames = sorted(input_pd_fnames) + return(input_pd_fnames) -def list_cluster_files(input_dir): - # Find input files - input_mask = f"{input_dir}/T_*.vtk" - input_mask2 = f"{input_dir}/T_*.vtp" - input_pd_fnames = glob.glob(input_mask) + glob.glob(input_mask2) - input_pd_fnames = sorted(input_pd_fnames) - return(input_pd_fnames) + list_tracts= list_cluster_files(outdir) -list_tracts= list_cluster_files(outdir) + print('') + print(f'<{os.path.basename(__file__)}> Appended tracts can be found at', outdir, '\n') + print(f'<{os.path.basename(__file__)}> A total of', len(list_tracts), 'tracts\n') -print('') -print(f'<{os.path.basename(__file__)}> Appended tracts can be found at', outdir, '\n') -print(f'<{os.path.basename(__file__)}> A total of', len(list_tracts), 'tracts\n') +if __name__ == "__main__": + main() diff --git a/bin/wm_append_diffusion_measures_across_subjects.py b/bin/wm_append_diffusion_measures_across_subjects.py index 60549a65..704a15e0 100644 --- a/bin/wm_append_diffusion_measures_across_subjects.py +++ b/bin/wm_append_diffusion_measures_across_subjects.py @@ -6,165 +6,171 @@ import os import glob -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Append diffusion measure files from multiple subjects", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu.") -parser.add_argument( - 'inputfolder', - help='A folder contains WMA output by subjects. 
Each subject should be one folder.') -parser.add_argument( - 'appenedmeasurefile', - help='Output csv file.') - -args = parser.parse_args() - -subject_folders = sorted(glob.glob(os.path.join(args.inputfolder, '*'))) - -subject_folders = [d for d in subject_folders if os.path.isdir(d)] - -subject_IDs = [os.path.basename(folder) for folder in subject_folders] -print() -print('Subject IDs: (n=%d): ' % len(subject_IDs)) -print(subject_IDs) - -# anatomical tracts -stats = pandas.read_table(os.path.join(subject_folders[0], 'AnatomicalTracts/diffusion_measurements_anatomical_tracts.csv'), delimiter=' , ') - -fields = [col.strip() for col in stats.columns] -fields = fields[1:] -print() -print('Tract diffusion measures per subject: (n=%d): ' % len(fields)) -print(fields) - -tracts = stats.to_numpy()[:, 0] -tracts = [os.path.basename(filepath).replace('.vtp', '').replace('.vtk', '').strip() for filepath in tracts] -print() -print('White matter tracts per subject: (n=%d): ' % len(tracts)) -print(tracts) - -appended_fields = ['subjectkey'] -for tract in tracts: - for field in fields: - appended_fields.append("%s.%s" % (tract, field)) - -print() -print('Appended tract diffusion measure fields per subject: (n=%d): ' % len(appended_fields)) -print(appended_fields[:10], ' ... ') - -data = [] -for s_idx, subject_folder in enumerate(subject_folders): - print(' * loading tract feature for subject #%04d (subject ID - %s):' %(s_idx, subject_IDs[s_idx])) - csv = os.path.join(subject_folder, 'AnatomicalTracts/diffusion_measurements_anatomical_tracts.csv') - stats = pandas.read_table(csv, delimiter=' , ') - - stats_data = stats.to_numpy()[:, 1:] - stats_data_vec = stats_data.flatten() - - if stats_data_vec.shape[0] != len(appended_fields) - 1: - print("Error: Check if the diffusion measure file has the same rows and columns with other subjects!") - exit() - - data.append(stats_data_vec) - -data = numpy.array(data) -data = numpy.concatenate([numpy.array(subject_IDs)[:, numpy.newaxis], data], axis = 1) - -df = pandas.DataFrame(data, columns=appended_fields) - -print() -print('Appended tract diffusion measures:') -print(df) - -df.to_csv(os.path.abspath(args.appenedmeasurefile).replace('.csv', '_anatomical_tracts.csv'), index=False) -print() -print('Output file at:', os.path.abspath(args.appenedmeasurefile).replace('.csv', '_anatomical_tracts.csv')) - -# Fiber clusters -stats = pandas.read_table(os.path.join(subject_folders[0], 'FiberClustering/SeparatedClusters/diffusion_measurements_commissural.csv'), delimiter=' , ') - -fields = [col.strip() for col in stats.columns] -fields = fields[1:] -print() -print('Fiber cluster diffusion measures per subject: (n=%d): ' % len(fields)) -print(fields) - -clusters = stats.to_numpy()[:, 0] -clusters = [os.path.basename(filepath).replace('.vtp', '').replace('.vtk', '').strip() for filepath in clusters] -print() -print('Fiber clusters per subject: (n=%d): ' % len(clusters)) - -if len(clusters) == 800 and len(tracts) == 73: - comm_clusters = [3, 8, 33, 40, 46, 52, 56, 57, 62, 68, 69, 73, 86, 91, 109, 110, 114, 142, 144, 145, 146, 159, 163, 250, 251, 252, 257, 262, 263, 268, 271, 305, 311, 314, 322, 330, 334, 338, 342, 350, 363, 371, 375, 403, 410, 437, 440, 448, 456, 465, 468, 475, 484, 485, 488, 519, 522, 525, 543, 545, 549, 557, 576, 577, 582, 587, 591, 597, 601, 614, 620, 623, 627, 633, 645, 653, 658, 663, 670, 677, 683, 702, 770, 781] - comm_clusters = numpy.array(comm_clusters) - hemi_clusters = numpy.setdiff1d(numpy.arange(800), comm_clusters) -elif len(clusters) == 800 and 
len(tracts) == 74: # CPC separated - comm_clusters = [3, 8, 33, 40, 46, 52, 56, 57, 62, 68, 69, 73, 86, 91, 109, 110, 114, 142, 144, 146, 163, 250, 251, 252, 257, 262, 263, 268, 271, 305, 311, 314, 322, 330, 334, 338, 342, 350, 363, 371, 375, 403, 410, 437, 440, 448, 456, 465, 468, 475, 484, 485, 488, 519, 522, 525, 543, 545, 549, 576, 577, 582, 587, 591, 597, 601, 614, 620, 623, 627, 633, 645, 653, 658, 663, 670, 683, 702, 781] - comm_clusters = numpy.array(comm_clusters) - hemi_clusters = numpy.setdiff1d(numpy.arange(800), comm_clusters) -else: - comm_clusters = None - hemi_clusters = None - -locations = ['left_hemisphere', 'right_hemisphere', 'commissural'] -appended_fields = ['subjectkey'] -clusters = numpy.array(clusters) -for loc in locations: - if loc == 'left_hemisphere' or loc == 'right_hemisphere': - clusters_loc = clusters[hemi_clusters] - else: - clusters_loc = clusters[comm_clusters] - for cluster in clusters_loc: +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Append diffusion measure files from multiple subjects", + epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu.") + parser.add_argument( + 'inputfolder', + help='A folder contains WMA output by subjects. Each subject should be one folder.') + parser.add_argument( + 'appenedmeasurefile', + help='Output csv file.') + + args = parser.parse_args() + + subject_folders = sorted(glob.glob(os.path.join(args.inputfolder, '*'))) + + subject_folders = [d for d in subject_folders if os.path.isdir(d)] + + subject_IDs = [os.path.basename(folder) for folder in subject_folders] + print() + print('Subject IDs: (n=%d): ' % len(subject_IDs)) + print(subject_IDs) + + # anatomical tracts + stats = pandas.read_table(os.path.join(subject_folders[0], 'AnatomicalTracts/diffusion_measurements_anatomical_tracts.csv'), delimiter=' , ') + + fields = [col.strip() for col in stats.columns] + fields = fields[1:] + print() + print('Tract diffusion measures per subject: (n=%d): ' % len(fields)) + print(fields) + + tracts = stats.to_numpy()[:, 0] + tracts = [os.path.basename(filepath).replace('.vtp', '').replace('.vtk', '').strip() for filepath in tracts] + print() + print('White matter tracts per subject: (n=%d): ' % len(tracts)) + print(tracts) + + appended_fields = ['subjectkey'] + for tract in tracts: for field in fields: - appended_fields.append("%s.%s.%s" % (loc, cluster, field)) + appended_fields.append("%s.%s" % (tract, field)) + + print() + print('Appended tract diffusion measure fields per subject: (n=%d): ' % len(appended_fields)) + print(appended_fields[:10], ' ... 
') + + data = [] + for s_idx, subject_folder in enumerate(subject_folders): + print(' * loading tract feature for subject #%04d (subject ID - %s):' %(s_idx, subject_IDs[s_idx])) + csv = os.path.join(subject_folder, 'AnatomicalTracts/diffusion_measurements_anatomical_tracts.csv') + stats = pandas.read_table(csv, delimiter=' , ') + + stats_data = stats.to_numpy()[:, 1:] + stats_data_vec = stats_data.flatten() + + if stats_data_vec.shape[0] != len(appended_fields) - 1: + print("Error: Check if the diffusion measure file has the same rows and columns with other subjects!") + exit() + + data.append(stats_data_vec) + + data = numpy.array(data) + data = numpy.concatenate([numpy.array(subject_IDs)[:, numpy.newaxis], data], axis = 1) + + df = pandas.DataFrame(data, columns=appended_fields) + + print() + print('Appended tract diffusion measures:') + print(df) + + df.to_csv(os.path.abspath(args.appenedmeasurefile).replace('.csv', '_anatomical_tracts.csv'), index=False) + print() + print('Output file at:', os.path.abspath(args.appenedmeasurefile).replace('.csv', '_anatomical_tracts.csv')) + + # Fiber clusters + stats = pandas.read_table(os.path.join(subject_folders[0], 'FiberClustering/SeparatedClusters/diffusion_measurements_commissural.csv'), delimiter=' , ') + + fields = [col.strip() for col in stats.columns] + fields = fields[1:] + print() + print('Fiber cluster diffusion measures per subject: (n=%d): ' % len(fields)) + print(fields) + + clusters = stats.to_numpy()[:, 0] + clusters = [os.path.basename(filepath).replace('.vtp', '').replace('.vtk', '').strip() for filepath in clusters] + print() + print('Fiber clusters per subject: (n=%d): ' % len(clusters)) + + if len(clusters) == 800 and len(tracts) == 73: + comm_clusters = [3, 8, 33, 40, 46, 52, 56, 57, 62, 68, 69, 73, 86, 91, 109, 110, 114, 142, 144, 145, 146, 159, 163, 250, 251, 252, 257, 262, 263, 268, 271, 305, 311, 314, 322, 330, 334, 338, 342, 350, 363, 371, 375, 403, 410, 437, 440, 448, 456, 465, 468, 475, 484, 485, 488, 519, 522, 525, 543, 545, 549, 557, 576, 577, 582, 587, 591, 597, 601, 614, 620, 623, 627, 633, 645, 653, 658, 663, 670, 677, 683, 702, 770, 781] + comm_clusters = numpy.array(comm_clusters) + hemi_clusters = numpy.setdiff1d(numpy.arange(800), comm_clusters) + elif len(clusters) == 800 and len(tracts) == 74: # CPC separated + comm_clusters = [3, 8, 33, 40, 46, 52, 56, 57, 62, 68, 69, 73, 86, 91, 109, 110, 114, 142, 144, 146, 163, 250, 251, 252, 257, 262, 263, 268, 271, 305, 311, 314, 322, 330, 334, 338, 342, 350, 363, 371, 375, 403, 410, 437, 440, 448, 456, 465, 468, 475, 484, 485, 488, 519, 522, 525, 543, 545, 549, 576, 577, 582, 587, 591, 597, 601, 614, 620, 623, 627, 633, 645, 653, 658, 663, 670, 683, 702, 781] + comm_clusters = numpy.array(comm_clusters) + hemi_clusters = numpy.setdiff1d(numpy.arange(800), comm_clusters) + else: + comm_clusters = None + hemi_clusters = None + + locations = ['left_hemisphere', 'right_hemisphere', 'commissural'] + appended_fields = ['subjectkey'] + clusters = numpy.array(clusters) + for loc in locations: + if loc == 'left_hemisphere' or loc == 'right_hemisphere': + clusters_loc = clusters[hemi_clusters] + else: + clusters_loc = clusters[comm_clusters] + + for cluster in clusters_loc: + for field in fields: + appended_fields.append("%s.%s.%s" % (loc, cluster, field)) + + print() + print('Appended cluster diffusion measure fields per subject: (n=%d): ' % len(appended_fields)) + + data = [] + for s_idx, subject_folder in enumerate(subject_folders): + print(' * loading cluster feature for 
subject #%04d (subject ID - %s):' %(s_idx, subject_IDs[s_idx])) + csv = os.path.join(subject_folder, 'FiberClustering/SeparatedClusters/diffusion_measurements_left_hemisphere.csv') + stats = pandas.read_table(csv, delimiter=' , ') -print() -print('Appended cluster diffusion measure fields per subject: (n=%d): ' % len(appended_fields)) + stats_data = stats.to_numpy()[:, 1:] + stats_data = stats_data[hemi_clusters, :] + l_stats_data_vec = stats_data.flatten() -data = [] -for s_idx, subject_folder in enumerate(subject_folders): - print(' * loading cluster feature for subject #%04d (subject ID - %s):' %(s_idx, subject_IDs[s_idx])) - csv = os.path.join(subject_folder, 'FiberClustering/SeparatedClusters/diffusion_measurements_left_hemisphere.csv') - stats = pandas.read_table(csv, delimiter=' , ') + csv = os.path.join(subject_folder, 'FiberClustering/SeparatedClusters/diffusion_measurements_right_hemisphere.csv') + stats = pandas.read_table(csv, delimiter=' , ') - stats_data = stats.to_numpy()[:, 1:] - stats_data = stats_data[hemi_clusters, :] - l_stats_data_vec = stats_data.flatten() + stats_data = stats.to_numpy()[:, 1:] + stats_data = stats_data[hemi_clusters, :] + r_stats_data_vec = stats_data.flatten() - csv = os.path.join(subject_folder, 'FiberClustering/SeparatedClusters/diffusion_measurements_right_hemisphere.csv') - stats = pandas.read_table(csv, delimiter=' , ') + csv = os.path.join(subject_folder, 'FiberClustering/SeparatedClusters/diffusion_measurements_commissural.csv') + stats = pandas.read_table(csv, delimiter=' , ') - stats_data = stats.to_numpy()[:, 1:] - stats_data = stats_data[hemi_clusters, :] - r_stats_data_vec = stats_data.flatten() + stats_data = stats.to_numpy()[:, 1:] + stats_data = stats_data[comm_clusters, :] + c_stats_data_vec = stats_data.flatten() - csv = os.path.join(subject_folder, 'FiberClustering/SeparatedClusters/diffusion_measurements_commissural.csv') - stats = pandas.read_table(csv, delimiter=' , ') + stats_data_vec = numpy.concatenate([l_stats_data_vec, r_stats_data_vec, c_stats_data_vec]) - stats_data = stats.to_numpy()[:, 1:] - stats_data = stats_data[comm_clusters, :] - c_stats_data_vec = stats_data.flatten() + if stats_data_vec.shape[0] != len(appended_fields) - 1: + print("Error: Check if the diffusion measure file has the same rows and columns with other subjects!") + exit() - stats_data_vec = numpy.concatenate([l_stats_data_vec, r_stats_data_vec, c_stats_data_vec]) + data.append(stats_data_vec) - if stats_data_vec.shape[0] != len(appended_fields) - 1: - print("Error: Check if the diffusion measure file has the same rows and columns with other subjects!") - exit() + data = numpy.array(data) + data = numpy.concatenate([numpy.array(subject_IDs)[:, numpy.newaxis], data], axis = 1) - data.append(stats_data_vec) + df = pandas.DataFrame(data, columns=appended_fields) -data = numpy.array(data) -data = numpy.concatenate([numpy.array(subject_IDs)[:, numpy.newaxis], data], axis = 1) + print() + print('Appended cluster diffusion measures:') + print(df) -df = pandas.DataFrame(data, columns=appended_fields) + df.to_csv(os.path.abspath(args.appenedmeasurefile).replace('.csv', '_fiber_clusters.csv'), index=False) + print() + print('Output file at:', os.path.abspath(args.appenedmeasurefile).replace('.csv', '_fiber_clusters.csv')) -print() -print('Appended cluster diffusion measures:') -print(df) -df.to_csv(os.path.abspath(args.appenedmeasurefile).replace('.csv', '_fiber_clusters.csv'), index=False) -print() -print('Output file at:', 
os.path.abspath(args.appenedmeasurefile).replace('.csv', '_fiber_clusters.csv')) +if __name__ == "__main__": + main() diff --git a/bin/wm_average_tract_measures.py b/bin/wm_average_tract_measures.py index 4007dc0b..d3757d80 100644 --- a/bin/wm_average_tract_measures.py +++ b/bin/wm_average_tract_measures.py @@ -6,112 +6,118 @@ import os import re -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Compute averaged statistics of the anatomical tracts. For diffusion measure with multiple statistics, only the Mean will be outputted.", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu.") -parser.add_argument( - 'inputmeasure', - help='A .csv or .txt measurement files.') -parser.add_argument( - 'outoutmeasure', - help='Output csv file.') -parser.add_argument( - '-appendedTractName', action="store", type=str, default="", - help="Name of the appended tracts.") -parser.add_argument( - '-tractList', action="store", type=str, nargs='+', - help='A list of tracts to be appended, e.g., AF_left AF_right.') - -args = parser.parse_args() - -stats = pandas.read_table(args.inputmeasure, delimiter=',') - -fields = [] -for col in stats.columns: - fields.append(col) -fields = numpy.array(fields) - -print(fields) - -print('All available tracts:') -all_tracts = [f_name.replace('.Num_Points', '') for f_name in fields if f_name.endswith('.Num_Points')] -print('N = ', str(len(all_tracts)), ':') -print(all_tracts) - -append_list = [] -print('Tracts and related measures to be appended.') -for t_idx, tract in enumerate(args.tractList): - print('*', t_idx, '-', tract) - indices = [index for index in range(len(fields)) if fields[index].startswith(tract+'.Num_') or re.search(tract+".*.Mean", fields[index])] - if len(indices) == 0: - print("Error: tract not founded in the input file.") - exit() - print(fields[indices]) - append_list.append(indices) - -append_list = numpy.array(append_list) -append_measures = [field.replace(args.tractList[-1], '') for field in fields[indices]] - -print("Output measures:", append_measures) - -avg_stats = [stats.to_numpy()[:, 0]] -for m_idx, m_name in enumerate(append_measures): - - if m_name == '.Num_Points': - - avg_stat = numpy.sum(stats.to_numpy()[:, append_list[:, m_idx]], axis=1) - - elif m_name == '.Num_Fibers': - - avg_stat = numpy.sum(stats.to_numpy()[:, append_list[:, m_idx]], axis=1) - - elif m_name == '.Mean_Length': # weighted by NoS - - weight = stats.to_numpy()[:, append_list[:, 1]] - val = stats.to_numpy()[:, append_list[:, m_idx]] - - val_weighted_sum = numpy.sum(val * weight, axis=1) - weight_sum = numpy.sum(weight, axis=1) - - empty_indices = numpy.where(weight_sum == 0)[0] - weight_sum[empty_indices] = 1 - - avg_stat = val_weighted_sum / weight_sum - avg_stat[empty_indices] = 0 - - else: # weighted by NoP - - weight = stats.to_numpy()[:, append_list[:, 0]] - val = stats.to_numpy()[:, append_list[:, m_idx]] - - val_weighted_sum = numpy.sum(val.astype(numpy.double) * weight.astype(numpy.double), axis=1) - weight_sum = numpy.sum(weight, axis=1) - - empty_indices = numpy.where(weight_sum == 0)[0] - weight_sum[empty_indices] = 1 - - avg_stat = val_weighted_sum / weight_sum - avg_stat[empty_indices] = -1 - - avg_stats.append(avg_stat) - -avg_stats = numpy.array(avg_stats) -avg_stats = avg_stats.transpose() - -column_names = ['subjectkey'] -for m in append_measures: - column_names.append(args.appendedTractName+m) - -df = pandas.DataFrame(avg_stats, columns=column_names) - -print('Averaged tract measures:') -print(df) 
-
-df.to_csv(args.outoutmeasure, index=False)
-print()
-print('Output file at:', os.path.abspath(args.outoutmeasure))
-
-exit()
\ No newline at end of file
+
+def main():
+    #-----------------
+    # Parse arguments
+    #-----------------
+    parser = argparse.ArgumentParser(
+        description="Compute averaged statistics of the anatomical tracts. For diffusion measures with multiple statistics, only the Mean will be outputted.",
+        epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu.")
+    parser.add_argument(
+        'inputmeasure',
+        help='A .csv or .txt measurement file.')
+    parser.add_argument(
+        'outoutmeasure',
+        help='Output csv file.')
+    parser.add_argument(
+        '-appendedTractName', action="store", type=str, default="",
+        help="Name of the appended tracts.")
+    parser.add_argument(
+        '-tractList', action="store", type=str, nargs='+',
+        help='A list of tracts to be appended, e.g., AF_left AF_right.')
+
+    args = parser.parse_args()
+
+    stats = pandas.read_table(args.inputmeasure, delimiter=',')
+
+    fields = []
+    for col in stats.columns:
+        fields.append(col)
+    fields = numpy.array(fields)
+
+    print(fields)
+
+    print('All available tracts:')
+    all_tracts = [f_name.replace('.Num_Points', '') for f_name in fields if f_name.endswith('.Num_Points')]
+    print('N = ', str(len(all_tracts)), ':')
+    print(all_tracts)
+
+    append_list = []
+    print('Tracts and related measures to be appended.')
+    for t_idx, tract in enumerate(args.tractList):
+        print('*', t_idx, '-', tract)
+        indices = [index for index in range(len(fields)) if fields[index].startswith(tract+'.Num_') or re.search(tract+".*.Mean", fields[index])]
+        if len(indices) == 0:
+            print("Error: tract not found in the input file.")
+            exit()
+        print(fields[indices])
+        append_list.append(indices)
+
+    append_list = numpy.array(append_list)
+    append_measures = [field.replace(args.tractList[-1], '') for field in fields[indices]]
+
+    print("Output measures:", append_measures)
+
+    avg_stats = [stats.to_numpy()[:, 0]]
+    for m_idx, m_name in enumerate(append_measures):
+
+        if m_name == '.Num_Points':
+
+            avg_stat = numpy.sum(stats.to_numpy()[:, append_list[:, m_idx]], axis=1)
+
+        elif m_name == '.Num_Fibers':
+
+            avg_stat = numpy.sum(stats.to_numpy()[:, append_list[:, m_idx]], axis=1)
+
+        elif m_name == '.Mean_Length': # weighted by NoS
+
+            weight = stats.to_numpy()[:, append_list[:, 1]]
+            val = stats.to_numpy()[:, append_list[:, m_idx]]
+
+            val_weighted_sum = numpy.sum(val * weight, axis=1)
+            weight_sum = numpy.sum(weight, axis=1)
+
+            empty_indices = numpy.where(weight_sum == 0)[0]
+            weight_sum[empty_indices] = 1
+
+            avg_stat = val_weighted_sum / weight_sum
+            avg_stat[empty_indices] = 0
+
+        else: # weighted by NoP
+
+            weight = stats.to_numpy()[:, append_list[:, 0]]
+            val = stats.to_numpy()[:, append_list[:, m_idx]]
+
+            val_weighted_sum = numpy.sum(val.astype(numpy.double) * weight.astype(numpy.double), axis=1)
+            weight_sum = numpy.sum(weight, axis=1)
+
+            empty_indices = numpy.where(weight_sum == 0)[0]
+            weight_sum[empty_indices] = 1
+
+            avg_stat = val_weighted_sum / weight_sum
+            avg_stat[empty_indices] = -1
+
+        avg_stats.append(avg_stat)
+
+    avg_stats = numpy.array(avg_stats)
+    avg_stats = avg_stats.transpose()
+
+    column_names = ['subjectkey']
+    for m in append_measures:
+        column_names.append(args.appendedTractName+m)
+
+    df = pandas.DataFrame(avg_stats, columns=column_names)
+
+    print('Averaged tract measures:')
+    print(df)
+
+    df.to_csv(args.outoutmeasure, index=False)
+    print()
+    print('Output file at:', os.path.abspath(args.outoutmeasure))
+
+    exit()
+
+
+if 
__name__ == "__main__": + main() diff --git a/bin/wm_change_nrrd_dir.py b/bin/wm_change_nrrd_dir.py index c19c4d55..1da730ad 100644 --- a/bin/wm_change_nrrd_dir.py +++ b/bin/wm_change_nrrd_dir.py @@ -21,63 +21,65 @@ raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Make sign change of the gradient direction", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") - -parser.add_argument( - 'inputnrrd', - help='Nrrd header in .nhdr format') -parser.add_argument( - 'outputnrrd', - help='New Nrrd header') -parser.add_argument( - '-d', action="store", dest="dim", type=str, - help='The dimension to change: x, y, or z.') - -args = parser.parse_args() - - -inputnrrd = args.inputnrrd -outputnrrd = args.outputnrrd - -with open(inputnrrd) as f: - contents = f.readlines() - -outstr = '' -for line in contents: - tmpstr = line - if line.startswith('DWMRI_gradient'): - strs = line.split(':=') - dirs = strs[1].split(' ') - - if args.dim == 'x': - val = float(dirs[0]) - if val != 0: - val = -val - dirs[0] = str(val) - elif args.dim == 'y': - val = float(dirs[1]) - if val != 0: - val = -val - dirs[1] = str(val) - elif args.dim == 'z': - val = float(dirs[2]) - if val != 0: - val = -val - dirs[2] = str(val) + '\n' - - tmpstr = strs[0] + ':=' + dirs[0] + ' ' + dirs[1] + ' ' + dirs[2] - - outstr += tmpstr - -with open(outputnrrd, 'w') as o: - o.write(outstr) - -print('Done! New nrrd header: ' + outputnrrd) - - - \ No newline at end of file +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Make sign change of the gradient direction", + epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") + + parser.add_argument( + 'inputnrrd', + help='Nrrd header in .nhdr format') + parser.add_argument( + 'outputnrrd', + help='New Nrrd header') + parser.add_argument( + '-d', action="store", dest="dim", type=str, + help='The dimension to change: x, y, or z.') + + args = parser.parse_args() + + + inputnrrd = args.inputnrrd + outputnrrd = args.outputnrrd + + with open(inputnrrd) as f: + contents = f.readlines() + + outstr = '' + for line in contents: + tmpstr = line + if line.startswith('DWMRI_gradient'): + strs = line.split(':=') + dirs = strs[1].split(' ') + + if args.dim == 'x': + val = float(dirs[0]) + if val != 0: + val = -val + dirs[0] = str(val) + elif args.dim == 'y': + val = float(dirs[1]) + if val != 0: + val = -val + dirs[1] = str(val) + elif args.dim == 'z': + val = float(dirs[2]) + if val != 0: + val = -val + dirs[2] = str(val) + '\n' + + tmpstr = strs[0] + ':=' + dirs[0] + ' ' + dirs[1] + ' ' + dirs[2] + + outstr += tmpstr + + with open(outputnrrd, 'w') as o: + o.write(outstr) + + print('Done! 
New nrrd header: ' + outputnrrd) + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_assess_cluster_location.py b/utilities/wm_assess_cluster_location.py index 83ef9305..286f42c4 100644 --- a/utilities/wm_assess_cluster_location.py +++ b/utilities/wm_assess_cluster_location.py @@ -10,143 +10,149 @@ print(f"<{os.path.basename(__file__)}> Error importing white matter analysis package\n") raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Decide a cluster belonging to commissural or hemispheric in the atlas.", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") - -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'inputAtlasDirectory', - help='Directory of the fiber clustering atlas. An output CSV file will be generated, showing the location of each fiber cluster: left-hemisphere, right-hemisphere, commissural, or Not Given if the fibers of a cluster are relatively even distributed in these three regions. ') -parser.add_argument( - '-pthresh', action="store", dest="hemispherePercentThreshold", type=float, default=0.6, - help='(A same pthresh value should be used in ) The percent of a fiber that has to be in one hemisphere to consider the fiber as part of that hemisphere (rather than a commissural fiber). Default number is 0.6, where a higher number will tend to label fewer fibers as hemispheric and more fibers as commissural (not strictly in one hemisphere or the other), while a lower number will be stricter about what is classified as commissural.') -parser.add_argument( - '-advanced_times_threshold', action="store", dest="advanced_times_threshold", type=float, default=3, - help='(Advanced parameter should be used according to applications.) For one cluster, if it has the hemispheric (left or right) fibers three times more than the commissural part, this cluster will be considered as a hemispheric cluster. In a similar way, one cluster needs to have the commissural fibers three times more than the hemispheric (left and right) fibers to be considered as a commissural cluster. Users can change -advanced_num_threshold to have more strict classification. For example, if advanced_num_threshold = 5, the cluster needs to have the commissural fibers five times more than the hemispheric (left and right) fibers to be considered as a commissural cluster.') - -args = parser.parse_args() - -if not os.path.isdir(args.inputAtlasDirectory): - print("Error: Input directory", args.inputTractDirectory, "does not exist.") - exit() - -print(f"<{os.path.basename(__file__)}>. 
Starting processing.") -print("") -print("=====input atlas directory======\n", args.inputAtlasDirectory) -print("=====pthresh====\n", args.hemispherePercentThreshold) -print("=====advanced_times_threshold====\n", args.advanced_times_threshold) - -def list_cluster_files(input_dir): - # Find input files - input_mask = f"{input_dir}/cluster_*.vtk" - input_mask2 = f"{input_dir}/cluster_*.vtp" - input_pd_fnames = glob.glob(input_mask) + glob.glob(input_mask2) - input_pd_fnames = sorted(input_pd_fnames) - return(input_pd_fnames) - -input_polydatas = list_cluster_files(args.inputAtlasDirectory) -number_of_clusters = len(input_polydatas) - -points_per_fiber = 40 -print("%20s: %10s %10s %10s - %10s" \ - % ('cluster', 'left', 'right', 'comm', 'locations')) - -output_file = open(os.path.join(args.inputAtlasDirectory, 'clusters_location.txt'), 'w') -outstr = 'cluster' + '\t'+ 'left hemispheric fibers' + '\t'+ 'right hemispheric fibers' + '\t'+ 'commissural fibers' + '\t'+ 'location' + '\n' - -hemi_list = [] -comm_list = [] -ng_list = [] -for fname in input_polydatas: - - fname_base = os.path.basename(fname) - pd = wma.io.read_polydata(fname) - - fibers = wma.fibers.FiberArray() - fibers.points_per_fiber = points_per_fiber - fibers.hemisphere_percent_threshold = args.hemispherePercentThreshold - # must request hemisphere computation from object - fibers.hemispheres = True - # Now convert to array with points and hemispheres as above - fibers.convert_from_polydata(pd) - - num_left = len(fibers.index_left_hem) - num_right = len(fibers.index_right_hem) - num_comm = len(fibers.index_commissure) - - location = '' - if num_comm == 0: - location = 'hemispheric' - hemi_list.append(fname_base) - elif num_left > args.advanced_times_threshold * num_comm or num_right > args.advanced_times_threshold * num_comm: - location = 'hemispheric' - hemi_list.append(fname_base) - elif num_comm > args.advanced_times_threshold * num_left and num_comm > args.advanced_times_threshold * num_right: - location = 'commissural' - comm_list.append(fname_base) - else: - location = 'Not Given' - ng_list.append(fname_base) +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Decide a cluster belonging to commissural or hemispheric in the atlas.", + epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") + + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'inputAtlasDirectory', + help='Directory of the fiber clustering atlas. An output CSV file will be generated, showing the location of each fiber cluster: left-hemisphere, right-hemisphere, commissural, or Not Given if the fibers of a cluster are relatively even distributed in these three regions. ') + parser.add_argument( + '-pthresh', action="store", dest="hemispherePercentThreshold", type=float, default=0.6, + help='(A same pthresh value should be used in ) The percent of a fiber that has to be in one hemisphere to consider the fiber as part of that hemisphere (rather than a commissural fiber). 
Default number is 0.6, where a higher number will tend to label fewer fibers as hemispheric and more fibers as commissural (not strictly in one hemisphere or the other), while a lower number will be stricter about what is classified as commissural.') + parser.add_argument( + '-advanced_times_threshold', action="store", dest="advanced_times_threshold", type=float, default=3, + help='(Advanced parameter should be used according to applications.) For one cluster, if it has the hemispheric (left or right) fibers three times more than the commissural part, this cluster will be considered as a hemispheric cluster. In a similar way, one cluster needs to have the commissural fibers three times more than the hemispheric (left and right) fibers to be considered as a commissural cluster. Users can change -advanced_num_threshold to have more strict classification. For example, if advanced_num_threshold = 5, the cluster needs to have the commissural fibers five times more than the hemispheric (left and right) fibers to be considered as a commissural cluster.') + + args = parser.parse_args() + + if not os.path.isdir(args.inputAtlasDirectory): + print("Error: Input directory", args.inputTractDirectory, "does not exist.") + exit() + + print(f"<{os.path.basename(__file__)}>. Starting processing.") + print("") + print("=====input atlas directory======\n", args.inputAtlasDirectory) + print("=====pthresh====\n", args.hemispherePercentThreshold) + print("=====advanced_times_threshold====\n", args.advanced_times_threshold) + + def list_cluster_files(input_dir): + # Find input files + input_mask = f"{input_dir}/cluster_*.vtk" + input_mask2 = f"{input_dir}/cluster_*.vtp" + input_pd_fnames = glob.glob(input_mask) + glob.glob(input_mask2) + input_pd_fnames = sorted(input_pd_fnames) + return(input_pd_fnames) + + input_polydatas = list_cluster_files(args.inputAtlasDirectory) + number_of_clusters = len(input_polydatas) + + points_per_fiber = 40 print("%20s: %10s %10s %10s - %10s" \ - % (fname_base, num_left, num_right, num_comm, location)) - - outstr = outstr + fname_base + '\t'+ str(num_left) + '\t'+ str(num_right) + '\t'+ str(num_comm) + '\t'+ location + '\n' - -output_file.write(outstr) -output_file.close() - -# hemisphere -number_of_files = len(hemi_list) -step = int(100 * 255.0 / (number_of_files - 1)) -R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 -G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 -B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 - -colors = list() -idx = 0 -for pd in hemi_list: - colors.append([R[idx], G[idx], B[idx]]) - idx += 1 -colors = numpy.array(colors) -hemi_mrml_filename = os.path.join(args.inputAtlasDirectory, "clusters_location_hemispheric.mrml") -wma.mrml.write(hemi_list, colors, hemi_mrml_filename, ratio=1.0) - -#commissural -number_of_files = len(comm_list) -step = int(100 * 255.0 / (number_of_files - 1)) -R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 -G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 -B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 - -colors = list() -idx = 0 -for pd in comm_list: - colors.append([R[idx], G[idx], B[idx]]) - idx += 1 -colors = numpy.array(colors) -comm_mrml_filename = os.path.join(args.inputAtlasDirectory, "clusters_location_commissural.mrml") -wma.mrml.write(comm_list, colors, comm_mrml_filename, ratio=1.0) - -#Not Given -number_of_files = len(ng_list) -step = int(100 * 255.0 / (number_of_files - 1)) -R = numpy.array(list(range(0, 100 * 255 + 
1, step))) / 100.0 -G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 -B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 - -colors = list() -idx = 0 -for pd in ng_list: - colors.append([R[idx], G[idx], B[idx]]) - idx += 1 -colors = numpy.array(colors) -ng_mrml_filename = os.path.join(args.inputAtlasDirectory, "clusters_location_not_given.mrml") -wma.mrml.write(ng_list, colors, ng_mrml_filename, ratio=1.0) + % ('cluster', 'left', 'right', 'comm', 'locations')) + + output_file = open(os.path.join(args.inputAtlasDirectory, 'clusters_location.txt'), 'w') + outstr = 'cluster' + '\t'+ 'left hemispheric fibers' + '\t'+ 'right hemispheric fibers' + '\t'+ 'commissural fibers' + '\t'+ 'location' + '\n' + + hemi_list = [] + comm_list = [] + ng_list = [] + for fname in input_polydatas: + + fname_base = os.path.basename(fname) + pd = wma.io.read_polydata(fname) + + fibers = wma.fibers.FiberArray() + fibers.points_per_fiber = points_per_fiber + fibers.hemisphere_percent_threshold = args.hemispherePercentThreshold + # must request hemisphere computation from object + fibers.hemispheres = True + # Now convert to array with points and hemispheres as above + fibers.convert_from_polydata(pd) + + num_left = len(fibers.index_left_hem) + num_right = len(fibers.index_right_hem) + num_comm = len(fibers.index_commissure) + + location = '' + if num_comm == 0: + location = 'hemispheric' + hemi_list.append(fname_base) + elif num_left > args.advanced_times_threshold * num_comm or num_right > args.advanced_times_threshold * num_comm: + location = 'hemispheric' + hemi_list.append(fname_base) + elif num_comm > args.advanced_times_threshold * num_left and num_comm > args.advanced_times_threshold * num_right: + location = 'commissural' + comm_list.append(fname_base) + else: + location = 'Not Given' + ng_list.append(fname_base) + + print("%20s: %10s %10s %10s - %10s" \ + % (fname_base, num_left, num_right, num_comm, location)) + + outstr = outstr + fname_base + '\t'+ str(num_left) + '\t'+ str(num_right) + '\t'+ str(num_comm) + '\t'+ location + '\n' + + output_file.write(outstr) + output_file.close() + + # hemisphere + number_of_files = len(hemi_list) + step = int(100 * 255.0 / (number_of_files - 1)) + R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 + G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 + B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 + + colors = list() + idx = 0 + for pd in hemi_list: + colors.append([R[idx], G[idx], B[idx]]) + idx += 1 + colors = numpy.array(colors) + hemi_mrml_filename = os.path.join(args.inputAtlasDirectory, "clusters_location_hemispheric.mrml") + wma.mrml.write(hemi_list, colors, hemi_mrml_filename, ratio=1.0) + + #commissural + number_of_files = len(comm_list) + step = int(100 * 255.0 / (number_of_files - 1)) + R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 + G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 + B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 + + colors = list() + idx = 0 + for pd in comm_list: + colors.append([R[idx], G[idx], B[idx]]) + idx += 1 + colors = numpy.array(colors) + comm_mrml_filename = os.path.join(args.inputAtlasDirectory, "clusters_location_commissural.mrml") + wma.mrml.write(comm_list, colors, comm_mrml_filename, ratio=1.0) + + #Not Given + number_of_files = len(ng_list) + step = int(100 * 255.0 / (number_of_files - 1)) + R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 + G = numpy.abs(list(range(100 * 
-127, 100 * 128 + 1, step))) * 2.0 / 100.0 + B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 + + colors = list() + idx = 0 + for pd in ng_list: + colors.append([R[idx], G[idx], B[idx]]) + idx += 1 + colors = numpy.array(colors) + ng_mrml_filename = os.path.join(args.inputAtlasDirectory, "clusters_location_not_given.mrml") + wma.mrml.write(ng_list, colors, ng_mrml_filename, ratio=1.0) + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_compute_FA_from_DWIs.py b/utilities/wm_compute_FA_from_DWIs.py index 4e0ae824..bb662b8f 100644 --- a/utilities/wm_compute_FA_from_DWIs.py +++ b/utilities/wm_compute_FA_from_DWIs.py @@ -17,52 +17,56 @@ def list_nhdr_files(input_dir): input_pd_fnames = sorted(input_pd_fnames) return(input_pd_fnames) -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Measures mean FA, etc. in tractography clusters in a directory.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"") -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'inputDirectoryDWI', - help='A directory of DWIs (nhdr or nrrd).') -parser.add_argument( - 'inputDirectoryMask', - help='A directory of masks (nhdr or nrrd).') -parser.add_argument( - 'outputDirectory', - help='Output data will be saved here.') -args = parser.parse_args() +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Measures mean FA, etc. in tractography clusters in a directory.", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. 
Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"")
+    parser.add_argument("-v", "--version",
+        action="version", default=argparse.SUPPRESS,
+        version='1.0',
+        help="Show program's version number and exit")
+    parser.add_argument(
+        'inputDirectoryDWI',
+        help='A directory of DWIs (nhdr or nrrd).')
+    parser.add_argument(
+        'inputDirectoryMask',
+        help='A directory of masks (nhdr or nrrd).')
+    parser.add_argument(
+        'outputDirectory',
+        help='Output data will be saved here.')
-if not os.path.isdir(args.inputDirectoryDWI):
-    print("Error: Input directory", args.inputDirectory, "does not exist.")
-    exit()
+    args = parser.parse_args()
-if not os.path.isdir(args.inputDirectoryMask):
-    print("Error: Input directory", args.inputDirectory, "does not exist.")
-    exit()
+    if not os.path.isdir(args.inputDirectoryDWI):
+        print("Error: Input directory", args.inputDirectoryDWI, "does not exist.")
+        exit()
-outdir = args.outputDirectory
-if not os.path.exists(outdir):
-    print(f"<{os.path.basename(__file__)}> Output directory", outdir, "does not exist, creating it.")
-    os.makedirs(outdir)
+    if not os.path.isdir(args.inputDirectoryMask):
+        print("Error: Input directory", args.inputDirectoryMask, "does not exist.")
+        exit()
-# get inputs
-dwi_list = list_nhdr_files(args.inputDirectoryDWI)
-mask_list = list_nhdr_files(args.inputDirectoryMask)
+    outdir = args.outputDirectory
+    if not os.path.exists(outdir):
+        print(f"<{os.path.basename(__file__)}> Output directory", outdir, "does not exist, creating it.")
+        os.makedirs(outdir)
-# create FA images
-for (dwi, mask) in zip(dwi_list, mask_list):
-    subject_id = os.path.splitext(os.path.basename(dwi))[0]
-    fname_out_dti = os.path.join(args.outputDirectory, subject_id + '_DTI.nhdr')
-    fname_out_b0 = os.path.join(args.outputDirectory, subject_id + '_B0.nhdr')
-    fname_out_fa = os.path.join(args.outputDirectory, subject_id + '_FA.nhdr')
-    print("/Applications/Slicer.app/Contents/lib/Slicer-4.4/cli-modules/DWIToDTIEstimation -m ", mask, dwi, fname_out_dti, fname_out_b0)
-    print("/Applications/Slicer.app/Contents/lib/Slicer-4.4/cli-modules/DiffusionTensorScalarMeasurements", fname_out_dti, fname_out_fa, "-e FractionalAnisotropy &")
+    # get inputs
+    dwi_list = list_nhdr_files(args.inputDirectoryDWI)
+    mask_list = list_nhdr_files(args.inputDirectoryMask)
-
+    # create FA images
+    for (dwi, mask) in zip(dwi_list, mask_list):
+        subject_id = os.path.splitext(os.path.basename(dwi))[0]
+        fname_out_dti = os.path.join(args.outputDirectory, subject_id + '_DTI.nhdr')
+        fname_out_b0 = os.path.join(args.outputDirectory, subject_id + '_B0.nhdr')
+        fname_out_fa = os.path.join(args.outputDirectory, subject_id + '_FA.nhdr')
+        print("/Applications/Slicer.app/Contents/lib/Slicer-4.4/cli-modules/DWIToDTIEstimation -m ", mask, dwi, fname_out_dti, fname_out_b0)
+        print("/Applications/Slicer.app/Contents/lib/Slicer-4.4/cli-modules/DiffusionTensorScalarMeasurements", fname_out_dti, fname_out_fa, "-e FractionalAnisotropy &")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utilities/wm_compute_TAP.py b/utilities/wm_compute_TAP.py
index 56b2feee..b244d358 100755
--- a/utilities/wm_compute_TAP.py
+++ b/utilities/wm_compute_TAP.py
@@ -7,93 +7,99 @@ import vtk import whitematteranalysis as wma
-parser = argparse.ArgumentParser(
-    description="Compute Tract Anatomical Profile",
-    epilog="Written by Fan Zhang, zhangfanmark@gmail.com.")
-parser.add_argument("-v", "--version",
-    action="version", default=argparse.SUPPRESS,
-    version='1.0',
-    help="Show program's version number and exit")
-parser.add_argument( - 'inputDirectory', - help='A directory containing subdirectories for all clustered subjects.') - -args = parser.parse_args() - -if not os.path.isdir(args.inputDirectory): - print("Error: Input directory", args.inputDirectory, "does not exist or is not a directory.") - exit() - -def list_vtk_files(input_dir): - input_mask = f"{input_dir}/*.vtk" - input_mask2 = f"{input_dir}/*.vtp" - input_pd_fnames = glob.glob(input_mask) + glob.glob(input_mask2) - input_pd_fnames = sorted(input_pd_fnames) - return(input_pd_fnames) - -def compute_TAP(inpd): - - inpointdata = inpd.GetPointData() - NoS = inpd.GetNumberOfLines() - - TAP_str = "" - if NoS != 0: - point_data_array_indices = list(range(inpointdata.GetNumberOfArrays())) - for idx in point_data_array_indices: - array = inpointdata.GetArray(idx) - - if array.GetName() == 'region_label': - inpd.GetLines().InitTraversal() - fiber_regions = [] - for lidx in range(0, inpd.GetNumberOfLines()): - ptids = vtk.vtkIdList() - inpd.GetLines().GetNextCell(ptids) - - regions = numpy.zeros(ptids.GetNumberOfIds()) - for pidx in range(0, ptids.GetNumberOfIds()): - regions[pidx] = array.GetTuple(ptids.GetId(pidx))[0] - - regions = numpy.unique(regions) - fiber_regions.append(regions) - - all_regions = numpy.unique(numpy.concatenate(fiber_regions)) - - r_prob = [] - r_list = [] - for r in all_regions: - count = 0 - for fiber_region in fiber_regions: - if r in fiber_region: - count = count + 1 - - prob = count / NoS - r_prob.append(prob) - r_list.append(r) - - r_prob = numpy.array(r_prob) - r_list = numpy.array(r_list) - arg = numpy.argsort(-r_prob) - r_prob = r_prob[arg] - r_list = r_list[arg] - - for r, p in zip(r_list, r_prob): - TAP_str += "'%d:%0.6f'," % (r, p) - - return TAP_str - - -vtk_files = list_vtk_files(args.inputDirectory) - -all_TAP_str = "Cluster Index,Region:Probability,\n" -for vtk_file in vtk_files: - print(vtk_file) - tract_name = os.path.basename(vtk_file).replace(".vtp", "").replace(".vtk", "") - pd = wma.io.read_polydata(vtk_file) - TAP_str = compute_TAP(pd) - all_TAP_str += tract_name + "," + TAP_str + "\n" - -print(all_TAP_str) - -f = open(os.path.join(args.inputDirectory, 'TAP.csv'), 'a') -f.write(all_TAP_str) -f.close() + +def main(): + parser = argparse.ArgumentParser( + description="Compute Tract Anatomical Profile", + epilog="Written by Fan Zhang, zhangfanmark@gmail.com.") + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'inputDirectory', + help='A directory containing subdirectories for all clustered subjects.') + + args = parser.parse_args() + + if not os.path.isdir(args.inputDirectory): + print("Error: Input directory", args.inputDirectory, "does not exist or is not a directory.") + exit() + + def list_vtk_files(input_dir): + input_mask = f"{input_dir}/*.vtk" + input_mask2 = f"{input_dir}/*.vtp" + input_pd_fnames = glob.glob(input_mask) + glob.glob(input_mask2) + input_pd_fnames = sorted(input_pd_fnames) + return(input_pd_fnames) + + def compute_TAP(inpd): + + inpointdata = inpd.GetPointData() + NoS = inpd.GetNumberOfLines() + + TAP_str = "" + if NoS != 0: + point_data_array_indices = list(range(inpointdata.GetNumberOfArrays())) + for idx in point_data_array_indices: + array = inpointdata.GetArray(idx) + + if array.GetName() == 'region_label': + inpd.GetLines().InitTraversal() + fiber_regions = [] + for lidx in range(0, inpd.GetNumberOfLines()): + ptids = vtk.vtkIdList() + 
inpd.GetLines().GetNextCell(ptids) + + regions = numpy.zeros(ptids.GetNumberOfIds()) + for pidx in range(0, ptids.GetNumberOfIds()): + regions[pidx] = array.GetTuple(ptids.GetId(pidx))[0] + + regions = numpy.unique(regions) + fiber_regions.append(regions) + + all_regions = numpy.unique(numpy.concatenate(fiber_regions)) + + r_prob = [] + r_list = [] + for r in all_regions: + count = 0 + for fiber_region in fiber_regions: + if r in fiber_region: + count = count + 1 + + prob = count / NoS + r_prob.append(prob) + r_list.append(r) + + r_prob = numpy.array(r_prob) + r_list = numpy.array(r_list) + arg = numpy.argsort(-r_prob) + r_prob = r_prob[arg] + r_list = r_list[arg] + + for r, p in zip(r_list, r_prob): + TAP_str += "'%d:%0.6f'," % (r, p) + + return TAP_str + + + vtk_files = list_vtk_files(args.inputDirectory) + + all_TAP_str = "Cluster Index,Region:Probability,\n" + for vtk_file in vtk_files: + print(vtk_file) + tract_name = os.path.basename(vtk_file).replace(".vtp", "").replace(".vtk", "") + pd = wma.io.read_polydata(vtk_file) + TAP_str = compute_TAP(pd) + all_TAP_str += tract_name + "," + TAP_str + "\n" + + print(all_TAP_str) + + f = open(os.path.join(args.inputDirectory, 'TAP.csv'), 'a') + f.write(all_TAP_str) + f.close() + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_extract_cluster.py b/utilities/wm_extract_cluster.py index 96782782..8e9ec40e 100755 --- a/utilities/wm_extract_cluster.py +++ b/utilities/wm_extract_cluster.py @@ -7,64 +7,68 @@ import numpy import whitematteranalysis as wma -parser = argparse.ArgumentParser( - description="Grab one cluster from within all subject directories and rename to include subject ID. Output all clusters into the output directory", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"") -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'cluster', type=int, - help='The cluster number you would like to copy for all subjects.') -parser.add_argument( - 'inputDirectory', - help='A directory containing subdirectories for all clustered subjects.') -parser.add_argument( - 'outputDirectory', - help='The output directory will be created if it does not exist.') +def main(): + parser = argparse.ArgumentParser( + description="Grab one cluster from within all subject directories and rename to include subject ID. Output all clusters into the output directory", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. 
Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"") + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'cluster', type=int, + help='The cluster number you would like to copy for all subjects.') + parser.add_argument( + 'inputDirectory', + help='A directory containing subdirectories for all clustered subjects.') + parser.add_argument( + 'outputDirectory', + help='The output directory will be created if it does not exist.') -args = parser.parse_args() + args = parser.parse_args() -if not os.path.isdir(args.inputDirectory): - print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist or is not a directory.") - exit() -outdir = args.outputDirectory -if not os.path.exists(outdir): - print(f"<{os.path.basename(__file__)}> Output directory", outdir, "does not exist, creating it.") - os.makedirs(outdir) + if not os.path.isdir(args.inputDirectory): + print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist or is not a directory.") + exit() -# cluster filename we want -fname_c = f'cluster_{args.cluster:05d}.vtp' -# output MRML filename -fname_mrml = f'all_clusters_{args.cluster:05d}.mrml' -fname_mrml = os.path.join(outdir, fname_mrml) + outdir = args.outputDirectory + if not os.path.exists(outdir): + print(f"<{os.path.basename(__file__)}> Output directory", outdir, "does not exist, creating it.") + os.makedirs(outdir) -# Find input directories that may contain clusters -input_mask = f"{args.inputDirectory}/*" -input_directories = sorted(glob.glob(input_mask)) + # cluster filename we want + fname_c = f'cluster_{args.cluster:05d}.vtp' + # output MRML filename + fname_mrml = f'all_clusters_{args.cluster:05d}.mrml' + fname_mrml = os.path.join(outdir, fname_mrml) -# Loop over inputs and try to find clusters -fname_list = list() -for dir in input_directories: - if os.path.isdir(dir): - subject_id = os.path.basename(dir) - print(dir) - fname = os.path.join(dir, fname_c) - if os.path.exists(fname): - fname1 = subject_id+'_'+fname_c - fname_list.append(fname1) - fname2 = os.path.join(outdir, fname1) - print(fname, "===>>>>", fname2) - shutil.copy(fname, fname2) + # Find input directories that may contain clusters + input_mask = f"{args.inputDirectory}/*" + input_directories = sorted(glob.glob(input_mask)) -# also output a MRML file to load them all into Slicer -# for now, random colors -colors = numpy.random.random((len(fname_list),3))*255.0 -wma.mrml.write(fname_list, colors, fname_mrml, ratio = 1.0) + # Loop over inputs and try to find clusters + fname_list = list() + for dir in input_directories: + if os.path.isdir(dir): + subject_id = os.path.basename(dir) + print(dir) + fname = os.path.join(dir, fname_c) + if os.path.exists(fname): + fname1 = subject_id+'_'+fname_c + fname_list.append(fname1) + fname2 = os.path.join(outdir, fname1) + print(fname, "===>>>>", fname2) + shutil.copy(fname, fname2) + # also output a MRML file to load them all into Slicer + # for now, random colors + colors = numpy.random.random((len(fname_list),3))*255.0 + wma.mrml.write(fname_list, colors, fname_mrml, ratio = 1.0) + +if __name__ == "__main__": + main() diff --git a/utilities/wm_extract_clusters_by_endpoints.py b/utilities/wm_extract_clusters_by_endpoints.py index 8041f5e5..6f713e06 100755 --- a/utilities/wm_extract_clusters_by_endpoints.py +++ b/utilities/wm_extract_clusters_by_endpoints.py @@ 
-9,185 +9,191 @@ print(f"<{os.path.basename(__file__)}> Error importing white matter analysis package\n") raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Extract fiber clusters that connect to one particular region, such as one cortical parcel or one fMRI functional area. This is based on the results from .", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") - -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'inputDirectory', - help='Contains endpoint overlap percentage measurement as txt files, which are computed from .') -parser.add_argument( - '-r', type=str, dest="region", - help='Label of region-of-interest as in the header of the measurement files.') -parser.add_argument( - '-p', type=int, dest="percentage_threshold", default='10', - help='Percentage threshold [0, 100] that is used to decide if fiber cluster connect to the input region in individual subject. For each subject, if over p/100 of a fiber cluster\'s endpoints overlap with the region-of-interest, we consider this cluster and the input region are connected in this subject.') -parser.add_argument( - '-s', type=int, dest="subject_number_threshold", default='1', - help='Subject number threshold that is used to decide if fiber cluster connect to the input region in the population. Across all subjects, if a cluster is connected to the input region in at least s subjects, we consider this cluster as one output result.') -parser.add_argument( - '-fc_folder', type=str, dest="fiber_cluster_folder", - help='Contains the fiber clustering result, in which each sub folder represents one subject. If specified, a mrml file that displays all output clusters will be generated. ') - -args = parser.parse_args() - -if not os.path.isdir(args.inputDirectory): - print("Error: Input directory", args.inputDirectory, "does not exist.") - exit() - -percentage_threshold = args.percentage_threshold -if percentage_threshold < 0 or percentage_threshold > 100: - print("Error: Percentage threshold should be in 0 to 100:") - exit() - -measurement_list = wma.tract_measurement.load_measurement_in_folder(args.inputDirectory, hierarchy = 'Column', separator = 'Tab') -if len(measurement_list) < 1: - print("Error: No measurement files found in directory:", args.inputDirectory) - exit() - -number_of_subjects = len(measurement_list) -print("\n== Number of subjects data found:", number_of_subjects) - -# print the header -print("\n== Region labels found in the measurement file:") -header = measurement_list[0].measurement_header -print(header) - -region = args.region -print("\n== Input region label:", region) - -# region_exist = False -# region_index = 0 -# for variable in header: -# if region == variable: -# region_exist = True -# break -# region_index = region_index + 1 -# -# if not region_exist: -# print "\nError: region", region, "does not exist." 
-# exit() -# else: -# print "Region found at position", region_index+1 - -subject_number_threshold = args.subject_number_threshold -if subject_number_threshold < 0 or subject_number_threshold > number_of_subjects: - print("Error: Subject number threshold should be in 0 to number of subject ("+ str(number_of_subjects)+ ")") - exit() - -# sanity check all measured variables are the same -print("\n== Testing if all subjects have the same measurement header.") -region_indices = [] -for subject_measured in measurement_list: - header = subject_measured.measurement_header - region_exist = False - region_index = 0 - for variable in header: - if region == variable: - region_exist = True - break - region_index = region_index + 1 - - if not region_exist: - print("\nError: region", region, "does not exist in", subject_measured.case_id) + +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Extract fiber clusters that connect to one particular region, such as one cortical parcel or one fMRI functional area. This is based on the results from .", + epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") + + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'inputDirectory', + help='Contains endpoint overlap percentage measurement as txt files, which are computed from .') + parser.add_argument( + '-r', type=str, dest="region", + help='Label of region-of-interest as in the header of the measurement files.') + parser.add_argument( + '-p', type=int, dest="percentage_threshold", default='10', + help='Percentage threshold [0, 100] that is used to decide if fiber cluster connect to the input region in individual subject. For each subject, if over p/100 of a fiber cluster\'s endpoints overlap with the region-of-interest, we consider this cluster and the input region are connected in this subject.') + parser.add_argument( + '-s', type=int, dest="subject_number_threshold", default='1', + help='Subject number threshold that is used to decide if fiber cluster connect to the input region in the population. Across all subjects, if a cluster is connected to the input region in at least s subjects, we consider this cluster as one output result.') + parser.add_argument( + '-fc_folder', type=str, dest="fiber_cluster_folder", + help='Contains the fiber clustering result, in which each sub folder represents one subject. If specified, a mrml file that displays all output clusters will be generated. 
') + + args = parser.parse_args() + + if not os.path.isdir(args.inputDirectory): + print("Error: Input directory", args.inputDirectory, "does not exist.") + exit() + + percentage_threshold = args.percentage_threshold + if percentage_threshold < 0 or percentage_threshold > 100: + print("Error: Percentage threshold should be in 0 to 100:") + exit() + + measurement_list = wma.tract_measurement.load_measurement_in_folder(args.inputDirectory, hierarchy = 'Column', separator = 'Tab') + if len(measurement_list) < 1: + print("Error: No measurement files found in directory:", args.inputDirectory) + exit() + + number_of_subjects = len(measurement_list) + print("\n== Number of subjects data found:", number_of_subjects) + + # print the header + print("\n== Region labels found in the measurement file:") + header = measurement_list[0].measurement_header + print(header) + + region = args.region + print("\n== Input region label:", region) + + # region_exist = False + # region_index = 0 + # for variable in header: + # if region == variable: + # region_exist = True + # break + # region_index = region_index + 1 + # + # if not region_exist: + # print "\nError: region", region, "does not exist." + # exit() + # else: + # print "Region found at position", region_index+1 + + subject_number_threshold = args.subject_number_threshold + if subject_number_threshold < 0 or subject_number_threshold > number_of_subjects: + print("Error: Subject number threshold should be in 0 to number of subject ("+ str(number_of_subjects)+ ")") exit() + + # sanity check all measured variables are the same + print("\n== Testing if all subjects have the same measurement header.") + region_indices = [] + for subject_measured in measurement_list: + header = subject_measured.measurement_header + region_exist = False + region_index = 0 + for variable in header: + if region == variable: + region_exist = True + break + region_index = region_index + 1 + + if not region_exist: + print("\nError: region", region, "does not exist in", subject_measured.case_id) + exit() + else: + print(subject_measured.case_id, ", total number of regions:",len(subject_measured.measurement_header), ", input region found at position", region_index+1) + region_indices.append(region_index) + + # Make subject id list for reporting of any potential outliers + subject_id_list = [] + for subject_measured in measurement_list: + subject_id_list.append(subject_measured.case_id) + subject_id_list = numpy.array(subject_id_list) + + # sanity check number of clusters is the same for all subjects + print("\n== Testing if all subjects have the same number of clusters.") + test = True + number_of_clusters = measurement_list[0].measurement_matrix[:,0].shape[0] + print("Group number of clusters (from first subject):", number_of_clusters) + for subject_measured, id in zip(measurement_list, subject_id_list): + nc = subject_measured.measurement_matrix[:,0].shape[0] + if not nc == number_of_clusters: + print(nc, " : ", id) + test = False + if test: + print("Passed. 
All subjects have the same number of clusters ("+ str(number_of_clusters) + ").") else: - print(subject_measured.case_id, ", total number of regions:",len(subject_measured.measurement_header), ", input region found at position", region_index+1) - region_indices.append(region_index) - -# Make subject id list for reporting of any potential outliers -subject_id_list = [] -for subject_measured in measurement_list: - subject_id_list.append(subject_measured.case_id) -subject_id_list = numpy.array(subject_id_list) - -# sanity check number of clusters is the same for all subjects -print("\n== Testing if all subjects have the same number of clusters.") -test = True -number_of_clusters = measurement_list[0].measurement_matrix[:,0].shape[0] -print("Group number of clusters (from first subject):", number_of_clusters) -for subject_measured, id in zip(measurement_list, subject_id_list): - nc = subject_measured.measurement_matrix[:,0].shape[0] - if not nc == number_of_clusters: - print(nc, " : ", id) - test = False -if test: - print("Passed. All subjects have the same number of clusters ("+ str(number_of_clusters) + ").") -else: - print("ERROR: All subjects do not have the same number of clusters. There was an earlier error in clustering or measurement that must be fixed before analysis.") - -print("\n== Histogram of the overlap percentage of each cluster to the input region.") -percentage_range = list(range(0, 100, 5)) -print_title = False -for sub_id, subject_measured, region_index in zip(subject_id_list, measurement_list, region_indices): - - subject_percentage_distribution = subject_measured.measurement_matrix[:, region_index] - - percent_str = sub_id + '(' + str(subject_measured.measurement_header[region_index]) + ')' - title_str = ("{:<"+str(len(sub_id)+len(region)+2)+"}").format('Percentage:') - for p in percentage_range: - percent_str = percent_str + ', ' + f"{str(sum(subject_percentage_distribution > p)):<4}" - title_str = title_str + ', ' + f"{str(p):<4}" - - if not print_title: - print(title_str) - print_title = True - - print(percent_str) - -print("\n== Number of clusters whose percentages are over the percentage threshold.") -connected_matrix = [] -for sub_id, subject_measured, region_index in zip(subject_id_list, measurement_list, region_indices): - subject_percentage_distribution = subject_measured.measurement_matrix[:, region_index] - connected_matrix.append(subject_percentage_distribution > percentage_threshold) - print("%s has %4d / %4d clusters connected to the input region." 
% (sub_id, sum(subject_percentage_distribution > percentage_threshold), number_of_clusters)) - -print("\n== Number of clusters that connect to the input region across all subjects.") -num_subjects_per_cluster = sum(connected_matrix) -for num_idx in range(number_of_subjects+1): - print("Clusters that are connected in", num_idx, '(subject number threshold) subjects: ', sum(num_subjects_per_cluster == num_idx)) - -print("\n== Result clusters that are detected by at least",subject_number_threshold,'subjects') -output_cluster_idx = numpy.where(num_subjects_per_cluster >= subject_number_threshold)[0] -output_cluster_idx = output_cluster_idx + 1 -print("Total", len(output_cluster_idx), "clusters.") -print(output_cluster_idx) - -if not (args.fiber_cluster_folder is None): - print("\n== Output mrml to the fiber cluster folder") - sub_folder = os.listdir(args.fiber_cluster_folder)[0] - print(sub_folder) - pd_cluster_3 = wma.io.list_vtk_files(os.path.join(args.fiber_cluster_folder, sub_folder))[2] - suffix = os.path.split(pd_cluster_3)[1][13:-4] - - cluster_polydatas = [] - for c_idx in output_cluster_idx: - cluster_polydatas.append("cluster_"+str(c_idx).zfill(5)+suffix+".vtp") - - number_of_files = len(cluster_polydatas) - - step = int(100 * 255.0 / (number_of_files - 1)) - R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 - G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 - B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 - - colors = list() - idx = 0 - for pd in cluster_polydatas: - colors.append([R[idx], G[idx], B[idx]]) - idx += 1 - colors = numpy.array(colors) - - mrml_filename = "cluster_connecting_region_" + region + ".mrml" - - for sub_folder in os.listdir(args.fiber_cluster_folder): - wma.mrml.write(cluster_polydatas, colors, os.path.join(args.fiber_cluster_folder+'/'+sub_folder, mrml_filename), ratio=1.0) - -print('Done!' + '\n') + print("ERROR: All subjects do not have the same number of clusters. There was an earlier error in clustering or measurement that must be fixed before analysis.") + + print("\n== Histogram of the overlap percentage of each cluster to the input region.") + percentage_range = list(range(0, 100, 5)) + print_title = False + for sub_id, subject_measured, region_index in zip(subject_id_list, measurement_list, region_indices): + + subject_percentage_distribution = subject_measured.measurement_matrix[:, region_index] + + percent_str = sub_id + '(' + str(subject_measured.measurement_header[region_index]) + ')' + title_str = ("{:<"+str(len(sub_id)+len(region)+2)+"}").format('Percentage:') + for p in percentage_range: + percent_str = percent_str + ', ' + f"{str(sum(subject_percentage_distribution > p)):<4}" + title_str = title_str + ', ' + f"{str(p):<4}" + + if not print_title: + print(title_str) + print_title = True + + print(percent_str) + + print("\n== Number of clusters whose percentages are over the percentage threshold.") + connected_matrix = [] + for sub_id, subject_measured, region_index in zip(subject_id_list, measurement_list, region_indices): + subject_percentage_distribution = subject_measured.measurement_matrix[:, region_index] + connected_matrix.append(subject_percentage_distribution > percentage_threshold) + print("%s has %4d / %4d clusters connected to the input region." 
% (sub_id, sum(subject_percentage_distribution > percentage_threshold), number_of_clusters)) + + print("\n== Number of clusters that connect to the input region across all subjects.") + num_subjects_per_cluster = sum(connected_matrix) + for num_idx in range(number_of_subjects+1): + print("Clusters that are connected in", num_idx, '(subject number threshold) subjects: ', sum(num_subjects_per_cluster == num_idx)) + + print("\n== Result clusters that are detected by at least",subject_number_threshold,'subjects') + output_cluster_idx = numpy.where(num_subjects_per_cluster >= subject_number_threshold)[0] + output_cluster_idx = output_cluster_idx + 1 + print("Total", len(output_cluster_idx), "clusters.") + print(output_cluster_idx) + + if not (args.fiber_cluster_folder is None): + print("\n== Output mrml to the fiber cluster folder") + sub_folder = os.listdir(args.fiber_cluster_folder)[0] + print(sub_folder) + pd_cluster_3 = wma.io.list_vtk_files(os.path.join(args.fiber_cluster_folder, sub_folder))[2] + suffix = os.path.split(pd_cluster_3)[1][13:-4] + + cluster_polydatas = [] + for c_idx in output_cluster_idx: + cluster_polydatas.append("cluster_"+str(c_idx).zfill(5)+suffix+".vtp") + + number_of_files = len(cluster_polydatas) + + step = int(100 * 255.0 / (number_of_files - 1)) + R = numpy.array(list(range(0, 100 * 255 + 1, step))) / 100.0 + G = numpy.abs(list(range(100 * -127, 100 * 128 + 1, step))) * 2.0 / 100.0 + B = numpy.array(list(range(100 * 255 + 1, 0, -step))) / 100.0 + + colors = list() + idx = 0 + for pd in cluster_polydatas: + colors.append([R[idx], G[idx], B[idx]]) + idx += 1 + colors = numpy.array(colors) + + mrml_filename = "cluster_connecting_region_" + region + ".mrml" + + for sub_folder in os.listdir(args.fiber_cluster_folder): + wma.mrml.write(cluster_polydatas, colors, os.path.join(args.fiber_cluster_folder+'/'+sub_folder, mrml_filename), ratio=1.0) + + print('Done!' + '\n') + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_flatten_length_distribution.py b/utilities/wm_flatten_length_distribution.py index 75b3e54a..161d00de 100644 --- a/utilities/wm_flatten_length_distribution.py +++ b/utilities/wm_flatten_length_distribution.py @@ -20,137 +20,142 @@ raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Samples fibers such that the output distribution of fiber lengths is approximately flat, within the input length range.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu", - version='1.0') - -parser.add_argument( - 'inputDirectory', - help='Contains whole-brain tractography as vtkPolyData file(s).') -parser.add_argument( - 'outputDirectory', - help='The output directory should be a new empty directory. It will be created if needed.') -parser.add_argument( - '-f', action="store", dest="numberOfFibers", type=int, - help='Number of fibers to keep from each dataset.') -parser.add_argument( - '-lmin', action="store", dest="fiberLengthMin", type=int, - help='Minimum length (in mm) of fibers to keep.') -parser.add_argument( - '-lmax', action="store", dest="fiberLengthMax", type=int, - help='Maximum length (in mm) of fibers to keep.') -parser.add_argument( - '-nbins', action="store", dest="numberOfBins", type=int, default=10, - help='Number of bins for sampling the fiber length distribution.') -parser.add_argument( - '-nf', action="store", dest="fibersPerBin", type=int, default=1000, - help='Number of fibers sampled per bin. 
If you have specified the total number of fibers, that will override this parameter.') -parser.add_argument( - '-j', action="store", dest="numberOfJobs", type=int, - help='Number of processors to use.') - - -args = parser.parse_args() - - -if not os.path.isdir(args.inputDirectory): - print("Error: Input directory", args.inputDirectory, "does not exist.") +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Samples fibers such that the output distribution of fiber lengths is approximately flat, within the input length range.", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu", + version='1.0') + + parser.add_argument( + 'inputDirectory', + help='Contains whole-brain tractography as vtkPolyData file(s).') + parser.add_argument( + 'outputDirectory', + help='The output directory should be a new empty directory. It will be created if needed.') + parser.add_argument( + '-f', action="store", dest="numberOfFibers", type=int, + help='Number of fibers to keep from each dataset.') + parser.add_argument( + '-lmin', action="store", dest="fiberLengthMin", type=int, + help='Minimum length (in mm) of fibers to keep.') + parser.add_argument( + '-lmax', action="store", dest="fiberLengthMax", type=int, + help='Maximum length (in mm) of fibers to keep.') + parser.add_argument( + '-nbins', action="store", dest="numberOfBins", type=int, default=10, + help='Number of bins for sampling the fiber length distribution.') + parser.add_argument( + '-nf', action="store", dest="fibersPerBin", type=int, default=1000, + help='Number of fibers sampled per bin. If you have specified the total number of fibers, that will override this parameter.') + parser.add_argument( + '-j', action="store", dest="numberOfJobs", type=int, + help='Number of processors to use.') + + + args = parser.parse_args() + + + if not os.path.isdir(args.inputDirectory): + print("Error: Input directory", args.inputDirectory, "does not exist.") + exit() + + outdir = args.outputDirectory + if not os.path.exists(outdir): + print("Output directory", outdir, "does not exist, creating it.") + os.makedirs(outdir) + + print(f"{os.path.basename(__file__)}. Starting streamline length distribution flattening.") + print("") + print("=====input directory======\n", args.inputDirectory) + print("=====output directory=====\n", args.outputDirectory) + print("==========================") + + if args.numberOfFibers is not None: + print("fibers to retain per subject: ", args.numberOfFibers) + args.fibersPerBin = numpy.divide(args.numberOfFibers,args.numberOfBins) + else: + print("fibers to retain per subject: ALL") + + if args.fiberLengthMin is not None: + print("minimum length of fibers to retain (in mm): ", args.fiberLengthMin) + else: + print("minimum length of fibers to retain (in mm): 0") + + if args.fiberLengthMax is not None: + print("maximum length of fibers to retain (in mm): ", args.fiberLengthMax) + + print("Bins:", args.numberOfBins) + print("Fibers per bin:", args.fibersPerBin) + + if args.numberOfJobs is not None: + parallel_jobs = args.numberOfJobs + else: + parallel_jobs = 1 + print('Using N jobs:', parallel_jobs) + + + print("==========================") + + # ======================================================================= + # Above this line is argument parsing. Below this line is the pipeline. 
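# Editor's note (illustrative sketch, not the wma.filter implementation):
# flattening the length distribution amounts to binning fibers by length and
# keeping at most `fibers_per_bin` fibers per bin, as described by the -nbins
# and -nf help text above. A minimal NumPy version with a hypothetical helper
# name, using half-open bins over [lmin, lmax):
import numpy

def flatten_lengths(lengths, n_bins, fibers_per_bin, lmin, lmax):
    lengths = numpy.asarray(lengths)
    edges = numpy.linspace(lmin, lmax, n_bins + 1)
    keep = []
    for b in range(n_bins):
        # indices of fibers whose length falls in this bin
        idx = numpy.where((lengths >= edges[b]) & (lengths < edges[b + 1]))[0]
        numpy.random.shuffle(idx)
        keep.extend(idx[:fibers_per_bin].tolist())
    return keep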
+ # ======================================================================= + + # Loop over input DWIs + inputMask1 = f"{args.inputDirectory}/*.vtk" + inputMask2 = f"{args.inputDirectory}/*.vtp" + + inputPolyDatas = glob.glob(inputMask1) + glob.glob(inputMask2) + + print(f"<{os.path.basename(__file__)}> Input number of files: ", len(inputPolyDatas)) + + # for testing + #inputPolyDatas = inputPolyDatas[0:2] + + def pipeline(inputPolyDatas, sidx, args): + # get subject identifier from unique input filename + # ------------------- + subjectID = os.path.splitext(os.path.basename(inputPolyDatas[sidx]))[0] + id_msg = f"<{os.path.basename(__file__)}> ", sidx + 1, "/", len(inputPolyDatas) + msg = "**Starting subject:", subjectID + print(id_msg + msg) + + # read input vtk data + # ------------------- + msg = "**Reading input:", subjectID + print(id_msg + msg) + + wm = wma.io.read_polydata(inputPolyDatas[sidx]) + + num_lines = wm.GetNumberOfLines() + #print "Input number of fibers", num_lines + + wm2 = wma.filter.flatten_length_distribution(wm, args.fiberLengthMin, args.fiberLengthMax, args.numberOfBins, args.fibersPerBin, verbose=True) + + # outputs + # ------------------- + msg = "**Writing output data for subject:", subjectID + print(id_msg, msg) + + fname = os.path.join(args.outputDirectory, subjectID+'_flat.vtp') + try: + print("Writing output polydata", fname, "...") + wma.io.write_polydata(wm2, fname) + print("Wrote output", fname, ".") + except: + print("Unknown exception in IO") + raise + del wm2 + + # loop over all inputs + Parallel(n_jobs=parallel_jobs, verbose=0)( + delayed(pipeline)(inputPolyDatas, sidx, args) + for sidx in range(0, len(inputPolyDatas))) + exit() -outdir = args.outputDirectory -if not os.path.exists(outdir): - print("Output directory", outdir, "does not exist, creating it.") - os.makedirs(outdir) - -print(f"{os.path.basename(__file__)}. Starting streamline length distribution flattening.") -print("") -print("=====input directory======\n", args.inputDirectory) -print("=====output directory=====\n", args.outputDirectory) -print("==========================") - -if args.numberOfFibers is not None: - print("fibers to retain per subject: ", args.numberOfFibers) - args.fibersPerBin = numpy.divide(args.numberOfFibers,args.numberOfBins) -else: - print("fibers to retain per subject: ALL") - -if args.fiberLengthMin is not None: - print("minimum length of fibers to retain (in mm): ", args.fiberLengthMin) -else: - print("minimum length of fibers to retain (in mm): 0") - -if args.fiberLengthMax is not None: - print("maximum length of fibers to retain (in mm): ", args.fiberLengthMax) - -print("Bins:", args.numberOfBins) -print("Fibers per bin:", args.fibersPerBin) - -if args.numberOfJobs is not None: - parallel_jobs = args.numberOfJobs -else: - parallel_jobs = 1 -print('Using N jobs:', parallel_jobs) - - -print("==========================") - -# ======================================================================= -# Above this line is argument parsing. Below this line is the pipeline. 
-# ======================================================================= - -# Loop over input DWIs -inputMask1 = f"{args.inputDirectory}/*.vtk" -inputMask2 = f"{args.inputDirectory}/*.vtp" - -inputPolyDatas = glob.glob(inputMask1) + glob.glob(inputMask2) - -print(f"<{os.path.basename(__file__)}> Input number of files: ", len(inputPolyDatas)) - -# for testing -#inputPolyDatas = inputPolyDatas[0:2] - -def pipeline(inputPolyDatas, sidx, args): - # get subject identifier from unique input filename - # ------------------- - subjectID = os.path.splitext(os.path.basename(inputPolyDatas[sidx]))[0] - id_msg = f"<{os.path.basename(__file__)}> ", sidx + 1, "/", len(inputPolyDatas) - msg = "**Starting subject:", subjectID - print(id_msg + msg) - - # read input vtk data - # ------------------- - msg = "**Reading input:", subjectID - print(id_msg + msg) - - wm = wma.io.read_polydata(inputPolyDatas[sidx]) - - num_lines = wm.GetNumberOfLines() - #print "Input number of fibers", num_lines - - wm2 = wma.filter.flatten_length_distribution(wm, args.fiberLengthMin, args.fiberLengthMax, args.numberOfBins, args.fibersPerBin, verbose=True) - - # outputs - # ------------------- - msg = "**Writing output data for subject:", subjectID - print(id_msg, msg) - - fname = os.path.join(args.outputDirectory, subjectID+'_flat.vtp') - try: - print("Writing output polydata", fname, "...") - wma.io.write_polydata(wm2, fname) - print("Wrote output", fname, ".") - except: - print("Unknown exception in IO") - raise - del wm2 - -# loop over all inputs -Parallel(n_jobs=parallel_jobs, verbose=0)( - delayed(pipeline)(inputPolyDatas, sidx, args) - for sidx in range(0, len(inputPolyDatas))) - -exit() + +if __name__ == "__main__": + main() diff --git a/utilities/wm_laterality_all.py b/utilities/wm_laterality_all.py index bfc815b2..a990e6ce 100755 --- a/utilities/wm_laterality_all.py +++ b/utilities/wm_laterality_all.py @@ -11,187 +11,193 @@ print(f"{os.path.basename(__file__)}> Error importing white matter analysis package\n") raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Applies white matter laterality pipeline to input directory.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu") - -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'inputDirectory', - help='Contains whole-brain tractography as vtkPolyData file(s).') -parser.add_argument( - 'outputDirectory', - help='The output directory should be a new empty directory.') -parser.add_argument( - '-f', action="store", dest="numberOfFibers", type=int, - help='Number of fibers to analyze from each dataset.') -parser.add_argument( - '-l', action="store", dest="fiberLength", type=int, - help='Minimum length (in mm) of fibers to analyze.') -parser.add_argument( - '-s', action="store", dest="sigma", type=float, - help='Sigma for laterality computation. Useful values are 10-50 (mm).') -parser.add_argument( - '-t', action="store", dest="threshold", type=float, - help='Threshold lower fiber distances to 0. 
Useful range 0-5mm.') -parser.add_argument( - '-rm_outlier', action='store_true', dest="flag_removeOutliers") -parser.add_argument( - '-equal_fiber_num', action='store_true', dest="flag_equalFibers", - help='To analyze an equal number of fibers per hemisphere') -parser.add_argument( - '-fibers_per_hem', action="store", dest="numberOfFibersPerHem", type=int, - help='Number of fibers to analyze from each hemisphere.') - -args = parser.parse_args() - -if not os.path.isdir(args.inputDirectory): - print("Error: Input directory", args.inputDirectory, "does not exist.") - exit() - -outdir = args.outputDirectory -if not os.path.exists(outdir): - print(f"<{os.path.basename(__file__)}> Output directory", outdir, "does not exist, creating it.") - os.makedirs(outdir) - -print("f<{os.path.basename(__file__)}> Starting white matter laterality computation.") -print("") -print("=====input directory======\n", args.inputDirectory) -print("=====output directory=====\n", args.outputDirectory) -print("==========================") -if args.numberOfFibers is not None: - print("fibers to analyze per subject: ", args.numberOfFibers) -else: - print("fibers to analyze per subject: ALL") +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Applies white matter laterality pipeline to input directory.", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu") + + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'inputDirectory', + help='Contains whole-brain tractography as vtkPolyData file(s).') + parser.add_argument( + 'outputDirectory', + help='The output directory should be a new empty directory.') + parser.add_argument( + '-f', action="store", dest="numberOfFibers", type=int, + help='Number of fibers to analyze from each dataset.') + parser.add_argument( + '-l', action="store", dest="fiberLength", type=int, + help='Minimum length (in mm) of fibers to analyze.') + parser.add_argument( + '-s', action="store", dest="sigma", type=float, + help='Sigma for laterality computation. Useful values are 10-50 (mm).') + parser.add_argument( + '-t', action="store", dest="threshold", type=float, + help='Threshold lower fiber distances to 0. 
Useful range 0-5mm.') + parser.add_argument( + '-rm_outlier', action='store_true', dest="flag_removeOutliers") + parser.add_argument( + '-equal_fiber_num', action='store_true', dest="flag_equalFibers", + help='To analyze an equal number of fibers per hemisphere') + parser.add_argument( + '-fibers_per_hem', action="store", dest="numberOfFibersPerHem", type=int, + help='Number of fibers to analyze from each hemisphere.') + + args = parser.parse_args() + + if not os.path.isdir(args.inputDirectory): + print("Error: Input directory", args.inputDirectory, "does not exist.") + exit() + + outdir = args.outputDirectory + if not os.path.exists(outdir): + print(f"<{os.path.basename(__file__)}> Output directory", outdir, "does not exist, creating it.") + os.makedirs(outdir) + + print("f<{os.path.basename(__file__)}> Starting white matter laterality computation.") + print("") + print("=====input directory======\n", args.inputDirectory) + print("=====output directory=====\n", args.outputDirectory) + print("==========================") -if args.fiberLength is not None: - print("minimum length of fibers to analyze (in mm): ", args.fiberLength) - -if args.sigma is not None: - print("sigma for laterality index computation: ", args.sigma) + if args.numberOfFibers is not None: + print("fibers to analyze per subject: ", args.numberOfFibers) + else: + print("fibers to analyze per subject: ALL") -if args.threshold is not None: - print("intra-fiber distance threshold (in mm): ", args.threshold) + if args.fiberLength is not None: + print("minimum length of fibers to analyze (in mm): ", args.fiberLength) -if args.flag_removeOutliers: - print("Automatic outlier removal is ON.") -else: - print("Automatic outlier removal is OFF.") + if args.sigma is not None: + print("sigma for laterality index computation: ", args.sigma) -if args.flag_equalFibers: - print("Use equal fiber number from each hemisphere is ON.") -else: - print("Use equal fiber number from each hemisphere is OFF. Using input fiber number.") + if args.threshold is not None: + print("intra-fiber distance threshold (in mm): ", args.threshold) -if args.numberOfFibersPerHem is not None: - print("fibers to analyze per hemisphere: ", args.numberOfFibersPerHem) -else: - print("fibers to analyze per hemisphere: all or equal") + if args.flag_removeOutliers: + print("Automatic outlier removal is ON.") + else: + print("Automatic outlier removal is OFF.") -print("==========================") + if args.flag_equalFibers: + print("Use equal fiber number from each hemisphere is ON.") + else: + print("Use equal fiber number from each hemisphere is OFF. Using input fiber number.") -# ======================================================================= -# Above this line is argument parsing. Below this line is the pipeline. -# ======================================================================= + if args.numberOfFibersPerHem is not None: + print("fibers to analyze per hemisphere: ", args.numberOfFibersPerHem) + else: + print("fibers to analyze per hemisphere: all or equal") -inputPolyDatas = wma.io.list_vtk_files(args.inputDirectory) + print("==========================") -print(f"<{os.path.basename(__file__)}> Input number of files: ", len(inputPolyDatas)) + # ======================================================================= + # Above this line is argument parsing. Below this line is the pipeline. 
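# Editor's note (suggested fix, not part of the patch): in the banner print in
# this script, the "f" sits inside the string literal, so the braces are printed
# verbatim instead of being interpolated; an f-string is presumably intended:
print(f"<{os.path.basename(__file__)}> Starting white matter laterality computation.")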
+ # ======================================================================= -# loop over all inputs -for sidx in range(0, len(inputPolyDatas)): + inputPolyDatas = wma.io.list_vtk_files(args.inputDirectory) - # get subject identifier from unique input filename - # ------------------- - subjectID = os.path.splitext(os.path.basename(inputPolyDatas[sidx]))[0] - id_msg = f"<{os.path.basename(__file__)}> ", sidx + 1, "/", len(inputPolyDatas) - msg = "**Starting subject:", subjectID - print(id_msg, msg) + print(f"<{os.path.basename(__file__)}> Input number of files: ", len(inputPolyDatas)) - # read input vtk data - # ------------------- - msg = "**Reading input:", subjectID - print(id_msg, msg) + # loop over all inputs + for sidx in range(0, len(inputPolyDatas)): - wm = wma.io.read_polydata(inputPolyDatas[sidx]) + # get subject identifier from unique input filename + # ------------------- + subjectID = os.path.splitext(os.path.basename(inputPolyDatas[sidx]))[0] + id_msg = f"<{os.path.basename(__file__)}> ", sidx + 1, "/", len(inputPolyDatas) + msg = "**Starting subject:", subjectID + print(id_msg, msg) - # remove short fibers - # ------------------- - if args.fiberLength is not None: - msg = "**Preprocessing:", subjectID + # read input vtk data + # ------------------- + msg = "**Reading input:", subjectID print(id_msg, msg) - wm = wma.filter.preprocess(wm, args.fiberLength, remove_u=True, remove_u_endpoint_dist=50, remove_brainstem=True) - print("Number of fibers retained: ", wm.GetNumberOfLines()) + wm = wma.io.read_polydata(inputPolyDatas[sidx]) - # remove outlier fibers - # ------------------- - if args.flag_removeOutliers: - msg = "**Removing outliers:", subjectID - print(id_msg, msg) + # remove short fibers + # ------------------- + if args.fiberLength is not None: + msg = "**Preprocessing:", subjectID + print(id_msg, msg) - # if it's huge downsample to twice requested size first - if args.numberOfFibers is not None: - if (wm.GetNumberOfLines() > args.numberOfFibers * 2): - wm = wma.filter.downsample( - wm, args.numberOfFibers * 2) - print(wm.GetNumberOfLines()) + wm = wma.filter.preprocess(wm, args.fiberLength, remove_u=True, remove_u_endpoint_dist=50, remove_brainstem=True) + print("Number of fibers retained: ", wm.GetNumberOfLines()) - outlierThreshold = 10 - wm = wma.filter.remove_outliers(wm, outlierThreshold) + # remove outlier fibers + # ------------------- + if args.flag_removeOutliers: + msg = "**Removing outliers:", subjectID + print(id_msg, msg) - # downsample if requested - # ------------------- - if args.numberOfFibers is not None: - msg = "**Downsampling input:", subjectID - print(id_msg, msg) + # if it's huge downsample to twice requested size first + if args.numberOfFibers is not None: + if (wm.GetNumberOfLines() > args.numberOfFibers * 2): + wm = wma.filter.downsample( + wm, args.numberOfFibers * 2) + print(wm.GetNumberOfLines()) - wm = wma.filter.downsample(wm, args.numberOfFibers) + outlierThreshold = 10 + wm = wma.filter.remove_outliers(wm, outlierThreshold) - # midsagittal alignment is already done - wm_align = wm + # downsample if requested + # ------------------- + if args.numberOfFibers is not None: + msg = "**Downsampling input:", subjectID + print(id_msg, msg) - # compute laterality on each dataset - # ------------------- - msg = "**Computing laterality:", subjectID - print(id_msg, msg) + wm = wma.filter.downsample(wm, args.numberOfFibers) - laterality = wma.laterality.WhiteMatterLaterality() - if args.sigma is not None: - laterality.sigma = args.sigma - if 
args.threshold is not None: - laterality.threshold = float(args.threshold) - else: - laterality.threshold = 0.0 - if args.flag_equalFibers: - laterality.use_equal_fibers = True - else: - laterality.use_equal_fibers = False + # midsagittal alignment is already done + wm_align = wm - if args.numberOfFibersPerHem is not None: - laterality.fibers_per_hemisphere = args.numberOfFibersPerHem + # compute laterality on each dataset + # ------------------- + msg = "**Computing laterality:", subjectID + print(id_msg, msg) + + laterality = wma.laterality.WhiteMatterLaterality() + if args.sigma is not None: + laterality.sigma = args.sigma + if args.threshold is not None: + laterality.threshold = float(args.threshold) + else: + laterality.threshold = 0.0 + if args.flag_equalFibers: + laterality.use_equal_fibers = True + else: + laterality.use_equal_fibers = False + + if args.numberOfFibersPerHem is not None: + laterality.fibers_per_hemisphere = args.numberOfFibersPerHem + + laterality_results = laterality.compute(wm_align) + + # outputs + # ------------------- + msg = "**Writing output data for subject:", subjectID + print(id_msg, msg) - laterality_results = laterality.compute(wm_align) + outdir = os.path.join(args.outputDirectory, subjectID) + try: + #print "Writing output files..." + laterality_results.write(outdir) + print("wrote output") + except: + print("Unknown exception in IO") + raise - # outputs - # ------------------- - msg = "**Writing output data for subject:", subjectID - print(id_msg, msg) + exit() - outdir = os.path.join(args.outputDirectory, subjectID) - try: - #print "Writing output files..." - laterality_results.write(outdir) - print("wrote output") - except: - print("Unknown exception in IO") - raise -exit() +if __name__ == "__main__": + main() diff --git a/utilities/wm_measure_all_clusters.py b/utilities/wm_measure_all_clusters.py index a33f2a79..9acf448f 100755 --- a/utilities/wm_measure_all_clusters.py +++ b/utilities/wm_measure_all_clusters.py @@ -9,108 +9,114 @@ print(f"<{os.path.basename(__file__)}> Error importing white matter analysis package\n") raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Measures mean FA, etc. in tractography clusters in a directory.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"") -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'inputDirectory', - help='A directory of tractography as vtkPolyData (.vtk or .vtp).') -parser.add_argument( - 'outputFile', - help='Output measurements will be recorded here.') - -args = parser.parse_args() - -if not os.path.isdir(args.inputDirectory): - print("Error: Input directory", args.inputDirectory, "does not exist.") - exit() - -print(f"<{os.path.basename(__file__)}> Starting computation.") -print("") -print("=====input directory ======\n", args.inputDirectory) -print("=====output file =====\n", args.outputFile) -print("==========================") -print("") - - -# ======================================================================= -# Above this line is argument parsing. Below this line is the pipeline. 
-# ======================================================================= - -def compute_point_data_stats(pd, array_name): - point_array = pd.GetPointData().GetArray(array_name) - if point_array is None: - return None - # make sure this is a one-component scalar - if point_array.GetNumberOfComponents() > 1: - print("Error in compute_point_data_stats: Array", array_name, "has more than one component", point_array.GetNumberOfComponents(), ".") - return None - print(point_array) - num_points = pd.GetNumberOfPoints() - points_copy = numpy.zeros(num_points) - - for pidx in range(0, num_points): - if (pidx % 1000) == 0: - print("Point", pidx, '/', num_points) - # this assumes we have scalars here - points_copy[pidx] = point_array.GetTuple(pidx)[0] - - points_mean = numpy.mean(points_copy) - #points_std = numpy.std(points_copy) - #points_median = numpy.median(points_copy) - - print("Mean ", array_name, ":", points_mean) - - return points_mean - - - -input_polydatas = wma.io.list_vtk_files(args.inputDirectory) -number_of_clusters = len(input_polydatas) - -input_polydatas = input_polydatas[0:10] - -print(f"<{os.path.basename(__file__)}> Input number of vtk/vtp files: ", number_of_clusters) - -scalars = ['FA', 'Trace', 'FA1', 'FA2', 'Trace1', 'Trace2'] - -output_rows = list() - -# read in data -input_pds = list() -for fname in input_polydatas: - print(fname) - # read data - print(f"<{os.path.basename(__file__)}> Reading input file:", fname) - pd = wma.io.read_polydata(fname) - # preprocessing step: minimum length - print(f"<{os.path.basename(__file__)}> Computing stats for input file:", fname) - output_row = list() - output_row.append(fname) - for sc in scalars: - stat = compute_point_data_stats(pd, sc) - if stat is not None: - output_row.append(sc) - output_row.append(stat) - output_rows.append(output_row) - -# output a csv file -f = open(args.outputFile, 'w') - -for row in output_rows: - outstr = '' - for item in row: - if outstr != '': - outstr = outstr + '\t' - outstr = outstr + str(item) - outstr = outstr + '\n' - f.write(outstr) - -f.close() + +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Measures mean FA, etc. in tractography clusters in a directory.", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"") + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'inputDirectory', + help='A directory of tractography as vtkPolyData (.vtk or .vtp).') + parser.add_argument( + 'outputFile', + help='Output measurements will be recorded here.') + + args = parser.parse_args() + + if not os.path.isdir(args.inputDirectory): + print("Error: Input directory", args.inputDirectory, "does not exist.") + exit() + + print(f"<{os.path.basename(__file__)}> Starting computation.") + print("") + print("=====input directory ======\n", args.inputDirectory) + print("=====output file =====\n", args.outputFile) + print("==========================") + print("") + + + # ======================================================================= + # Above this line is argument parsing. Below this line is the pipeline. 
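# Editor's note (optional optimization sketch, not part of the patch): the
# point-by-point copy in compute_point_data_stats below can be replaced by a
# single call to VTK's NumPy bridge, assuming a one-component scalar array:
import numpy
from vtk.util import numpy_support

def mean_point_scalar(pd, array_name):
    arr = pd.GetPointData().GetArray(array_name)
    if arr is None or arr.GetNumberOfComponents() > 1:
        return None
    # vtk_to_numpy views the array without an explicit per-point loop
    return float(numpy.mean(numpy_support.vtk_to_numpy(arr)))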
+ # ======================================================================= + + def compute_point_data_stats(pd, array_name): + point_array = pd.GetPointData().GetArray(array_name) + if point_array is None: + return None + # make sure this is a one-component scalar + if point_array.GetNumberOfComponents() > 1: + print("Error in compute_point_data_stats: Array", array_name, "has more than one component", point_array.GetNumberOfComponents(), ".") + return None + print(point_array) + num_points = pd.GetNumberOfPoints() + points_copy = numpy.zeros(num_points) + + for pidx in range(0, num_points): + if (pidx % 1000) == 0: + print("Point", pidx, '/', num_points) + # this assumes we have scalars here + points_copy[pidx] = point_array.GetTuple(pidx)[0] + + points_mean = numpy.mean(points_copy) + #points_std = numpy.std(points_copy) + #points_median = numpy.median(points_copy) + + print("Mean ", array_name, ":", points_mean) + + return points_mean + + + + input_polydatas = wma.io.list_vtk_files(args.inputDirectory) + number_of_clusters = len(input_polydatas) + + input_polydatas = input_polydatas[0:10] + + print(f"<{os.path.basename(__file__)}> Input number of vtk/vtp files: ", number_of_clusters) + + scalars = ['FA', 'Trace', 'FA1', 'FA2', 'Trace1', 'Trace2'] + + output_rows = list() + + # read in data + input_pds = list() + for fname in input_polydatas: + print(fname) + # read data + print(f"<{os.path.basename(__file__)}> Reading input file:", fname) + pd = wma.io.read_polydata(fname) + # preprocessing step: minimum length + print(f"<{os.path.basename(__file__)}> Computing stats for input file:", fname) + output_row = list() + output_row.append(fname) + for sc in scalars: + stat = compute_point_data_stats(pd, sc) + if stat is not None: + output_row.append(sc) + output_row.append(stat) + output_rows.append(output_row) + + # output a csv file + f = open(args.outputFile, 'w') + + for row in output_rows: + outstr = '' + for item in row: + if outstr != '': + outstr = outstr + '\t' + outstr = outstr + str(item) + outstr = outstr + '\n' + f.write(outstr) + + f.close() + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_measure_endpoint_overlap.py b/utilities/wm_measure_endpoint_overlap.py index 3d2ae810..7408eece 100755 --- a/utilities/wm_measure_endpoint_overlap.py +++ b/utilities/wm_measure_endpoint_overlap.py @@ -10,113 +10,119 @@ print(f"<{os.path.basename(__file__)}> Error importing white matter analysis package\n") raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Measure overlaps of fiber clusters with cortical parcellation or fMRI functional areas. This is based on the 3D Slicer module FiberEndPointFromLabelMap.", - epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") - -parser.add_argument("-v", "--version", - action="version", default=argparse.SUPPRESS, - version='1.0', - help="Show program's version number and exit") -parser.add_argument( - 'inputTractDirectory', - help='Directory of fiber clustering results obtained by of multiple subjects. Make sure only the fiber clustering results are stored in this folder, making one subdirectory corresponding to one subject.') -parser.add_argument( - 'inputLabelMapDirectory', - help='Contains the parcellation or functional areas as label map files. 
Make sure that the input tract files and the label map files match each other in alphabetical order.') -parser.add_argument( - 'outputDirectory', - help='Directory of output CSV files that shows the percentage of fiber clusters\' endpoint connecting to certain regions in the label map.') -parser.add_argument( - 'modulePath', - help='Path of the 3D Slicer FiberEndPointFromLabelMap module.') -parser.add_argument( - '-j', action="store", dest="numberOfJobs", type=int, - help='Number of processors to use.') - -args = parser.parse_args() - -if not os.path.isdir(args.inputTractDirectory): - print("Error: Input directory", args.inputTractDirectory, "does not exist.") - exit() - -if not os.path.isdir(args.inputLabelMapDirectory): - print("Error: Input label map directory", args.inputLabelMapDirectory, "does not exist.") - exit() - -if not os.path.exists(args.modulePath): - print("Error: FiberEndPointFromLabelMap", args.modulePath, "does not exist.") - exit() - -if args.numberOfJobs is not None: - number_of_jobs = args.numberOfJobs -else: - number_of_jobs = 1 - -if not os.path.exists(args.outputDirectory): - print("Output directory", args.outputDirectory, "does not exist, creating it.") - os.makedirs(args.outputDirectory) - -print(f"<{os.path.basename(__file__)}>. Starting processing.") -print("") -print("=====input fiber cluster directory======\n", args.inputTractDirectory) -print("=====input label map directory======\n", args.inputLabelMapDirectory) -print("=====output directory=====\n", args.outputDirectory) -print("=====module path====\n", args.modulePath) -print('=====using N jobs:', number_of_jobs, "====\n") - -tract_dir_list = os.listdir(args.inputTractDirectory) -tract_dir_list = sorted(tract_dir_list) - -print(f"<{os.path.basename(__file__)}> found", len(tract_dir_list), "subjects.") - -def list_label_map_files(input_dir): - # Find input files - input_mask = f"{input_dir}/*.nrrd" - input_mask2 = f"{input_dir}/*.nhdr" - input_fnames = glob.glob(input_mask) + glob.glob(input_mask2) - input_fnames = sorted(input_fnames) - return(input_fnames) - -label_map_file_list = list_label_map_files(args.inputLabelMapDirectory) - -print(f"<{os.path.basename(__file__)}> found", len(label_map_file_list), "label maps. 
\n") - -if len(tract_dir_list) != len(label_map_file_list): - print("Error: The number of subjects", len(tract_dir_list), "should be equal to the number of label maps", len(label_map_file_list)) - exit() - -def extract_endpoint(tract_dir, lalel_map_file, args): - - pds = wma.io.list_vtk_files(os.path.join(args.inputTractDirectory, tract_dir)) - print(f"<{os.path.basename(__file__)}> Computing:", os.path.join(args.inputTractDirectory, tract_dir)) - print(" using", lalel_map_file) - print(" with:", len(pds), "vtk/vtp files.") - - sub_name = os.path.split(tract_dir)[1] - os.system(args.modulePath + ' ' + lalel_map_file + ' ' + os.path.join(args.inputTractDirectory, tract_dir) + ' ' + \ - os.path.join(args.outputDirectory, sub_name+'_endpoint.txt')+ \ - ' > ' + os.path.join(args.outputDirectory, 'log'+sub_name)) - -Parallel(n_jobs=number_of_jobs, verbose=1)( - delayed(extract_endpoint)(tract_dir, label_map_file, args) - for tract_dir, label_map_file in zip(tract_dir_list, label_map_file_list)) - -def list_txt_files(input_dir): - # Find input files - input_mask = f"{input_dir}/*.txt" - input_fnames = glob.glob(input_mask) - input_fnames = sorted(input_fnames) - - return input_fnames - -endpoint_txt_list = list_txt_files(args.outputDirectory) -print(f"<{os.path.basename(__file__)}> Endpoint analysis were measured for", len(endpoint_txt_list), "subjects.") - -if len(tract_dir_list) != len(endpoint_txt_list): - print("Error: The numbers of inputs and outputs are different. Check the log file of each subject.") -else: - os.system("rm -rf "+os.path.join(args.outputDirectory, 'log*')) + +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Measure overlaps of fiber clusters with cortical parcellation or fMRI functional areas. This is based on the 3D Slicer module FiberEndPointFromLabelMap.", + epilog="Written by Fan Zhang, fzhang@bwh.harvard.edu") + + parser.add_argument("-v", "--version", + action="version", default=argparse.SUPPRESS, + version='1.0', + help="Show program's version number and exit") + parser.add_argument( + 'inputTractDirectory', + help='Directory of fiber clustering results obtained by of multiple subjects. Make sure only the fiber clustering results are stored in this folder, making one subdirectory corresponding to one subject.') + parser.add_argument( + 'inputLabelMapDirectory', + help='Contains the parcellation or functional areas as label map files. 
Make sure that the input tract files and the label map files match each other in alphabetical order.') + parser.add_argument( + 'outputDirectory', + help='Directory of output CSV files that shows the percentage of fiber clusters\' endpoint connecting to certain regions in the label map.') + parser.add_argument( + 'modulePath', + help='Path of the 3D Slicer FiberEndPointFromLabelMap module.') + parser.add_argument( + '-j', action="store", dest="numberOfJobs", type=int, + help='Number of processors to use.') + + args = parser.parse_args() + + if not os.path.isdir(args.inputTractDirectory): + print("Error: Input directory", args.inputTractDirectory, "does not exist.") + exit() + + if not os.path.isdir(args.inputLabelMapDirectory): + print("Error: Input label map directory", args.inputLabelMapDirectory, "does not exist.") + exit() + + if not os.path.exists(args.modulePath): + print("Error: FiberEndPointFromLabelMap", args.modulePath, "does not exist.") + exit() + + if args.numberOfJobs is not None: + number_of_jobs = args.numberOfJobs + else: + number_of_jobs = 1 + + if not os.path.exists(args.outputDirectory): + print("Output directory", args.outputDirectory, "does not exist, creating it.") + os.makedirs(args.outputDirectory) + + print(f"<{os.path.basename(__file__)}>. Starting processing.") + print("") + print("=====input fiber cluster directory======\n", args.inputTractDirectory) + print("=====input label map directory======\n", args.inputLabelMapDirectory) + print("=====output directory=====\n", args.outputDirectory) + print("=====module path====\n", args.modulePath) + print('=====using N jobs:', number_of_jobs, "====\n") + + tract_dir_list = os.listdir(args.inputTractDirectory) + tract_dir_list = sorted(tract_dir_list) + + print(f"<{os.path.basename(__file__)}> found", len(tract_dir_list), "subjects.") + + def list_label_map_files(input_dir): + # Find input files + input_mask = f"{input_dir}/*.nrrd" + input_mask2 = f"{input_dir}/*.nhdr" + input_fnames = glob.glob(input_mask) + glob.glob(input_mask2) + input_fnames = sorted(input_fnames) + return(input_fnames) + + label_map_file_list = list_label_map_files(args.inputLabelMapDirectory) + + print(f"<{os.path.basename(__file__)}> found", len(label_map_file_list), "label maps. 
\n") + + if len(tract_dir_list) != len(label_map_file_list): + print("Error: The number of subjects", len(tract_dir_list), "should be equal to the number of label maps", len(label_map_file_list)) + exit() + + def extract_endpoint(tract_dir, lalel_map_file, args): + + pds = wma.io.list_vtk_files(os.path.join(args.inputTractDirectory, tract_dir)) + print(f"<{os.path.basename(__file__)}> Computing:", os.path.join(args.inputTractDirectory, tract_dir)) + print(" using", lalel_map_file) + print(" with:", len(pds), "vtk/vtp files.") + + sub_name = os.path.split(tract_dir)[1] + os.system(args.modulePath + ' ' + lalel_map_file + ' ' + os.path.join(args.inputTractDirectory, tract_dir) + ' ' + \ + os.path.join(args.outputDirectory, sub_name+'_endpoint.txt')+ \ + ' > ' + os.path.join(args.outputDirectory, 'log'+sub_name)) + + Parallel(n_jobs=number_of_jobs, verbose=1)( + delayed(extract_endpoint)(tract_dir, label_map_file, args) + for tract_dir, label_map_file in zip(tract_dir_list, label_map_file_list)) + + def list_txt_files(input_dir): + # Find input files + input_mask = f"{input_dir}/*.txt" + input_fnames = glob.glob(input_mask) + input_fnames = sorted(input_fnames) + + return input_fnames + + endpoint_txt_list = list_txt_files(args.outputDirectory) + print(f"<{os.path.basename(__file__)}> Endpoint analysis were measured for", len(endpoint_txt_list), "subjects.") + + if len(tract_dir_list) != len(endpoint_txt_list): + print("Error: The numbers of inputs and outputs are different. Check the log file of each subject.") + else: + os.system("rm -rf "+os.path.join(args.outputDirectory, 'log*')) + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_statistics.py b/utilities/wm_statistics.py index e40b282b..49de8719 100644 --- a/utilities/wm_statistics.py +++ b/utilities/wm_statistics.py @@ -10,388 +10,393 @@ import whitematteranalysis as wma import matplotlib.pyplot as plt -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Compute requested statistics comparing groups in the input groups file. Please verify your input by running wm_quality_control_cluster measurements before running this program.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"", - version='1.0') -parser.add_argument( - 'inputDirectory', - help='A directory of .csv or txt measurement files from Slicer tractography measurements.') -parser.add_argument( - 'subjectsInformationFile', - help='A file in excel format: column 1 must be subject ID, column 2 must be group label, and columns 3, 4, etc. contain other relevant information such as age or covariates.') -parser.add_argument( - '-atlas', action="store", dest="atlasDirectory", default=None, - help='A directory containing a whitematteranalysis atlas (atlas.vtp, clusters, etc.). This allows output of a MRML file if whole-brain cluster statistics were performed.') -parser.add_argument( - '-m', action="store", dest="measure", default='tensor1.FractionalAnisotropy', - help='Chosen measure(s) for statistics. Examples include tensor1.FractionalAnisotropy, tensor2.FractionalAnisotropy, FreeWater, Num_Fibers, tensor1.Trace, tensor1.MaxEigenvalue, etc. Run wm_quality_control_cluster_measurement.py to print available measures in the dataset. 
')
-parser.add_argument(
-    '-alpha', action="store", dest="FDR_alpha", type=float, default=0.1,
-    help='For FDR multiple comparison correction, the allowable percentage on average of false positives. A number from 0 to 1. Values of 0.05 or 0.10 are useful.')
-parser.add_argument('--hierarchy', dest='hierarchy', action='store_true')
-parser.add_argument('--no-hierarchy', dest='hierarchy', action='store_false')
-parser.set_defaults(hierarchy=True)
-parser.add_argument('-mode', dest='mode', action='store', default='all', help='all, clusters, toplevel, sublevel')
-
-
-parser.add_argument('--onetail', dest='one_tailed', action='store_true')
-parser.set_defaults(one_tailed=False)
-
-args = parser.parse_args()
-
-if not os.path.isdir(args.inputDirectory):
-    print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist.")
-    exit()
-
-if not os.path.exists(args.subjectsInformationFile):
-    print(f"<{os.path.basename(__file__)}> Error: Input file", args.subjectsInformationFile, "does not exist.")
-    exit()
-
-if args.atlasDirectory:
-    if not os.path.isdir(args.atlasDirectory):
-        print(f"<{os.path.basename(__file__)}> Error: Input directory", args.atlasDirectory, "does not exist.")
+
+def main():
+    #-----------------
+    # Parse arguments
+    #-----------------
+    parser = argparse.ArgumentParser(
+        description="Compute requested statistics comparing groups in the input groups file. Please verify your input by running wm_quality_control_cluster measurements before running this program.",
+        epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"")
+
+    parser.add_argument("-v", "--version",
+        action="version", default=argparse.SUPPRESS,
+        version='1.0',
+        help="Show program's version number and exit")
+    parser.add_argument(
+        'inputDirectory',
+        help='A directory of .csv or txt measurement files from Slicer tractography measurements.')
+    parser.add_argument(
+        'subjectsInformationFile',
+        help='A file in excel format: column 1 must be subject ID, column 2 must be group label, and columns 3, 4, etc. contain other relevant information such as age or covariates.')
+    parser.add_argument(
+        '-atlas', action="store", dest="atlasDirectory", default=None,
+        help='A directory containing a whitematteranalysis atlas (atlas.vtp, clusters, etc.). This allows output of a MRML file if whole-brain cluster statistics were performed.')
+    parser.add_argument(
+        '-m', action="store", dest="measure", default='tensor1.FractionalAnisotropy',
+        help='Chosen measure(s) for statistics. Examples include tensor1.FractionalAnisotropy, tensor2.FractionalAnisotropy, FreeWater, Num_Fibers, tensor1.Trace, tensor1.MaxEigenvalue, etc. Run wm_quality_control_cluster_measurement.py to print available measures in the dataset. ')
+    parser.add_argument(
+        '-alpha', action="store", dest="FDR_alpha", type=float, default=0.1,
+        help='For FDR multiple comparison correction, the allowable percentage on average of false positives. A number from 0 to 1.
Values of 0.05 or 0.10 are useful.') + parser.add_argument('--hierarchy', dest='hierarchy', action='store_true') + parser.add_argument('--no-hierarchy', dest='hierarchy', action='store_false') + parser.set_defaults(hierarchy=True) + parser.add_argument('-mode', dest='mode', action='store', default='all', help='all, clusters, toplevel, sublevel') + + + parser.add_argument('--onetail', dest='one_tailed', action='store_true') + parser.set_defaults(one_tailed=False) + + args = parser.parse_args() + + if not os.path.isdir(args.inputDirectory): + print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist.") exit() -measurement = args.measure -fdr_q = args.FDR_alpha -analyze_hierarchy = args.hierarchy -mode = args.mode -atlas_directory = args.atlasDirectory - -# Read and check data -measurement_list = wma.tract_measurement.load_measurement_in_folder(args.inputDirectory, hierarchy = 'Column', separator = 'Tab') -print("Measurement directory:", args.inputDirectory) -number_of_subjects = len(measurement_list) -print("Number of subjects data found:", number_of_subjects) -if number_of_subjects < 1: - print("ERROR, no measurement files found in directory:", args.inputDirectory) - exit() -header = measurement_list[0].measurement_header -#print "Measurement header is:", header -#print "Clusters found:", measurement_list[0].cluster_path -number_of_clusters = measurement_list[0].measurement_matrix[:,0].shape[0] -print("Number of measurement regions (clusters and hierarchy groups) is:", number_of_clusters) - -# Read and check subject ID and other information -dg = wma.tract_measurement.load_demographics(args.subjectsInformationFile) -case_id_list = dg.case_id_list -group_id_list = dg.group_id_list -age_list = dg.get_demographics_by_header('Age') -age_list = list(map(float, age_list)) - -# Check subject IDs from excel versus the input directory. -print("Checking subject information in input file and directory.") -if len(case_id_list) == len(group_id_list) & len(group_id_list) == number_of_subjects: - print("Subject counts in excel subject ID list, group list, and measurement directory match.") -else: - print("ERROR: Subject counts in excel subject ID list, group list, and measurement directory don't match:", end=' ') - print(len(case_id_list), len(group_id_list), number_of_subjects) - exit() -for subject_measured, subj_id, group in zip(measurement_list, case_id_list, group_id_list): - #print subject_measured.case_id, subj_id, group - if not str(subj_id) in subject_measured.case_id: - print("ERROR: id list and input data mismatch.") - print("ERROR at:", subject_measured.case_id, subj_id, group) + if not os.path.exists(args.subjectsInformationFile): + print(f"<{os.path.basename(__file__)}> Error: Input file", args.subjectsInformationFile, "does not exist.") exit() -print("Dataset passed. Subject IDs in subject information file match subject IDs in measurement directory.") - -# Figure out what the groups are -study_groups = list(set(group_id_list)) -print("Study groups found:", study_groups) -if len(study_groups) > 2: - print("ERROR: this code can currently only handle two study groups.") - exit() -print("Subjects per group:", numpy.sum(numpy.array(group_id_list)==study_groups[0]), numpy.sum(numpy.array(group_id_list)==study_groups[1])) - -print("Performing statistics for measure:", measurement) - -# Note this hierarchy code will be removed/simplified when the Slicer measurement file is simplified -# ------ -# Now analyze only the hierarchy or only the clusters. 
Find the appropriate indices. -measurement_list[0].measurement_matrix[:,0].shape[0] -hierarchy_cluster_list = [] -hierarchy_region_list = [] -hierarchy_toplevel_list = [] -hierarchy_cluster_name_list = [] -hierarchy_region_name_list = [] -hierarchy_toplevel_name_list = [] -label_list = [] - -hierarchy_depth = [] -idx = 0 -for region in measurement_list[0].cluster_path: - # if it has the word :cluster_, it's a cluster leaf of a hierarchy... - if ":cluster_" in region: - hierarchy_cluster_list.append(idx) - hierarchy_cluster_name_list.append(region) - elif ":" in region: - if analyze_hierarchy: - # this is a sublevel of the hierarchy, temporarily assume it's what we want, and all are on same level - hierarchy_region_list.append(idx) - hierarchy_region_name_list.append(region) - else: - if analyze_hierarchy: - # grab only the top level for the stats - # this will be either a hierarchy node or a regular cluster with no hierarchy - hierarchy_toplevel_list.append(idx) - hierarchy_toplevel_name_list.append(region) - # figure out how many levels in this hierarchy. Stats should be performed at each level only - hierarchy_depth.append(region.count(":")) - idx += 1 - -#print hierarchy_region_name_list -#print hierarchy_region_list - -print("Toplevel:") -#print hierarchy_toplevel_name_list - -# TODO: use this only if hierarchy -#mode = "sublevel" -#mode = "toplevel" -#mode = "clusters" -#mode = "all" -# sublevels -if mode == "sublevel": - regions_for_stats = numpy.array(hierarchy_region_list) - names_for_stats = hierarchy_region_name_list -elif mode == "toplevel": - # top level - regions_for_stats = numpy.array(hierarchy_toplevel_list) - names_for_stats = hierarchy_toplevel_name_list -elif mode == "clusters": - # clusters - regions_for_stats = numpy.array(hierarchy_cluster_list) - names_for_stats = hierarchy_cluster_name_list -elif mode == "all": - # we are measuring using cluster_*.vtp not a hierarchy - regions_for_stats = numpy.array(list(range(number_of_clusters))) - names_for_stats = [] - for fname in measurement_list[0].cluster_path: - #print os.path.splitext(fname) - #print os.path.split(os.path.split(os.path.splitext(fname)[0])[1])[1] - names_for_stats.append(os.path.split(os.path.split(os.path.splitext(fname)[0])[1])[1]) - #names_for_stats = measurement_list[0].cluster_path -number_of_tests = len(regions_for_stats) -# Note this hierarchy code will be removed/simplified when the Slicer measurement file is simplified -# ------ - - -# plot the measurement of interest across all subjects and clusters -fname = 'plot_all_data_'+measurement+'.pdf' -vidx = list(header).index(measurement) -plt.figure() -for subject_measured, group in zip(measurement_list, group_id_list): - value_list = subject_measured.measurement_matrix[:,vidx] - if group == study_groups[0]: - color = 'b' - else: - color = 'r' - #plt.plot(numpy.sort(value_list), color) - plt.plot(value_list, color+'.') -plt.title(measurement+' measurements by group') -plt.savefig(fname) -plt.close() -print("Saved data plot:", fname) - -# get data by group to perform stats on measurement of interest -vidx = list(header).index(measurement) -data_0 = [] -data_1 = [] -for subject_measured, group in zip(measurement_list, group_id_list): - #data = subject_measured.measurement_matrix[:,vidx] - data = subject_measured.measurement_matrix[regions_for_stats,vidx] - if group == study_groups[0]: - data_0.append(data) - elif group == study_groups[1]: - data_1.append(data) -data_0 = numpy.array(data_0) -data_1 = numpy.array(data_1) - -# Statistical test in each 
cluster or input region -print("Doing t-tests in each cluster or region.") -p_val_list = [] -data_list = [] -for c in range(number_of_tests): - #t, p = scipy.stats.ttest_ind(data_0[:,c], data_1[:,c]) - # ignore nan clusters for missing data. this also prevents nan p-values - c_data_0 = data_0[:,c] - c_data_1 = data_1[:,c] - shape_before = numpy.array([c_data_0.shape[0], c_data_1.shape[0]]) - c_data_0 = c_data_0[~numpy.isnan(c_data_0)] - c_data_1 = c_data_1[~numpy.isnan(c_data_1)] - data_list.append(c_data_0) - data_list.append(c_data_1) - shape_after = numpy.array([c_data_0.shape[0], c_data_1.shape[0]]) - # warn if any cluster is totally absent - if len(c_data_0) == 0 | len(c_data_1) == 0: - print("Empty cluster across all subjects indicates data issue:", c) - # warn if empty clusters - nan_count = shape_before - shape_after - if numpy.sum(nan_count) > 0: - print("\tCluster", c, ": Warning. Empty/nan found in :", nan_count, "subjects.") - t, p = scipy.stats.ttest_ind(c_data_0, c_data_1) - #print c_data_0.shape, c_data_1.shape - if args.one_tailed: - p_val_list.append(p / 2.0) - else: - p_val_list.append(p) - -uncorrected_significant = numpy.sum(numpy.array(p_val_list) < 0.05) -print("Uncorrected:", uncorrected_significant, "/", number_of_tests, ":", 100*uncorrected_significant/float(number_of_tests), "%") - -## if analyze_hierarchy: -## for name, p_val in zip(names_for_stats, p_val_list): -## print name, ":", p_val - -# bonferroni -threshold = 0.05 / number_of_clusters -corrected_significant = numpy.sum(numpy.array(p_val_list) < threshold) -print("Bonferroni:", corrected_significant, "/", number_of_tests, ":", 100*corrected_significant/float(number_of_tests), "%") - -# FDR -reject_null, corrected_p = statsmodels.sandbox.stats.multicomp.fdrcorrection0(numpy.array(p_val_list), alpha=fdr_q, method='indep') -#reject_null, corrected_p = statsmodels.sandbox.stats.multicomp.fdrcorrection0(numpy.array(p_val_list), alpha=fdr_q, method='negcorr') -corrected_significant = numpy.sum(reject_null) -print("FDR at alpha/q =", fdr_q, ":", corrected_significant, "/", number_of_tests, ":", 100*corrected_significant/float(number_of_tests), "%") - -## # plot the data we tested, sorted by p-value -## cluster_order = numpy.argsort(numpy.array(p_val_list)) -## plt.figure() -## plt.plot(data_1.T[cluster_order,:],'ro',markersize=2) -## plt.plot(data_0.T[cluster_order,:],'bo',markersize=2) -## plt.title(measurement) -## plt.savefig('tested_'+measurement+'.pdf') -## plt.close() - -fname = 'plot_region_means_'+measurement+'.pdf' -print("Plotting mean values per cluster:", fname) -cluster_mean_0 = numpy.nanmean(data_0, axis=0) -cluster_mean_1 = numpy.nanmean(data_1, axis=0) -# Plot the mean values of the groups against each other in each cluster -# and show which ones were significant -#print "MEAN SHAPES:", cluster_mean_0.shape, cluster_mean_1.shape -plt.figure() -# plot the line if they were equal -xmin = numpy.min(cluster_mean_0) -xmax = numpy.max(cluster_mean_0) -plt.plot([xmin,xmax],[xmin,xmax]) -# plot the means -#markerfacecolor='none' -#plt.plot(cluster_mean_0, cluster_mean_1, facecolors='none', edgecolors='k', markersize=3) -plt.plot(cluster_mean_0, cluster_mean_1, 'o', markerfacecolor='none', markersize=3) -plt.xlabel(study_groups[0]) -plt.ylabel(study_groups[1]) -# plot the significant ones on top -#plt.plot(cluster_mean_0[reject_null], cluster_mean_1[reject_null],'ro',markersize=3) -plt.plot(cluster_mean_0[reject_null], cluster_mean_1[reject_null],'ro',markersize=4) -plt.title(measurement+' region means') 
-plt.axis('equal') -plt.savefig(fname) -plt.close() - -fname = 'plot_boxplot_'+measurement+'.pdf' -print("Plotting complete boxplot of all data:", fname) -# Get information to plot all significant and non-significant data in a boxplot. -label_list = [] -text_list = [] -text_pos_list = [] -for p_val, region, c in zip(corrected_p, names_for_stats, list(range(number_of_tests))): - #print region - label_list.append(region[-4:]+'_0') - label_list.append(region[-4:]+'_1') - if p_val < fdr_q: - text_list.append('*') - text_pos_list.append(c*2 + 0.5) + + if args.atlasDirectory: + if not os.path.isdir(args.atlasDirectory): + print(f"<{os.path.basename(__file__)}> Error: Input directory", args.atlasDirectory, "does not exist.") + exit() + + measurement = args.measure + fdr_q = args.FDR_alpha + analyze_hierarchy = args.hierarchy + mode = args.mode + atlas_directory = args.atlasDirectory + + # Read and check data + measurement_list = wma.tract_measurement.load_measurement_in_folder(args.inputDirectory, hierarchy = 'Column', separator = 'Tab') + print("Measurement directory:", args.inputDirectory) + number_of_subjects = len(measurement_list) + print("Number of subjects data found:", number_of_subjects) + if number_of_subjects < 1: + print("ERROR, no measurement files found in directory:", args.inputDirectory) + exit() + header = measurement_list[0].measurement_header + #print "Measurement header is:", header + #print "Clusters found:", measurement_list[0].cluster_path + number_of_clusters = measurement_list[0].measurement_matrix[:,0].shape[0] + print("Number of measurement regions (clusters and hierarchy groups) is:", number_of_clusters) + + # Read and check subject ID and other information + dg = wma.tract_measurement.load_demographics(args.subjectsInformationFile) + case_id_list = dg.case_id_list + group_id_list = dg.group_id_list + age_list = dg.get_demographics_by_header('Age') + age_list = list(map(float, age_list)) + + # Check subject IDs from excel versus the input directory. 
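+    # The checks below assume two things: the spreadsheet rows and the measurement
+    # files enumerate the same subjects in the same order, and each spreadsheet
+    # subject ID appears as a substring of the corresponding measurement case_id.
+    # A count mismatch or an ID that cannot be found stops the program early.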
+ print("Checking subject information in input file and directory.") + if len(case_id_list) == len(group_id_list) & len(group_id_list) == number_of_subjects: + print("Subject counts in excel subject ID list, group list, and measurement directory match.") else: - text_list.append('') - text_pos_list.append(c*2 + 0.5) -# Make the boxplot -# try to make it big enough to fit all the data -plt.figure(figsize=(20,20)) -x_pos_list = list(range(len(data_list))) -#plt.boxplot(data_list, labels=label_list, showmeans=True, positions=x_pos_list) -plt.boxplot(data_list, showmeans=True, positions=x_pos_list) -plt.title(measurement+' all regions') -ylim = plt.gca().get_ylim()[1] -for text, pos in zip(text_list, text_pos_list): - plt.text(pos, ylim*0.95, text, bbox=dict(facecolor='red', alpha=0.5)) -plt.xticks(x_pos_list, label_list, rotation='vertical') -plt.savefig(fname) -plt.close() - -fname = 'plot_boxplot_sig_'+measurement+'.pdf' -print("Plotting significant boxplot:", fname) -# Now make a boxplot of only the significant ones -# The data list length is number of clusters * number of groups, which must be 2 for now -significant_data_list = [] -significant_labels_list = [] -significant_groups_list = [] -idx = 0 -for c, sig in zip(list(range(number_of_clusters)), reject_null): - for g in study_groups: - if sig: - significant_data_list.append(data_list[idx]) - significant_labels_list.append(label_list[idx]) - significant_groups_list.append(g) + print("ERROR: Subject counts in excel subject ID list, group list, and measurement directory don't match:", end=' ') + print(len(case_id_list), len(group_id_list), number_of_subjects) + exit() + for subject_measured, subj_id, group in zip(measurement_list, case_id_list, group_id_list): + #print subject_measured.case_id, subj_id, group + if not str(subj_id) in subject_measured.case_id: + print("ERROR: id list and input data mismatch.") + print("ERROR at:", subject_measured.case_id, subj_id, group) + exit() + print("Dataset passed. Subject IDs in subject information file match subject IDs in measurement directory.") + + # Figure out what the groups are + study_groups = list(set(group_id_list)) + print("Study groups found:", study_groups) + if len(study_groups) > 2: + print("ERROR: this code can currently only handle two study groups.") + exit() + print("Subjects per group:", numpy.sum(numpy.array(group_id_list)==study_groups[0]), numpy.sum(numpy.array(group_id_list)==study_groups[1])) + + print("Performing statistics for measure:", measurement) + + # Note this hierarchy code will be removed/simplified when the Slicer measurement file is simplified + # ------ + # Now analyze only the hierarchy or only the clusters. Find the appropriate indices. + measurement_list[0].measurement_matrix[:,0].shape[0] + hierarchy_cluster_list = [] + hierarchy_region_list = [] + hierarchy_toplevel_list = [] + hierarchy_cluster_name_list = [] + hierarchy_region_name_list = [] + hierarchy_toplevel_name_list = [] + label_list = [] + + hierarchy_depth = [] + idx = 0 + for region in measurement_list[0].cluster_path: + # if it has the word :cluster_, it's a cluster leaf of a hierarchy... 
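+        # Illustration only (hypothetical path strings, not taken from any dataset):
+        #   "hierarchy:left:AF:cluster_00123"  -> leaf cluster inside a hierarchy
+        #   "hierarchy:left:AF"                -> sublevel hierarchy node
+        #   "cluster_00123" or "hierarchy"     -> top-level entry (hierarchy node or plain cluster)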
+ if ":cluster_" in region: + hierarchy_cluster_list.append(idx) + hierarchy_cluster_name_list.append(region) + elif ":" in region: + if analyze_hierarchy: + # this is a sublevel of the hierarchy, temporarily assume it's what we want, and all are on same level + hierarchy_region_list.append(idx) + hierarchy_region_name_list.append(region) + else: + if analyze_hierarchy: + # grab only the top level for the stats + # this will be either a hierarchy node or a regular cluster with no hierarchy + hierarchy_toplevel_list.append(idx) + hierarchy_toplevel_name_list.append(region) + # figure out how many levels in this hierarchy. Stats should be performed at each level only + hierarchy_depth.append(region.count(":")) idx += 1 -# try to make it big enough to fit all the data -plt.figure(figsize=(20,20)) -if len(significant_data_list) > 0: - positions = list(range(len(significant_data_list))) - # regular boxplot - plt.boxplot(significant_data_list, positions=positions) - plt.xticks(positions, significant_labels_list, rotation='vertical') - # also plot means in per-group color - print(len(significant_data_list), len(significant_groups_list), len(positions)) - for d, g, p in zip(significant_data_list, significant_groups_list, positions): - if g == study_groups[0]: - color = 'bo' - else: - color ='ro' - plt.plot(p, numpy.mean(d), color) - # Pad margins so that markers don't get clipped by the axes - plt.margins(0.2) -plt.title(measurement+' significant regions') -plt.savefig(fname) -plt.close() - -# Choose to see original p values in MRML by uncommenting this line -#output_p_val_list = p_val_list -output_p_val_list = corrected_p - -if atlas_directory: - # save a MRML file with tracts colored by p-value - fname = './test_'+measurement+'.mrml' - print("Saving MRML visualization:", fname) - # find clusters in subject and atlas input directories - input_mask = f"{atlas_directory}/cluster_*.vtp" - atlas_clusters = sorted(glob.glob(input_mask)) - number_of_files = len(atlas_clusters) - if number_of_files == number_of_clusters: - colors = [] - significant_clusters = [] - for p_val, cluster in zip(output_p_val_list, atlas_clusters): - if p_val < fdr_q: - #if p_val < 0.05: - #colors.append([(1-p_val)*255, p_val*(255/2), p_val*(255/2)]) - colors.append([(1-10*p_val)*200+55, (1-10*p_val)*200+55, 0]) - significant_clusters.append(cluster) - ## elif p_val < 0.10: - ## colors.append([(1-p_val)*(255/2), p_val*(255/2), p_val*(255/2)]) - ## significant_clusters.append(cluster) - ## elif p_val < 0.15: - ## colors.append([50,50,50]) - ## significant_clusters.append(cluster) - #else: - #colors.append([50,50,50]) - colors = numpy.array(colors) - #print colors - #wma.mrml.write(atlas_clusters, colors, './test_'+str(vidx)+'.mrml', ratio=1.0) - wma.mrml.write(significant_clusters, colors, fname, ratio=1.0) - else: - print("Error: atlas directory and measurements have different cluster numbers:", number_of_files, number_of_clusters) + #print hierarchy_region_name_list + #print hierarchy_region_list + + print("Toplevel:") + #print hierarchy_toplevel_name_list + + # TODO: use this only if hierarchy + #mode = "sublevel" + #mode = "toplevel" + #mode = "clusters" + #mode = "all" + # sublevels + if mode == "sublevel": + regions_for_stats = numpy.array(hierarchy_region_list) + names_for_stats = hierarchy_region_name_list + elif mode == "toplevel": + # top level + regions_for_stats = numpy.array(hierarchy_toplevel_list) + names_for_stats = hierarchy_toplevel_name_list + elif mode == "clusters": + # clusters + regions_for_stats = 
numpy.array(hierarchy_cluster_list)
+        names_for_stats = hierarchy_cluster_name_list
+    elif mode == "all":
+        # we are measuring using cluster_*.vtp not a hierarchy
+        regions_for_stats = numpy.array(list(range(number_of_clusters)))
+        names_for_stats = []
+        for fname in measurement_list[0].cluster_path:
+            #print os.path.splitext(fname)
+            #print os.path.split(os.path.split(os.path.splitext(fname)[0])[1])[1]
+            names_for_stats.append(os.path.split(os.path.split(os.path.splitext(fname)[0])[1])[1])
+        #names_for_stats = measurement_list[0].cluster_path
+    number_of_tests = len(regions_for_stats)
+    # Note this hierarchy code will be removed/simplified when the Slicer measurement file is simplified
+    # ------
+
+
+    # plot the measurement of interest across all subjects and clusters
+    fname = 'plot_all_data_'+measurement+'.pdf'
+    vidx = list(header).index(measurement)
+    plt.figure()
+    for subject_measured, group in zip(measurement_list, group_id_list):
+        value_list = subject_measured.measurement_matrix[:,vidx]
+        if group == study_groups[0]:
+            color = 'b'
+        else:
+            color = 'r'
+        #plt.plot(numpy.sort(value_list), color)
+        plt.plot(value_list, color+'.')
+    plt.title(measurement+' measurements by group')
+    plt.savefig(fname)
+    plt.close()
+    print("Saved data plot:", fname)
+
+    # get data by group to perform stats on measurement of interest
+    vidx = list(header).index(measurement)
+    data_0 = []
+    data_1 = []
+    for subject_measured, group in zip(measurement_list, group_id_list):
+        #data = subject_measured.measurement_matrix[:,vidx]
+        data = subject_measured.measurement_matrix[regions_for_stats,vidx]
+        if group == study_groups[0]:
+            data_0.append(data)
+        elif group == study_groups[1]:
+            data_1.append(data)
+    data_0 = numpy.array(data_0)
+    data_1 = numpy.array(data_1)
+
+    # Statistical test in each cluster or input region
+    print("Doing t-tests in each cluster or region.")
+    p_val_list = []
+    data_list = []
+    for c in range(number_of_tests):
+        #t, p = scipy.stats.ttest_ind(data_0[:,c], data_1[:,c])
+        # ignore nan clusters for missing data. this also prevents nan p-values
+        c_data_0 = data_0[:,c]
+        c_data_1 = data_1[:,c]
+        shape_before = numpy.array([c_data_0.shape[0], c_data_1.shape[0]])
+        c_data_0 = c_data_0[~numpy.isnan(c_data_0)]
+        c_data_1 = c_data_1[~numpy.isnan(c_data_1)]
+        data_list.append(c_data_0)
+        data_list.append(c_data_1)
+        shape_after = numpy.array([c_data_0.shape[0], c_data_1.shape[0]])
+        # warn if any cluster is totally absent
+        if len(c_data_0) == 0 or len(c_data_1) == 0:
+            print("Empty cluster across all subjects indicates data issue:", c)
+        # warn if empty clusters
+        nan_count = shape_before - shape_after
+        if numpy.sum(nan_count) > 0:
+            print("\tCluster", c, ": Warning.
Empty/nan found in :", nan_count, "subjects.") + t, p = scipy.stats.ttest_ind(c_data_0, c_data_1) + #print c_data_0.shape, c_data_1.shape + if args.one_tailed: + p_val_list.append(p / 2.0) + else: + p_val_list.append(p) + + uncorrected_significant = numpy.sum(numpy.array(p_val_list) < 0.05) + print("Uncorrected:", uncorrected_significant, "/", number_of_tests, ":", 100*uncorrected_significant/float(number_of_tests), "%") + + ## if analyze_hierarchy: + ## for name, p_val in zip(names_for_stats, p_val_list): + ## print name, ":", p_val + + # bonferroni + threshold = 0.05 / number_of_clusters + corrected_significant = numpy.sum(numpy.array(p_val_list) < threshold) + print("Bonferroni:", corrected_significant, "/", number_of_tests, ":", 100*corrected_significant/float(number_of_tests), "%") + + # FDR + reject_null, corrected_p = statsmodels.sandbox.stats.multicomp.fdrcorrection0(numpy.array(p_val_list), alpha=fdr_q, method='indep') + #reject_null, corrected_p = statsmodels.sandbox.stats.multicomp.fdrcorrection0(numpy.array(p_val_list), alpha=fdr_q, method='negcorr') + corrected_significant = numpy.sum(reject_null) + print("FDR at alpha/q =", fdr_q, ":", corrected_significant, "/", number_of_tests, ":", 100*corrected_significant/float(number_of_tests), "%") + + ## # plot the data we tested, sorted by p-value + ## cluster_order = numpy.argsort(numpy.array(p_val_list)) + ## plt.figure() + ## plt.plot(data_1.T[cluster_order,:],'ro',markersize=2) + ## plt.plot(data_0.T[cluster_order,:],'bo',markersize=2) + ## plt.title(measurement) + ## plt.savefig('tested_'+measurement+'.pdf') + ## plt.close() + + fname = 'plot_region_means_'+measurement+'.pdf' + print("Plotting mean values per cluster:", fname) + cluster_mean_0 = numpy.nanmean(data_0, axis=0) + cluster_mean_1 = numpy.nanmean(data_1, axis=0) + # Plot the mean values of the groups against each other in each cluster + # and show which ones were significant + #print "MEAN SHAPES:", cluster_mean_0.shape, cluster_mean_1.shape + plt.figure() + # plot the line if they were equal + xmin = numpy.min(cluster_mean_0) + xmax = numpy.max(cluster_mean_0) + plt.plot([xmin,xmax],[xmin,xmax]) + # plot the means + #markerfacecolor='none' + #plt.plot(cluster_mean_0, cluster_mean_1, facecolors='none', edgecolors='k', markersize=3) + plt.plot(cluster_mean_0, cluster_mean_1, 'o', markerfacecolor='none', markersize=3) + plt.xlabel(study_groups[0]) + plt.ylabel(study_groups[1]) + # plot the significant ones on top + #plt.plot(cluster_mean_0[reject_null], cluster_mean_1[reject_null],'ro',markersize=3) + plt.plot(cluster_mean_0[reject_null], cluster_mean_1[reject_null],'ro',markersize=4) + plt.title(measurement+' region means') + plt.axis('equal') + plt.savefig(fname) + plt.close() + + fname = 'plot_boxplot_'+measurement+'.pdf' + print("Plotting complete boxplot of all data:", fname) + # Get information to plot all significant and non-significant data in a boxplot. 
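+    # data_list was filled with two entries per tested region (group 0, then group 1),
+    # so the labels below come in pairs built from the last four characters of each
+    # region name, and the significance marker for region c is placed between the
+    # pair at x = 2*c + 0.5.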
+ label_list = [] + text_list = [] + text_pos_list = [] + for p_val, region, c in zip(corrected_p, names_for_stats, list(range(number_of_tests))): + #print region + label_list.append(region[-4:]+'_0') + label_list.append(region[-4:]+'_1') + if p_val < fdr_q: + text_list.append('*') + text_pos_list.append(c*2 + 0.5) + else: + text_list.append('') + text_pos_list.append(c*2 + 0.5) + # Make the boxplot + # try to make it big enough to fit all the data + plt.figure(figsize=(20,20)) + x_pos_list = list(range(len(data_list))) + #plt.boxplot(data_list, labels=label_list, showmeans=True, positions=x_pos_list) + plt.boxplot(data_list, showmeans=True, positions=x_pos_list) + plt.title(measurement+' all regions') + ylim = plt.gca().get_ylim()[1] + for text, pos in zip(text_list, text_pos_list): + plt.text(pos, ylim*0.95, text, bbox=dict(facecolor='red', alpha=0.5)) + plt.xticks(x_pos_list, label_list, rotation='vertical') + plt.savefig(fname) + plt.close() + + fname = 'plot_boxplot_sig_'+measurement+'.pdf' + print("Plotting significant boxplot:", fname) + # Now make a boxplot of only the significant ones + # The data list length is number of clusters * number of groups, which must be 2 for now + significant_data_list = [] + significant_labels_list = [] + significant_groups_list = [] + idx = 0 + for c, sig in zip(list(range(number_of_clusters)), reject_null): + for g in study_groups: + if sig: + significant_data_list.append(data_list[idx]) + significant_labels_list.append(label_list[idx]) + significant_groups_list.append(g) + idx += 1 + # try to make it big enough to fit all the data + plt.figure(figsize=(20,20)) + if len(significant_data_list) > 0: + positions = list(range(len(significant_data_list))) + # regular boxplot + plt.boxplot(significant_data_list, positions=positions) + plt.xticks(positions, significant_labels_list, rotation='vertical') + # also plot means in per-group color + print(len(significant_data_list), len(significant_groups_list), len(positions)) + for d, g, p in zip(significant_data_list, significant_groups_list, positions): + if g == study_groups[0]: + color = 'bo' + else: + color ='ro' + plt.plot(p, numpy.mean(d), color) + # Pad margins so that markers don't get clipped by the axes + plt.margins(0.2) + plt.title(measurement+' significant regions') + plt.savefig(fname) + plt.close() + + # Choose to see original p values in MRML by uncommenting this line + #output_p_val_list = p_val_list + output_p_val_list = corrected_p + + if atlas_directory: + # save a MRML file with tracts colored by p-value + fname = './test_'+measurement+'.mrml' + print("Saving MRML visualization:", fname) + # find clusters in subject and atlas input directories + input_mask = f"{atlas_directory}/cluster_*.vtp" + atlas_clusters = sorted(glob.glob(input_mask)) + number_of_files = len(atlas_clusters) + if number_of_files == number_of_clusters: + colors = [] + significant_clusters = [] + for p_val, cluster in zip(output_p_val_list, atlas_clusters): + if p_val < fdr_q: + #if p_val < 0.05: + #colors.append([(1-p_val)*255, p_val*(255/2), p_val*(255/2)]) + colors.append([(1-10*p_val)*200+55, (1-10*p_val)*200+55, 0]) + significant_clusters.append(cluster) + ## elif p_val < 0.10: + ## colors.append([(1-p_val)*(255/2), p_val*(255/2), p_val*(255/2)]) + ## significant_clusters.append(cluster) + ## elif p_val < 0.15: + ## colors.append([50,50,50]) + ## significant_clusters.append(cluster) + #else: + #colors.append([50,50,50]) + colors = numpy.array(colors) + #print colors + #wma.mrml.write(atlas_clusters, colors, 
'./test_'+str(vidx)+'.mrml', ratio=1.0) + wma.mrml.write(significant_clusters, colors, fname, ratio=1.0) + else: + print("Error: atlas directory and measurements have different cluster numbers:", number_of_files, number_of_clusters) + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_statistics_export_data.py b/utilities/wm_statistics_export_data.py index 6cd036a6..2fd08c48 100644 --- a/utilities/wm_statistics_export_data.py +++ b/utilities/wm_statistics_export_data.py @@ -8,103 +8,106 @@ import whitematteranalysis as wma -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Export all subjects selected measures into one excel-readable file for use in statistics programs. Please verify your input by running wm_quality_control_cluster measurements before running this program.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"", - version='1.0') - -parser.add_argument( - 'inputDirectory', - help='A directory of .csv or txt measurement files from Slicer tractography measurements.') -parser.add_argument( - '-m', action="store", nargs='+', dest="measures", default='tensor1.FractionalAnisotropy', - help='Chosen measure(s) for export. Examples include tensor1.FractionalAnisotropy, tensor2.FractionalAnisotropy, FreeWater, Num_Fibers, tensor1.Trace, tensor1.MaxEigenvalue, etc. Run wm_quality_control_cluster_measurement.py to print available measures in the dataset. ') -parser.add_argument( - 'subjectsInformationFile', - help='A file in excel format: column 1 must be subject ID.') -parser.add_argument( - 'outputFileName', - help='A file name for output (ending in .txt or .csv).') - -args = parser.parse_args() - -if not os.path.isdir(args.inputDirectory): - print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist.") - exit() - -if not os.path.exists(args.subjectsInformationFile): - print(f"<{os.path.basename(__file__)}> Error: Input file", args.subjectsInformationFile, "does not exist.") - exit() - -measurements = args.measures - -# Read and check data -measurement_list = wma.tract_measurement.load_measurement_in_folder(args.inputDirectory, hierarchy = 'Column', separator = 'Tab') -print("Measurement directory:", args.inputDirectory) -number_of_subjects = len(measurement_list) -print("Number of subjects data found:", number_of_subjects) -if number_of_subjects < 1: - print("ERROR, no measurement files found in directory:", args.inputDirectory) - exit() -header = measurement_list[0].measurement_header -region_list = measurement_list[0].cluster_path -#print "Measurement header is:", header -#print "Clusters found:", measurement_list[0].cluster_path -number_of_clusters = measurement_list[0].measurement_matrix[:,0].shape[0] -print("Number of measurement regions (clusters and hierarchy groups) is:", number_of_clusters) - -# Read and check subject ID list -dg = wma.tract_measurement.load_demographics(args.subjectsInformationFile) -case_id_list = dg.case_id_list - -# Check subject IDs from excel versus the input directory. 
-print("Checking subject IDs match in excel file and measurement directory.") -if len(case_id_list) == number_of_subjects: - print("Subject counts in excel subject ID list and measurement directory match.") -else: - print("ERROR: Subject counts in excel subject ID list and measurement directory don't match:", end=' ') - print(len(case_id_list), number_of_subjects) - exit() -for subject_measured, subject_id in zip(measurement_list, case_id_list): - if not str(subject_id) in subject_measured.case_id: - print("ERROR: id list and input data mismatch.") - print("ERROR at:", subject_measured.case_id, subject_id) +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Export all subjects selected measures into one excel-readable file for use in statistics programs. Please verify your input by running wm_quality_control_cluster measurements before running this program.", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu. Please reference \"O'Donnell, Lauren J., and C-F. Westin. Automatic tractography segmentation using a high-dimensional white matter atlas. Medical Imaging, IEEE Transactions on 26.11 (2007): 1562-1575.\"", + version='1.0') + + parser.add_argument( + 'inputDirectory', + help='A directory of .csv or txt measurement files from Slicer tractography measurements.') + parser.add_argument( + '-m', action="store", nargs='+', dest="measures", default='tensor1.FractionalAnisotropy', + help='Chosen measure(s) for export. Examples include tensor1.FractionalAnisotropy, tensor2.FractionalAnisotropy, FreeWater, Num_Fibers, tensor1.Trace, tensor1.MaxEigenvalue, etc. Run wm_quality_control_cluster_measurement.py to print available measures in the dataset. ') + parser.add_argument( + 'subjectsInformationFile', + help='A file in excel format: column 1 must be subject ID.') + parser.add_argument( + 'outputFileName', + help='A file name for output (ending in .txt or .csv).') + + args = parser.parse_args() + + if not os.path.isdir(args.inputDirectory): + print(f"<{os.path.basename(__file__)}> Error: Input directory", args.inputDirectory, "does not exist.") exit() -print("Dataset passed. Subject IDs in subject information excel file match subject IDs in measurement directory.") - -# get data for export -vidx_list = []; # index of values of interest -for measure in measurements: - vidx_list.append(list(header).index(measure)) -print("Column indices of measures for export:", vidx_list) - -# reformat this information for export. -# export format for header is subject ID, region1.measure1, region1.measure2, ..., regionN.measureN -# export format for row is subject ID, measures... 
- -output_header = [] -output_header.append('SubjectID') -for region in region_list: - print(region) - for vidx in vidx_list: - measure_name = header[vidx] - output_header.append(region+'.'+measure_name) - -print(output_header) -with open(args.outputFileName, 'wb') as csvfile: - spamwriter = csv.writer(csvfile, delimiter='\t') - spamwriter.writerow(output_header) - for subject_id, subject_measured in zip(case_id_list, measurement_list): - output_data = [] - output_data.append(subject_id) - region_id = 0 - for region in region_list: - for vidx in vidx_list: - output_data.append(subject_measured.measurement_matrix[region_id,vidx]) - region_id += 1 - spamwriter.writerow(output_data) + if not os.path.exists(args.subjectsInformationFile): + print(f"<{os.path.basename(__file__)}> Error: Input file", args.subjectsInformationFile, "does not exist.") + exit() + + measurements = args.measures + # Read and check data + measurement_list = wma.tract_measurement.load_measurement_in_folder(args.inputDirectory, hierarchy = 'Column', separator = 'Tab') + print("Measurement directory:", args.inputDirectory) + number_of_subjects = len(measurement_list) + print("Number of subjects data found:", number_of_subjects) + if number_of_subjects < 1: + print("ERROR, no measurement files found in directory:", args.inputDirectory) + exit() + header = measurement_list[0].measurement_header + region_list = measurement_list[0].cluster_path + #print "Measurement header is:", header + #print "Clusters found:", measurement_list[0].cluster_path + number_of_clusters = measurement_list[0].measurement_matrix[:,0].shape[0] + print("Number of measurement regions (clusters and hierarchy groups) is:", number_of_clusters) + + # Read and check subject ID list + dg = wma.tract_measurement.load_demographics(args.subjectsInformationFile) + case_id_list = dg.case_id_list + + # Check subject IDs from excel versus the input directory. + print("Checking subject IDs match in excel file and measurement directory.") + if len(case_id_list) == number_of_subjects: + print("Subject counts in excel subject ID list and measurement directory match.") + else: + print("ERROR: Subject counts in excel subject ID list and measurement directory don't match:", end=' ') + print(len(case_id_list), number_of_subjects) + exit() + for subject_measured, subject_id in zip(measurement_list, case_id_list): + if not str(subject_id) in subject_measured.case_id: + print("ERROR: id list and input data mismatch.") + print("ERROR at:", subject_measured.case_id, subject_id) + exit() + print("Dataset passed. Subject IDs in subject information excel file match subject IDs in measurement directory.") + + # get data for export + vidx_list = []; # index of values of interest + for measure in measurements: + vidx_list.append(list(header).index(measure)) + print("Column indices of measures for export:", vidx_list) + + # reformat this information for export. + # export format for header is subject ID, region1.measure1, region1.measure2, ..., regionN.measureN + # export format for row is subject ID, measures... 
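+    # For example, exporting the measures ['tensor1.FractionalAnisotropy', 'Num_Fibers']
+    # for regions named (hypothetically) cluster_00001 and cluster_00002 yields the header
+    #   SubjectID  cluster_00001.tensor1.FractionalAnisotropy  cluster_00001.Num_Fibers  cluster_00002.tensor1.FractionalAnisotropy  cluster_00002.Num_Fibers
+    # and one tab-separated row per subject in the same column order.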
+ + output_header = [] + output_header.append('SubjectID') + for region in region_list: + print(region) + for vidx in vidx_list: + measure_name = header[vidx] + output_header.append(region+'.'+measure_name) + + print(output_header) + with open(args.outputFileName, 'wb') as csvfile: + spamwriter = csv.writer(csvfile, delimiter='\t') + spamwriter.writerow(output_header) + for subject_id, subject_measured in zip(case_id_list, measurement_list): + output_data = [] + output_data.append(subject_id) + region_id = 0 + for region in region_list: + for vidx in vidx_list: + output_data.append(subject_measured.measurement_matrix[region_id,vidx]) + region_id += 1 + spamwriter.writerow(output_data) + + +if __name__ == "__main__": + main() diff --git a/utilities/wm_transform_polydata.py b/utilities/wm_transform_polydata.py index 09ff1058..33f22c22 100644 --- a/utilities/wm_transform_polydata.py +++ b/utilities/wm_transform_polydata.py @@ -21,123 +21,128 @@ raise -#----------------- -# Parse arguments -#----------------- -parser = argparse.ArgumentParser( - description="Applies preprocessing to input directory. Downsamples, removes short fibers. Preserves tensors and scalar point data along retained fibers.", - epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu", - version='1.0') - -parser.add_argument( - 'inputDirectory', - help='Contains whole-brain tractography as vtkPolyData file(s).') -parser.add_argument( - 'outputDirectory', - help='The output directory should be a new empty directory. It will be created if needed.') -parser.add_argument( - 'transformFile', - help='The AFFINE transform in MNI format (for ITK format, please use Slicer to transform).') -## parser.add_argument( -## '-f', action="store", dest="numberOfFibers", type=int, -## help='Number of fibers to keep from each dataset.') -## parser.add_argument( -## '-l', action="store", dest="fiberLength", type=int, -## help='Minimum length (in mm) of fibers to keep.') -parser.add_argument( - '-j', action="store", dest="numberOfJobs", type=int, - help='Number of processors to use.') -parser.add_argument( - '-invert', action='store_true', dest="invert_flag", - help='Apply the inverse of the transform.') - -args = parser.parse_args() - - -if not os.path.isdir(args.inputDirectory): - print("Error: Input directory", args.inputDirectory, "does not exist.") - exit() - -outdir = args.outputDirectory -if not os.path.exists(outdir): - print("Output directory", outdir, "does not exist, creating it.") - os.makedirs(outdir) - -print("") -print("=====input directory======\n", args.inputDirectory) -print("=====output directory=====\n", args.outputDirectory) -print("==========================") - -print('CPUs detected:', multiprocessing.cpu_count()) -if args.numberOfJobs is not None: - parallel_jobs = args.numberOfJobs -else: - parallel_jobs = multiprocessing.cpu_count() -print('Using N jobs:', parallel_jobs) - - -print("==========================") - -# ======================================================================= -# Above this line is argument parsing. Below this line is the pipeline. 
-# ======================================================================= - - -# Loop over input DWIs -inputPolyDatas = wma.io.list_vtk_files(args.inputDirectory) - -print(f"<{os.path.basename(__file__)}> Input number of files: ", len(inputPolyDatas)) - -# Read in the transform to print its contents for the user -reader = vtk.vtkMNITransformReader() -reader.SetFileName(args.transformFile) -reader.Update() -transform = reader.GetTransform() -print(transform) - -def pipeline(inputPolyDatas, sidx, args): - # get subject identifier from unique input filename - # ------------------- - #subjectID = os.path.splitext(os.path.basename(inputPolyDatas[sidx]))[0] - fname = os.path.basename(inputPolyDatas[sidx]) - print(f"<{os.path.basename(__file__)}> ", sidx + 1, "/", len(inputPolyDatas)) - - # read input vtk data - # ------------------- - inpd = wma.io.read_polydata(inputPolyDatas[sidx]) - - # Read in the transform +def main(): + #----------------- + # Parse arguments + #----------------- + parser = argparse.ArgumentParser( + description="Applies preprocessing to input directory. Downsamples, removes short fibers. Preserves tensors and scalar point data along retained fibers.", + epilog="Written by Lauren O\'Donnell, odonnell@bwh.harvard.edu", + version='1.0') + + parser.add_argument( + 'inputDirectory', + help='Contains whole-brain tractography as vtkPolyData file(s).') + parser.add_argument( + 'outputDirectory', + help='The output directory should be a new empty directory. It will be created if needed.') + parser.add_argument( + 'transformFile', + help='The AFFINE transform in MNI format (for ITK format, please use Slicer to transform).') + ## parser.add_argument( + ## '-f', action="store", dest="numberOfFibers", type=int, + ## help='Number of fibers to keep from each dataset.') + ## parser.add_argument( + ## '-l', action="store", dest="fiberLength", type=int, + ## help='Minimum length (in mm) of fibers to keep.') + parser.add_argument( + '-j', action="store", dest="numberOfJobs", type=int, + help='Number of processors to use.') + parser.add_argument( + '-invert', action='store_true', dest="invert_flag", + help='Apply the inverse of the transform.') + + args = parser.parse_args() + + + if not os.path.isdir(args.inputDirectory): + print("Error: Input directory", args.inputDirectory, "does not exist.") + exit() + + outdir = args.outputDirectory + if not os.path.exists(outdir): + print("Output directory", outdir, "does not exist, creating it.") + os.makedirs(outdir) + + print("") + print("=====input directory======\n", args.inputDirectory) + print("=====output directory=====\n", args.outputDirectory) + print("==========================") + + print('CPUs detected:', multiprocessing.cpu_count()) + if args.numberOfJobs is not None: + parallel_jobs = args.numberOfJobs + else: + parallel_jobs = multiprocessing.cpu_count() + print('Using N jobs:', parallel_jobs) + + + print("==========================") + + # ======================================================================= + # Above this line is argument parsing. Below this line is the pipeline. 
+ # ======================================================================= + + + # Loop over input DWIs + inputPolyDatas = wma.io.list_vtk_files(args.inputDirectory) + + print(f"<{os.path.basename(__file__)}> Input number of files: ", len(inputPolyDatas)) + + # Read in the transform to print its contents for the user reader = vtk.vtkMNITransformReader() reader.SetFileName(args.transformFile) reader.Update() transform = reader.GetTransform() + print(transform) + + def pipeline(inputPolyDatas, sidx, args): + # get subject identifier from unique input filename + # ------------------- + #subjectID = os.path.splitext(os.path.basename(inputPolyDatas[sidx]))[0] + fname = os.path.basename(inputPolyDatas[sidx]) + print(f"<{os.path.basename(__file__)}> ", sidx + 1, "/", len(inputPolyDatas)) + + # read input vtk data + # ------------------- + inpd = wma.io.read_polydata(inputPolyDatas[sidx]) + + # Read in the transform + reader = vtk.vtkMNITransformReader() + reader.SetFileName(args.transformFile) + reader.Update() + transform = reader.GetTransform() + + if args.invert_flag == True: + transform.Inverse() + + # Apply the transform + transformer = vtk.vtkTransformPolyDataFilter() + transformer.SetInput(inpd) + transformer.SetTransform(transform) + transformer.Update() + outpd = transformer.GetOutput() + + # outputs + # ------------------- + fname = os.path.join(args.outputDirectory, fname) + try: + print("Writing output polydata", fname, "...") + wma.io.write_polydata(outpd, fname) + except: + print("Unknown exception in IO") + raise + del outpd + del inpd + + # loop over all inputs + Parallel(n_jobs=parallel_jobs, verbose=0)( + delayed(pipeline)(inputPolyDatas, sidx, args) + for sidx in range(0, len(inputPolyDatas))) + + print("Launched all jobs") + exit() + - if args.invert_flag == True: - transform.Inverse() - - # Apply the transform - transformer = vtk.vtkTransformPolyDataFilter() - transformer.SetInput(inpd) - transformer.SetTransform(transform) - transformer.Update() - outpd = transformer.GetOutput() - - # outputs - # ------------------- - fname = os.path.join(args.outputDirectory, fname) - try: - print("Writing output polydata", fname, "...") - wma.io.write_polydata(outpd, fname) - except: - print("Unknown exception in IO") - raise - del outpd - del inpd - -# loop over all inputs -Parallel(n_jobs=parallel_jobs, verbose=0)( - delayed(pipeline)(inputPolyDatas, sidx, args) - for sidx in range(0, len(inputPolyDatas))) - -print("Launched all jobs") -exit() +if __name__ == "__main__": + main()
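Note on the per-subject transform step above: a minimal sketch, not part of the patch. On VTK 6 and later, vtkTransformPolyDataFilter takes its input through SetInputData() rather than SetInput(), so a version-guarded call such as the hypothetical helper below keeps the pipeline working across VTK versions; the helper name is illustrative only.

    import vtk

    def set_transform_input(transformer, polydata):
        # VTK 6+ removed SetInput() from algorithms; SetInputData() is the replacement.
        if vtk.vtkVersion().GetVTKMajorVersion() >= 6:
            transformer.SetInputData(polydata)
        else:
            transformer.SetInput(polydata)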