diff --git a/papers_scripts/scidata2023/from_revision/compute_motion_histogram.py b/papers_scripts/scidata2023/from_revision/compute_motion_histogram.py new file mode 100644 index 0000000..fdc3b19 --- /dev/null +++ b/papers_scripts/scidata2023/from_revision/compute_motion_histogram.py @@ -0,0 +1,121 @@ +""" +Compute the motion histogram across sessions and participants for tasks in +the 3rd release +""" +# %% +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +from ibc_public.utils_data import (data_parser, get_subject_session, + DERIVATIVES, CONDITIONS) +# %% +def motion_histogram(db): + """compute motion histograms from realignment files""" + rps = list(db[db.contrast == 'motion'].path) + n_bins = 100 + bins = np.linspace(-2, 2, n_bins + 1) + # store the histogram counts for a different motion parameter + H = np.zeros((6, n_bins)) + xlist = np.empty((6, 0)) + for rp in rps: + X = np.loadtxt(rp).T + X[3:] *= (180. / np.pi) + # add the histogram counts to the corresponding rows + H += np.array([np.histogram(x, bins)[0] for x in X]) + # concatenate motion parameter data from different files into a + # single array + xlist = np.hstack((xlist, X)) + + # Process values to get convidence intervals + xlist.sort(1) + left, right = int(.0005 * xlist.shape[1]), int(.9995 * xlist.shape[1]) + print('.999 confindence interval') + print(xlist[:, left]) + # [-0.67661475 -0.82066769 -1.6521591 -1.56599341 -1.06614193 -1.09133088] + print(xlist[:, right]) + # [1.06848 0.89511545 2.5317982 1.87914269 1.19409916 0.97554424] + left, right = int(.005 * xlist.shape[1]), int(.995 * xlist.shape[1]) + print('.99 confindence interval') + print(xlist[:, left]) + # [-0.46837345 -0.54565559 -1.2850646 -0.95525076 -0.70048078 -0.42188997] + print(xlist[:, right]) + # [0.65827423 0.61529233 1.4997323 1.33685458 0.77069424 0.56606078] + + # Plot the histograms + colors = ['b', 'g', 'r', 'c', 'm', 'y'] + H = (H.T / H.sum(1)) #normalized histogram counts for each parameter + mbins = .5 * (bins[1:] + bins[:-1]) # bin centers + plt.figure(figsize=(6, 4)) + #plt.plot(mbins, H, linewidth=1) + for i, color in enumerate(colors): + plt.plot(mbins, H[:,i], linewidth=1, color=color) + plt.fill(mbins, H, alpha=.3) + plt.legend(['translation x', 'translation y', 'translation z', + 'rotation x', 'rotation y', 'rotation z'], fontsize=10) + plt.xlabel('mm/degrees') + plt.ylabel('normalized histogram') + + # Set y-axis tick formatter to display two decimal digits + def format_y_tick(value, _): + return f'{value:.2f}' + plt.gca().yaxis.set_major_formatter( + FuncFormatter(format_y_tick)) + + plt.title(f"Histogram of motion parameters") + # plot the confidence intervals + for i, color in enumerate(colors): + plt.plot([xlist[i, left], xlist[i, right]], + [-0.001 - .003 * i, -.001 - .003 * i], linewidth=3, + color=color) + plt.plot([xlist[i, left], xlist[i, right]], [-0.018, -.018], color='w') + plt.axis('tight') + plt.subplots_adjust(bottom=.12, left=.14) + plt.savefig(os.path.join(cache, f"motion_across_release3.png"), + dpi=600) + plt.savefig(os.path.join(cache, f"motion_across_release3.pdf"), + dpi=600) + +# %% +# ######################## GENERAL INPUTS ############################## +sub_num = [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15] +TASKS = ['ClipsTrn', 'ClipsVal', 'Raiders','WedgeAnti','WedgeClock', + 'ContRing','ExpRing'] +sess_names = ["clips1","clips2", "clips3", "clips4", + "raiders1", "raiders2"] + +cache = '/storage/store3/work/aponcema/IBC_paper3/cache_two' +mem = '/storage/store3/work/aponcema/IBC_paper3/cache_two' + +sub_path = [os.path.join(DERIVATIVES, 'sub-%02d' % s) for s in sub_num] +PTS = [os.path.basename(full_path) for full_path in sub_path] +# %% +# ######################## RUN ############################## +if __name__ == '__main__': + db = data_parser(derivatives=DERIVATIVES,subject_list = PTS, + task_list=TASKS,conditions=CONDITIONS) + # %% + # Make a sub_db with the sessions for each subject + subject_sessions = get_subject_session(sess_names) + subs_sess = {} + + for sub, ses in subject_sessions: + if sub not in subs_sess: + subs_sess[sub] = [ses] + else: + if ses not in subs_sess[sub]: + subs_sess[sub].append(ses) + subs_sess = dict(sorted(subs_sess.items())) + + # %% + new_db_ = [] + for sub in subs_sess: + for ses in subs_sess[sub]: + subses_db = db[(db.subject == sub) & (db.session == ses)] + new_db_.append(subses_db) + new_db = pd.concat(new_db_, ignore_index=True) + + # %% + motion_histogram(new_db) +# %% diff --git a/papers_scripts/scidata2023/from_revision/framewise_displacement.py b/papers_scripts/scidata2023/from_revision/framewise_displacement.py new file mode 100644 index 0000000..3b00f47 --- /dev/null +++ b/papers_scripts/scidata2023/from_revision/framewise_displacement.py @@ -0,0 +1,214 @@ +# %% +"""Framewise-displacement (FD) calculation. +from: https://gist.github.com/JulianKlug/68ca5379935e0eedb9bdeed5ab03cf3a +""" +import os +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +# from ibc_public.utils_data import (data_parser, get_subject_session, +# DERIVATIVES, CONDITIONS) +# %% +# ############################ FUNCTIONS ##################################### + +def symlog_transform(arr, shift): + logv = np.abs(arr)*(10.**shift) + logv[np.where(logv < 1.)] = 1. + logv = np.sign(arr)*np.log10(logv) + + return logv + +def framewise_displacement(motion_params: np.ndarray): + """Calculate framewise Displacement (FD) as per Power et al., 2012""" + motion_diff = np.diff(motion_params, axis=0, prepend=0) + FD = np.sum(np.abs(motion_diff[:, 0:3]) + 50 * np.abs(motion_diff[:, 3:]), + axis=1) + return FD +# %% +def framewise_displacement_from_file(in_file: str): + """Get the motion params from a motion file.""" + head_motion = np.loadtxt(in_file) + FD = framewise_displacement(head_motion) + return FD +# %% +def FD_subject(sub:str, task:str, db:pd.DataFrame): + """Get the framewise displacement for a subject in a single array""" + db_sub = db[(db['subject'] == sub) & (db['task'].str.contains(task)) & \ + (db['contrast'] == 'motion')] + if db_sub.empty: + print(f'No motion files found for {sub} {task}') + return None + all_FD = [framewise_displacement_from_file(row['path']) + for _, row in db_sub.iterrows()] + sub_FD = np.concatenate(all_FD) + return sub_FD + +def _handle_missing_data(all_FD): + """For the cases where a sbject didn't perform a task, fill with NaNs.""" + for task_idx, task_data in enumerate(all_FD): + for subj_idx, subj_data in enumerate(task_data): + if subj_data is None: # Check if subj_data is None + if subj_idx > 0: + all_FD[task_idx][subj_idx] = [np.nan] *\ + len(all_FD[task_idx][subj_idx - 1]) + else: + all_FD[task_idx][subj_idx] = [np.nan] * 300 + return all_FD +# %% +def create_df_for_plotting(all_FD, PTS, grouped_tasks): + """Create a dataframe for plotting the FD data.""" + all_FD = _handle_missing_data(all_FD) + plot_data = {'Subject': [], 'Task': [], 'FD': []} + for task_idx, task_data in enumerate(all_FD): + for sub_idx, sub_data in enumerate(task_data): + for fd in sub_data: + plot_data['Subject'].append(PTS[sub_idx]) + plot_data['Task'].append(grouped_tasks[task_idx]) + plot_data['FD'].append(fd) + df = pd.DataFrame(plot_data) + return df + +# %% +def plot_subs_FD_distribution(df_plot, out_dir=''): + """Plot the distribution of framewise displacement for all subjects.""" + + #plt.figure(figsize=(10, 7)) + #sns.boxplot(data=df_plot, x='Subject', y='FD', hue='Task') + plt.figure(figsize=(8, 10)) + sns.boxplot(data=df_plot, x='FD', y='Subject', hue='Task') + #plt.ylabel('Framewise Displacement [mm]') + #plt.xlabel(None) + #plt.ylim(0, 1.0) # limit to 0.1mm + plt.xlabel('Framewise Displacement [mm]') + plt.ylabel(None) + plt.xlim(0.0, 1.0) + plt.tight_layout() + #plt.savefig(os.path.join(out_dir, f'FD.png'), dpi=300) + plt.savefig(os.path.join(out_dir, 'FD_hor.png'), dpi=300) +# %% +def subplot_task_FD(df_to_plot, out_dir=''): + + fig, axes = plt.subplots(4, 1, figsize=(9, 12), sharey=True) + axes = axes.flatten() + for i, task in enumerate(df_to_plot['Task'].unique()): + task_data = df_to_plot[df_to_plot['Task'] == task] + + # task_data_f = task_data[task_data['FD'] <= 0.9] + + # logdata = symlog_transform(task_data['FD'].values, 2) + # task_data_f = task_data.drop(['FD'], axis=1) + # task_data_f['FD'] = logdata + + new_fd = np.where( + task_data['FD'].values < .01, .01, task_data['FD'].values) + new_subject_labels = np.array([ + 'S' + s[-2:] for s in task_data['Subject'].values]) + task_data_f = task_data.drop(['FD'], axis=1) + task_data_f = task_data.drop(['Subject'], axis=1) + task_data_f['FD'] = new_fd + task_data_f['Subject'] = new_subject_labels + + default_palette = sns.color_palette() + blues_palette = sns.color_palette(palette='Blues') + tab_blue = [default_palette[0]] + blue = blues_palette[5] + flierprops = dict(marker='D', markersize=2.5, markerfacecolor='k', + markeredgecolor='k') + + sns.boxplot(data=task_data_f, + x='Subject', + y='FD', + ax=axes[i], + hue='Subject', + palette=tab_blue, + medianprops = dict(color=blue, linewidth=1.5, alpha=.6), + whis=.4, + widths = .65, + whiskerprops=dict(linewidth=1.5), + flierprops=flierprops, + **{'boxprops': {'alpha': 0.6, 'edgecolor': 'black', + 'linewidth': 1.5}}) + axes[i].set_title(task, x=.5, y=.9, + fontdict=dict(fontsize=16)) + axes[i].set_xlabel(None) + + # Plot in the log scale + plt.yscale('log') + + if i != 3: + # Remove x-axis + axes[i].spines['bottom'].set_visible(False) + # Remove x-axis ticks + axes[i].tick_params(axis='x', which='both', bottom=False) + # Remove x-axis tick labels + axes[i].set_xticklabels([]) + if i==2: + # ylabel = r'$\mathrm{log}_{10}(10^2\mathrm{mm})$' + # ylabel= r'$\mathrm{log}_{10}\left( \mathrm{FD(mm)} \times 10^2 \right)$' + ylabel = 'Framewise Displacement (mm)' + ylabel = axes[i].set_ylabel(ylabel, fontsize=16, labelpad=8) + # Set y location of y-axis label + ylabel.set_y(1.1) + else: + # Remove y-axis labels + axes[i].set_ylabel('') + + # Set fontsize of x- and y-axis tick labels + axes[i].tick_params(axis='x', labelsize=14) + axes[i].tick_params(axis='y', labelsize=14) + # Hide the right and top spines + axes[i].spines['right'].set_visible(False) + axes[i].spines['top'].set_visible(False) + + plt.tight_layout() + plt.savefig(os.path.join(out_dir, 'FD_subplot.pdf'), dpi=300) + +# %% +if __name__ == '__main__': + # ########################### INPUTS ##################################### + # cache = mem = '/storage/store3/work/aponcema/IBC_paper3/cache_two' + sub_num = [1, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15] + TASKS = ['ClipsTrn','ClipsVal','Raiders','WedgeAnti','WedgeClock', + 'ContRing','ExpRing'] + sess_names = ["clips1","clips2","clips3","clips4","raiders1","raiders2"] + # sub_path = [os.path.join(DERIVATIVES, 'sub-%02d' % s) for s in sub_num] + # PTS = [os.path.basename(full_path) for full_path in sub_path] + # %% + # ############################## RUN ##################################### + # db = data_parser(derivatives=DERIVATIVES,subject_list = PTS, + # task_list=TASKS,conditions=CONDITIONS,) + + db_path = 'df_to_plot.csv' + db = pd.read_csv(db_path) + + # # %% + # # Make a sub_db with the sessions for each subject + # subject_sessions = get_subject_session(sess_names) + # sub_sess = { + # sub: sorted(set(ses for s, ses in subject_sessions if s == sub)) + # for sub in PTS + # } + # # %% + # new_db_ = [db[(db['subject'] == sub) & (db['session'] == ses)] + # for sub, ses_list in sub_sess.items() for ses in ses_list] + # new_db = pd.concat(new_db_, ignore_index=True) + + # all_FD = [] + # grouped_tasks = ["Clips", "Raiders", "Wedge", "Ring"] + # for task in grouped_tasks: + # all_subs_FD = [FD_subject(sub, task, new_db) for sub in PTS] + # all_FD.append(all_subs_FD) + + # all_FD = db['FD'] + # PTS = db['Subject'] + + # %% + # df_to_plot = create_df_for_plotting(all_FD, PTS, grouped_tasks) + + # plot all the data in a single plot + # plot_subs_FD_distribution(db) + + # plot each task as subplots + subplot_task_FD(db) + # %% diff --git a/papers_scripts/scidata2023/from_revision/show_fsrmcorr_fdrthr.py b/papers_scripts/scidata2023/from_revision/show_fsrmcorr_fdrthr.py new file mode 100644 index 0000000..aa10c80 --- /dev/null +++ b/papers_scripts/scidata2023/from_revision/show_fsrmcorr_fdrthr.py @@ -0,0 +1,198 @@ +""" +Get a FDR-correction threshold for the t-values of the OLS results and plot +the median correlation values on the surface of fsaverage for the Raiders or +Clips task. +""" +# %% +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +from nilearn.datasets import fetch_surf_fsaverage +from nilearn.plotting import plot_stat_map, plot_surf_stat_map +from nilearn.glm import fdr_threshold +from scipy import stats +import ibc_public.utils_data +from ibc_fastsrm_utils import flatten, reshape_preprocdata, stacker +# %% +# ############################# FUNCTIONS ########################### +def zval_conversion(tval, dof): + """ Convert t-values from permuted_ols to z-values. + Parameters + ---------- + tval : array-like + t-values from the OLS results + dof : int + Degrees of freedom as number of subjects minus 1 + Returns + ------- + zval : array-like + """ + pval = stats.t.sf(tval, dof) + one_minus_pval = stats.t.cdf(tval, dof) + zval_sf = stats.norm.isf(pval) + zval_cdf = stats.norm.ppf(one_minus_pval) + zval = np.empty(pval.shape) + use_cdf = zval_sf < 0 + use_sf = np.logical_not(use_cdf) + zval[np.atleast_1d(use_cdf)] = zval_cdf[use_cdf] + zval[np.atleast_1d(use_sf)] = zval_sf[use_sf] + + return zval + +def mask_data(median_corr, stats_data, threshold): + """ Mask the median correlation values with the z-values threshold. + Parameters + ---------- + median_corr : array-like + Median of the individual correlation values + stats_data : array-like + z-values from the OLS results + threshold : float + Threshold for the FDR correction + Returns + ------- + masked_data_lh : array-like + Masked median correlation values for the left hemisphere + masked_data_rh : array-like + Masked median correlation values for the right hemisphere + """ + # Create a mask where the zvalues are greater than the threshold + mask = np.where(stats_data > threshold, 1, 0) + masked_data = np.multiply(median_corr, mask) + # Split the masked data into left and right hemispheres + masked_data_lh = np.split(masked_data, 2, axis=0)[0] + masked_data_rh = np.split(masked_data, 2, axis=0)[1] + + return masked_data_lh, masked_data_rh + +def plot_slices(masked_data_lh, masked_data_rh, thr=0.01, cmap='viridis', + clbar=False, dpi=300): + """ Plot the masked median correlation values on the surface of fsaverage. + Parameters + ---------- + masked_data_lh : array-like + Masked median correlation values for the left hemisphere + masked_data_rh : array-like + Masked median correlation values for the right hemisphere + thr : float + Threshold for the masked data, decided so the plot is not cluttered + with super small values + Returns + ------- + ofs_clone : list + List of the output files for the plots + """ + + ofs = [] + for hemi in ['left', 'right']: + for view in ['lateral', 'medial']: + fig = os.path.join(mem, "mediancorr_correctedmask_surf_" + + f"{hemi[0]}h{view[0]}_{TASKS[0]}.svg") + fig_png = os.path.join(mem, "mediancorr_correctedmask_surf_" + + f"{hemi[0]}h{view[0]}_{TASKS[0]}.png") + if hemi == 'left': + mesh = fsaverage.infl_left + bg_map = fsaverage.sulc_left + stat = masked_data_lh + else: + mesh = fsaverage.infl_right + bg_map = fsaverage.sulc_right + stat = masked_data_rh + plot_surf_stat_map(mesh, stat, bg_map=bg_map, hemi=hemi, + view=view, colorbar=clbar, threshold=thr, + vmax=0.6, vmin=0, output_file=fig, cmap=cmap) + plot_surf_stat_map(mesh, stat, bg_map=bg_map, hemi=hemi, + view=view, colorbar=clbar, threshold=thr, + vmax=0.6, vmin=0, output_file=fig_png, + cmap=cmap, dpi=dpi,) + ofs.append(fig_png) + + ofs_clone = [] + ofs_clone = ofs[:2] + ofs_clone.append(ofs[-1]) + ofs_clone.append(ofs[2]) + #print(ofs_clone) + + return ofs_clone +# %% +# ############################# INPUTS ############################## + +alt_parent_dir = '/storage/store/work/agrilopi' +fastsrm_dir = 'fastsrm' +main_dir = 'encoding_analysis' +data_dir = os.path.join(alt_parent_dir, fastsrm_dir, main_dir) +mem = '/storage/store3/work/aponcema/IBC_paper3/cache_two' + +# %% +#TASKS = ['Clips'] +TASKS = ['Raiders'] +fsaverage = fetch_surf_fsaverage(mesh='fsaverage') +if TASKS == ['Clips']: + suffix = 'clips' +else: + suffix = 'raiders' + +corr_results = np.load(os.path.join(data_dir, suffix, + 'surface_corr' + '_' + + suffix + '.npy')) #(12, 2, 327684) +indiv_corr = np.load(os.path.join(data_dir, suffix, + 'surface_individual_corr' + '_' + + suffix + '.npy')) #(12, 327684) +ols_results = np.load(os.path.join(data_dir, suffix, + 'surface_group_corr' + '_' + + suffix + '.npz')) #t, logp_max_t, h0_max_t +# %% +# # ############################# MAIN ############################## + +if __name__ == '__main__': + + # Degrees of freedom as number of subjects minus 1 + dof = indiv_corr.shape[0]-1 + # get the t-values from the OLS results + tvals = np.reshape(ols_results['t'],(ols_results['t'].shape[1],)) #(327684,) + # Compute z-values from t-values + zvals = zval_conversion(tvals, dof) #(327684,) + # Compute threshold for a given FDR correction + threshold = fdr_threshold(zvals, alpha=0.05,) + + # Compute the median of the individual correlations + median_corr = np.median(indiv_corr, axis=0) + # Mask the median correlation values with the z-values threshold + masked_data_lh, masked_data_rh = mask_data(median_corr, zvals, threshold) + # Get side plots of the masked median correlation on surface + side_figs = plot_slices(masked_data_lh, masked_data_rh, thr=0.1) + + # %% + # Get the 4 views and plot them in a single figure + plt.figure(figsize=(8, 1.6)) + for q, output in enumerate(side_figs): + if q == 0: + #ax = plt.axes([0.235 * q -.005, 0, .30, 1.]) + ax = plt.axes([0.225 * q -.005, 0, .23, 1.]) + ax.imshow(mpimg.imread(output)[70:-85, 40:-10]) + + elif q == 1: + ax = plt.axes([0.227, 0, .23, 1.]) + ax.imshow(mpimg.imread(output)[70:-85, 40:-10]) + elif q == 2: + ax = plt.axes([0.225 * q + 0.012, 0, .23, 1.]) + ax.imshow(mpimg.imread(output)[70:-85, 40:-10]) + else: + ax = plt.axes([0.70, 0, .23, 1.]) + ax.imshow(mpimg.imread(output)[70:-85, 40:-10]) + ax.axis('off') + plt.suptitle(f'{TASKS[0]}', fontsize=14, y=1.05) + plt.subplots_adjust(left=0, right=1, top=1, bottom=0, hspace=0, wspace=0) + save_fig = True + if save_fig: + plt.savefig(os.path.join(mem, f'mediancorr_fsrm_{TASKS[0]}_fdr.svg'), + bbox_inches='tight') + plt.savefig(os.path.join(mem, f'mediancorr_fsrm_{TASKS[0]}_fdr.png'), + dpi=300,bbox_inches='tight') + plt.savefig(os.path.join(mem, f'mediancorr_fsrm_{TASKS[0]}_fdr.pdf'), + dpi=300,bbox_inches='tight') + else: + plt.show() + # %% \ No newline at end of file