Skip to content

Commit

Permalink
Merge pull request #10 from EbonyGunwhy/main
Browse files Browse the repository at this point in the history
feat: update scripts to comply with modified Reproducibility study functionality
  • Loading branch information
EbonyGunwhy authored Sep 23, 2023
2 parents f49ed92 + b2f57ee commit a1bae54
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 77 deletions.
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,14 @@ results/
| | |
| | |---- 01_model_outputs/
| | | |---- figures/
| | | | |---- per_drug/
| | | | |---- per_substudy/
| | | | |---- per_rat/
| | | |---- relaxation_rates_and_signals
| | | |---- all_parameters.csv
| | |---- 02_effect_sizes/
| | | |---- figures/
| | | |---- effect_sizes.csv
| | | |---- fit_errors.txt
| | |---- 02_analyses/
| | | |---- figures/
| | | |---- repeatability/
```

As the tracer kinetic model used in this study produces estimated parameter
Expand All @@ -113,7 +113,11 @@ Therefore, upon each execution of the code, a top-level directory named after
the date the analysis was conducted is created for storing the results from
that particular execution. For reference, the results and figures presented in
the accompanying manuscript to the `Six Test Compounds` study were created using the
outputs contained in `results/SixTestCompounds/2022-09-01`.
outputs contained in `results/SixTestCompounds/2022-09-01`. Please note that the
results contained in the Zenodo archive were created using a previous release of this
software (https://github.com/QIB-Sheffield/TRISTAN-rat/releases/tag/v1.0.0) which
output a slightly modified results folder structure. Please see the previous version
notes for more details.

`01_model_outputs` contains all outputs generated as a result of the tracer kinetic
model fitting. Within this, plotted signal time curves for each acquisition per
Expand Down
13 changes: 9 additions & 4 deletions src/Reproducibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ def main(study: str
# Split control and treatment groups
signal_dict = signals.split_groups(files, filenames)
# Fit data and get all estimated parameter variables
all_parameters = signals.fit_data(study, filenames, files,
signal_dict, TristanRat)
all_parameters = signals.fit_data(study,
filenames,
files,
signal_dict,
TristanRat)

# Get time curve averages per drug and per day
subject_list = signals.get_subject_list(signal_dict)
Expand Down Expand Up @@ -363,7 +366,8 @@ def main(study: str
None,
'rocket',
95,
ylabels=['$K_{trans}$', '$k_{bh}$'])
ylabels=['$K_{trans}$', '$k_{bh}$'],
sharey=False)
# plot saline-rifampicin data
print("saline-rifampicin: Plotting individual biomarker distributions between Day 1 and Day 2")
plots.pairplots(study,
Expand All @@ -375,7 +379,8 @@ def main(study: str
None,
'rocket',
95,
ylabels=['$K_{trans}$', '$k_{bh}$'])
ylabels=['$K_{trans}$', '$k_{bh}$'],
sharey=False)

# MIXED ANOVA (saline retest data)
print("Performing mixed ANOVA for saline-saline retest data")
Expand Down
116 changes: 81 additions & 35 deletions src/SixTestCompounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
the tracer kinetic modelling and generate all resulting reports
and figures for a specific study of interest.
"""
from pathlib import Path
import os
import shutil
import argparse
import itertools
import data
Expand All @@ -20,8 +23,30 @@ def main(study: str
Args:
study: Study name of interest (e.g., 'SixTestCompounds')
"""
# Get files and filenames
files, filenames = data.get_files(study, '01_signals')
# Get original files and filenames
orig_files, orig_filenames = data.get_files(study, '01_signals')
# Modify filenames to include study number
# to comply with updated functionality used in Reproducibility study
renamed_folder = os.path.join(Path(orig_files[0]).parent.parent,
'01_signals_renamed')
data.make_dir(renamed_folder)
for n in orig_files:
if Path(n).stem.split('_')[0]=='Asunaprevir':
shutil.copyfile(n, os.path.join(renamed_folder, '5_' + Path(n).stem + '.csv'))
elif Path(n).stem.split('_')[0]=='Pioglitazone':
shutil.copyfile(n, os.path.join(renamed_folder, '6_' + Path(n).stem + '.csv'))
elif Path(n).stem.split('_')[0]=='Ketoconazole':
shutil.copyfile(n, os.path.join(renamed_folder, '7_' + Path(n).stem + '.csv'))
elif Path(n).stem.split('_')[0]=='Cyclosporine':
shutil.copyfile(n, os.path.join(renamed_folder, '8_' + Path(n).stem + '.csv'))
elif Path(n).stem.split('_')[0]=='Bosentan':
shutil.copyfile(n, os.path.join(renamed_folder, '10_' + Path(n).stem + '.csv'))
elif Path(n).stem.split('_')[0]=='BosentanHigh':
shutil.copyfile(n, os.path.join(renamed_folder, '9_' + Path(n).stem + '.csv'))
elif Path(n).stem.split('_')[0]=='Rifampicin':
shutil.copyfile(n, os.path.join(renamed_folder, '12_' + Path(n).stem + '.csv'))
# Get updated files and filenames
files, filenames = data.get_files(study, '01_signals_renamed')
# Split control and treatment groups
signal_dict = signals.split_groups(files, filenames)
# Fit data and get all estimated parameter variables
Expand All @@ -36,31 +61,40 @@ def main(study: str
for curve in ['Delta R1 Liver (s-1)', 'Delta R1 Liver fit (s-1)',
'Delta R1 Spleen (s-1)']:
signals.get_average_curves(signal_dict, subject_list, curve)

# Update dictionary keys for average delta R1 plots

fits = signal_dict
fits['G2 Ciclosporin'] = fits.pop('Cyclosporine')
fits['G2 Rifampicin'] = fits.pop('Rifampicin')
fits['D Ketoconazole'] = fits.pop('Ketoconazole')
fits['E Asunaprevir'] = fits.pop('Asunaprevir')
fits['E Pioglitazone'] = fits.pop('Pioglitazone')
fits['G1 Bosentan_2mg'] = fits.pop('Bosentan')
fits['G1 Bosentan_high'] = fits.pop('BosentanHigh')

# Plot average delta R1 time curves per drug and per day
for drug, day in list(itertools.product(fits.keys(), [1, 2])):
print(f"{drug}, Liver fit: Saving average deltaR1 plot")
# For fitted liver data
plots.get_deltaR1_plots(fits, drug, 'Liver', study,
is_fitted=True, YLIM=(-1.5, 4.5))
# For observed liver data only
print(f"{drug}, Liver: Saving average deltaR1 plot")
plots.get_deltaR1_plots(fits, drug, 'Liver', study,
is_fitted=False, YLIM=(-1.5, 4.5))
# For observed spleen data only
print(f"{drug}, Spleen: Saving average deltaR1 plot")
plots.get_deltaR1_plots(fits, drug, 'Spleen', study,
is_fitted=False, YLIM=(-0.25, 1))
for substudy, day in list(itertools.product(fits.keys(), [1, 2])):
try:
print(f"{substudy}, Liver fit: Saving average deltaR1 plot")
# For fitted liver data
plots.get_deltaR1_plots(fits,
substudy,
'Liver',
study,
is_fitted=True,
YLIM=(-1.5, 4.5))
# For observed liver data only
print(f"{substudy}, Liver: Saving average deltaR1 plot")
plots.get_deltaR1_plots(fits,
substudy,
'Liver',
study,
is_fitted=False,
YLIM=(-1.5, 4.5))
# For observed spleen data only
print(f"{substudy}, Spleen: Saving average deltaR1 plot")
plots.get_deltaR1_plots(fits,
substudy,
'Spleen',
study,
is_fitted=False,
YLIM=(-0.25, 1))
except KeyError:
continue

# Convert substudy string number labels to integers
all_parameters['Substudy'] = all_parameters['Substudy'].astype(int)

# Create dictionary to rename sites into more comprehensive format
site_names = {'Bosentan': 'Bosentan_2mg',
Expand All @@ -76,8 +110,12 @@ def main(study: str
# Remove missing data
# and computational fitting errors from all estimated parameter data
print("Removing computational fitting errors and missing data")
all_parameters_cleaned = analyses.remove_data_errors(all_parameters,
study)
all_parameters_cleaned = data.remove_data_errors(all_parameters,
study)
# Remove subjects with insufficient number of observations
all_parameters_cleaned = (data
.remove_insufficient_data(all_parameters_cleaned,
study))

# Create list of condition variables to group by
variables = ['Drug', 'Symbol', 'Site']
Expand All @@ -86,17 +124,25 @@ def main(study: str

# Obtain effect size summaries and save as csv
print("Calculating average effect sizes")
analyses.save_effect_sizes(all_parameters_cleaned,
params,
variables,
study)
# Get statistical summary for saline-saline data
single_subject, overall = (analyses.get_retest_results(study,
'effects',
all_parameters_cleaned.query("Symbol in @params")))

# Plot biomarker distributions between Day 1 and Day 2 per rat
for biomarker in params:
print(f"{biomarker}: Plotting individual biomarker \
print("Plotting individual biomarker \
distributions between Day 1 and Day 2")
plots.pairplots(all_parameters, str(biomarker), study)

plots.pairplots(study,
'effects',
all_parameters_cleaned.query("Symbol in @params"),
'Day',
'Rat',
'Drug',
'Symbol',
'rocket',
95,
ylabels=['$K_{trans}$', '$k_{he}$', '$k_{bh}$'],
sharey='row')
print("Done!")


Expand Down
27 changes: 18 additions & 9 deletions src/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,15 +339,24 @@ def get_retest_results(study: str,
substudy average levels, respectively.
"""
# Pivot single-subject data
data_per_subject = pd.pivot_table(cleaned_parameter_data,
values='Value',
index=['Substudy',
'Symbol',
'Site',
'Fstrength',
'Time_period',
'Rat'],
columns='Day')
if study=='Reproducibility':
data_per_subject = pd.pivot_table(cleaned_parameter_data,
values='Value',
index=['Substudy',
'Symbol',
'Site',
'Fstrength',
'Time_period',
'Rat'],
columns='Day')
else:
data_per_subject = pd.pivot_table(cleaned_parameter_data,
values='Value',
index=['Substudy',
'Symbol',
'Site',
'Rat'],
columns='Day')
# Get mean substudy values
data_per_substudy = (cleaned_parameter_data
.groupby(['Substudy',
Expand Down
36 changes: 23 additions & 13 deletions src/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ def get_metadata(filename: str,
within the file. Works only when filename is formatted as
a string containing study descriptors (metadata) separated
by underscores, i.e.,
filename = "compound_site_RatNumber_dayNumber_dataType"
e.g., "Asunaprevir_E_Rat2_2_Signals"
filename = "substudy_compound_site_RatNumber_dayNumber_dataType"
e.g., "5_Asunaprevir_E_Rat2_2_Signals"
Args:
filename: File name of interest.
Expand Down Expand Up @@ -258,17 +258,27 @@ def remove_insufficient_data(parameter_data: pd.DataFrame,
Returns:
Cleaned DataFrame.
"""
data_pivoted = pd.pivot_table(parameter_data,
values='Value',
columns=['Symbol'],
index=['Substudy',
'Drug',
'Site',
'Fstrength',
'Site_Fstrength',
'Time_period',
'Rat',
'Day'])
if study=='Reproducibility':
data_pivoted = pd.pivot_table(parameter_data,
values='Value',
columns=['Symbol'],
index=['Substudy',
'Drug',
'Site',
'Fstrength',
'Site_Fstrength',
'Time_period',
'Rat',
'Day'])
else:
data_pivoted = pd.pivot_table(parameter_data,
values='Value',
columns=['Symbol'],
index=['Substudy',
'Drug',
'Site',
'Rat',
'Day'])
# Remove subjects with missing acquisition on day 1 or day 2
missing_days_removed = (data_pivoted[data_pivoted
.groupby(['Substudy',
Expand Down
27 changes: 16 additions & 11 deletions src/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ def get_deltaR1_plots(signals: dict,
fig_name = f"{substudy}_{ROI}_deltaR1"

plt.suptitle(f"Group mean {ROI} gadoxetate profiles in control and \
inhibitory phases \n (error bars represent \
standard deviation)")
\n inhibitory phases \
\n (error bars represent standard deviation)")
g.set_title(f"{substudy}", weight='bold')
g.set_xlabel("Time [min]", weight='bold')
g.set_ylabel("\u0394 $R_{1}$ [$s^{-1}$]", weight='bold')
Expand Down Expand Up @@ -302,6 +302,7 @@ def pairplots(study: str,
palette: str,
error: int,
ylabels: list,
sharey: 'str'
) -> None:
"""Plots paired data distributions per biomarker.
Expand Down Expand Up @@ -329,24 +330,28 @@ def pairplots(study: str,
col=col,
row=row,
kind="point",
sharey=False,
sharey=sharey,
palette=palette,
height=8,
aspect=1,
legend=False,
ci=error)

(g.set_titles("")
.axes[0, 1].set(ylim=([0, 0.4])))
g.axes[0, 0].set(ylim=([0, 1.5]))

if study=='Reproducibility':
(g.set_titles("")
.axes[0, 1].set(ylim=([0, 0.4])))
g.axes[0, 0].set(ylim=([0, 1.5]))
for i in range(len(ylabels)):
g.axes[0, i].set_ylabel(f"{ylabels[i]} [mL/min/mL]")
else:
g.set_titles(template='{col_name}')
for i in range(len(ylabels)):
g.axes[i, 0].set_ylabel(f"{ylabels[i]} [mL/min/mL]")

for ax in g.axes.flatten():
ax.tick_params(labelleft=True, labelbottom=True)

plt.legend(title='Rat', bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

for i in range(len(ylabels)):
g.axes[0, i].set_ylabel(f"{ylabels[i]} [mL/min/mL]")
plt.legend(title=hue, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

g.fig.tight_layout()
save_name = data.get_results_folder(study,
Expand Down

0 comments on commit a1bae54

Please sign in to comment.