Skip to content

Commit

Permalink
scripted creating gains
Browse files Browse the repository at this point in the history
  • Loading branch information
rland93 committed May 24, 2020
1 parent 0c22d11 commit f3c69ff
Show file tree
Hide file tree
Showing 6 changed files with 723 additions and 3 deletions.
7 changes: 4 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
uci_apc/__pycache__*
uci_apc/results*
venv*
uci_apc/src/__pycache__*
uci_apc/src/results*
uci_apc/src/venv*
uci_apc/dfs/*
386 changes: 386 additions & 0 deletions src/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,386 @@
import string
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.gridspec as gridspec
import matplotlib.ticker as mtick
import datetime
from os import walk
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# path to save figures
FRIENDLY_DATE_STR = str(datetime.datetime.strftime( datetime.datetime.now(), "%Y%m%d%H%M%S"))
DPI = 300

# interval of time ticks on x-axis
HR_INTERVAL = 2

# range of good to bad BG readings
GOOD_CONTROL_LOWER = 80
GOOD_CONTROL_UPPER = 160

# no. of standard deviations to plot (BG timeseries chart)
nstd=1

# CHO is displayed as CHO per minute for whatever
# interval we sample. In this case, our interval
# is 3 minutes. Therefore, we multiply CHO value
# by 3 to get the total CHO dosed in that minute.
# the simulator defines a "meal" as CHO dosed in
# a single minute; complex meals are not considered
INTERVAL = 3
CHO_COLOR = 'crimson'



# collected timeseries
def h_ts_collected(df):
'''
Create a single timeseries of collected data from multiple patients/runs.
Parameters
----------
df: pandas DataFrame object
dataframe
Returns
-------
dictionary:
"index" : Series of datetime objects representing the timeseries. Type: pandas Series
"bg" : Mean, Max, Min, Upper Env, Lower Env of blood glucose over the timeseries. Type: pandas DataFrame
"HBGI" : Mean, Max, Min, Upper Env, Lower Env of HBGI over the timeseries. Type: pandas DataFrame
"LBGI" : Mean, Max, Min, Upper Env, Lower Env of LBGI glucose over the timeseries. Type: pandas DataFrame
'''
bg = pd.DataFrame( {
"Mean" : df.loc(axis=1)[:,:,"BG"].mean(axis=1),
"Max" : df.loc(axis=1)[:,:,"BG"].max(axis=1),
"Min" : df.loc(axis=1)[:,:,"BG"].min(axis=1),
"Upper Envelope" : df.loc(axis=1)[:,:,"BG"].mean(axis=1) + nstd * df.loc(axis=1)[:,:,"BG"].std(axis=1),
"Lower Envelope" : df.loc(axis=1)[:,:,"BG"].mean(axis=1) - nstd * df.loc(axis=1)[:,:,"BG"].std(axis=1)
})
hbgi = pd.DataFrame( {
"Mean" : df.loc(axis=1)[:,:,"HBGI"].mean(axis=1),
"Max" : df.loc(axis=1)[:,:,"HBGI"].max(axis=1),
"Min" : df.loc(axis=1)[:,:,"HBGI"].min(axis=1),
"Upper Envelope" : df.loc(axis=1)[:,:,"HBGI"].mean(axis=1) + nstd * df.loc(axis=1)[:,:,"HBGI"].std(axis=1),
"Lower Envelope" : df.loc(axis=1)[:,:,"HBGI"].mean(axis=1) - nstd * df.loc(axis=1)[:,:,"HBGI"].std(axis=1)
})
lbgi = pd.DataFrame( {
"Mean" : df.loc(axis=1)[:,:,"LBGI"].mean(axis=1),
"Max" : df.loc(axis=1)[:,:,"LBGI"].max(axis=1),
"Min" : df.loc(axis=1)[:,:,"LBGI"].min(axis=1),
"Upper Envelope" : df.loc(axis=1)[:,:,"LBGI"].mean(axis=1) + nstd * df.loc(axis=1)[:,:,"LBGI"].std(axis=1),
"Lower Envelope" : df.loc(axis=1)[:,:,"LBGI"].mean(axis=1) - nstd * df.loc(axis=1)[:,:,"LBGI"].std(axis=1)
})

return {"index": df.index,
"bg" : bg,
"HBGI" : hbgi,
"LBGI" : lbgi}

def h_ts_sample(df):
'''
Parameters
----------
df: pandas DataFrame object
dataframe
Returns
-------
pandas DataFrame
Non-MultiIndex Dataframe of timeseries data from a single patient.
'''
# get set of runs, pts in cols
cols = df.columns.tolist()
run = random.choice(tuple(set([i[0] for i in cols]))) # random run
pt = random.choice(tuple(set([i[1] for i in cols]))) # random patient
# we have to transpose before we get rid of the level
# TODO this is an absolutely AWFUL way of doing this.
return (df.loc(axis=1)[run, pt,:].transpose().droplevel(1).droplevel(0).transpose(), run, pt)

def h_gen_bg_bins(binsize, lower_lim, upper_lim):
'''
Generate an array of bins of a certain size.
Parameters
----------
binsize: int
The bin size, in mg/dl.
lower_lim: int
The lower limit of the bins that are to be generated, in mg/dl. Not inclusive.
upper_lim: int
The upper limit of the bins that are to be generated, in mg/dl. Inclusive.
Returns
-------
int
The next bin divider.
Examples
--------
>>> print([x for x in h_gen_bg_bins(10, 0, 50)])
[0,10,20,30,40,50]
'''
x = lower_lim
while x <= upper_lim:
yield x
x +=binsize

def rand_pt_bg_ts(df, path):
'''
Generate a blood glucose time series plot for a random patient and save to disk.
Parameters
----------
df: pandas DataFrame
dataframe
Returns
-------
None
'''
fig = plt.figure(figsize=(20,10))
gs = gridspec.GridSpec(6,2)
plt.subplots_adjust(
hspace=1,
left=.05,
right=.95,
top=.95,
bottom=.13)
pt_data = h_ts_sample(df)
bg_ts = plt.subplot(gs[:4,:])
bg_ts.plot(pt_data[0].index, pt_data[0]['BG'], label='Blood Glucose')
bg_ts.grid(axis='x', which='both')
bg_ts.grid(axis='y', which='major')
bg_ts.xaxis.set_minor_locator(mdates.HourLocator(interval=HR_INTERVAL))
bg_ts.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M\n'))
bg_ts.xaxis.set_major_locator(mdates.DayLocator())
bg_ts.xaxis.set_major_formatter(mdates.DateFormatter('\n%b %d'))
bg_ts.set_ylabel('Blood Glucose (mg/dl)')
bg_ts.axhspan(ymin=GOOD_CONTROL_LOWER, ymax=GOOD_CONTROL_UPPER, color='green', alpha=0.08)
bg_ts.plot(pt_data[0].index, pt_data[0]['CGM'], label='Insulin')
bg_ts.legend()
ins_ts = plt.subplot(gs[4:6,:])
ins_ts.plot(pt_data[0].index, pt_data[0]['insulin'])
ins_ts.grid(axis='x', which='both')
ins_ts.xaxis.set_minor_locator(mdates.HourLocator(interval=HR_INTERVAL))
ins_ts.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M\n'))
ins_ts.xaxis.set_major_locator(mdates.DayLocator())
ins_ts.xaxis.set_major_formatter(mdates.DateFormatter('\n%b %d'))
plt.savefig(
path + '-rand_pt_ts.png',
dpi=DPI,
transparent=False)
plt.close(fig)

def bg_ts(df, path):
'''
Generate a time series plot containing blood glucose (mean, +-1 std, and max/min envelopes) and mean HBGI/LBGI for collected all patients and save to disk.
Parameters
----------
df: pandas DataFrame
dataframe
Returns
-------
None
'''
# setup
data = h_ts_collected(df)
# bg plot
fig = plt.figure(figsize=(20,10))
gs = gridspec.GridSpec(6,2)
plt.subplots_adjust(
hspace=1,
left=.05,
right=.95,
top=.95,
bottom=.13)
# subplot areas
bg_ts = plt.subplot(gs[:4,:])
# plots
bg_ts.fill_between(data["index"], data["bg"]["Upper Envelope"], data["bg"]["Lower Envelope"], alpha=0.08, color='blue', label="std")
bg_ts.plot(data["index"], data["bg"]["Mean"], label="Mean", color='blue')
bg_ts.plot(data["index"], data["bg"]["Max"],linestyle='--', color='mediumpurple', label='Max')
bg_ts.plot(data["index"], data["bg"]["Min"], linestyle='--', color='slategrey', label='Min')
# grids, legend, etc.
bg_ts.grid(axis='x', which='both')
bg_ts.grid(axis='y', which='major')
bg_ts.xaxis.set_minor_locator(mdates.HourLocator(interval=HR_INTERVAL))
bg_ts.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M\n'))
bg_ts.xaxis.set_major_locator(mdates.DayLocator())
bg_ts.xaxis.set_major_formatter(mdates.DateFormatter('\n%b %d'))
bg_ts.set_ylabel('Blood Glucose (mg/dl)')
bg_ts.axhspan(ymin=GOOD_CONTROL_LOWER, ymax=GOOD_CONTROL_UPPER, color='green', alpha=0.08)
bg_ts.legend()
# indicator plot
# subplot area
indicators_ts = plt.subplot(gs[4:,:])
# plots
indicators_ts.plot(data["index"], data["HBGI"]["Mean"], label="HBGI", color='steelblue')
indicators_ts.plot(data["index"], data["LBGI"]["Mean"], label="LBGI", color='maroon')
# grids, legend, etc.
indicators_ts.legend()
indicators_ts.grid(axis='x', which='both')
indicators_ts.xaxis.set_minor_locator(mdates.HourLocator(interval=HR_INTERVAL))
indicators_ts.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M\n'))
indicators_ts.xaxis.set_major_locator(mdates.DayLocator())
indicators_ts.xaxis.set_major_formatter(mdates.DateFormatter('\n%b %d'))
indicators_ts.set_ylabel('HBGI/LBGI Indicator')
plt.savefig(
path + '-bg_ts.png',
dpi=DPI,
transparent=False)
plt.close(fig)

def h_get_bg_derivative(df):
'''
Get the bg derivative as a time series.
Parameters
----------
df: pandas DataFrame
dataframe
Returns
-------
pandas DataFrame
Glucose derivative bg(t+1) - bg(t) indexed by time. Last value is zero.
Index: datetime
Columns: derivative
'''
data = h_ts_collected(df)
index = len(data['bg'].index.values.tolist()) - 1
derivs = []
for i in range(0, index):
dbg = data['bg']['Mean'].iloc[i+1] - data['bg']['Mean'].iloc[i]
derivs.append(dbg/float(INTERVAL))
# just add a zero at the end since we calculated derivative forward
derivs.append(0)
return pd.DataFrame(derivs, index=data['index'], columns=['Derivative'])

def bg_deriv_hist(df, bins, path):
'''
Save a histogram of the blood glucose derivatives to disk.
Parameters
----------
df: pandas DataFrame
dataframe
bins: list of int
List of integers representing bin boundaries
Returns
-------
None
'''
data = h_get_bg_derivative(df)
fig = plt.figure(figsize=(10,10))
gs = gridspec.GridSpec(1,1)
# subplot areas
bg_deriv = plt.subplot(gs[:,:])
bg_deriv.set_title("Glucose Derivative by Count")
counts = pd.cut(data['Derivative'], bins=bins).value_counts(sort=False)
x = counts.index.values.astype(str)
y = counts/counts.sum() * 100

bg_deriv.grid(axis='y', which='both')
bg_deriv.bar(x,y,width=0.95)
bg_deriv.set_title("Glucose Derivative by Count")
bg_deriv.set_ylabel("Percentage of vals in range")
bg_deriv.yaxis.set_major_formatter(mtick.PercentFormatter())
bg_deriv.set_xlabel("Range")
for label in bg_deriv.xaxis.get_ticklabels():
label.set_rotation(90)
plt.savefig(
path + 'bg_deriv_hist.png',
dpi=DPI,
transparent=False)
plt.close(fig)

def bg_counts(df, bins, path):
'''
Parameters
---------
df: pandas DataFrame
dataframe
bins: list of int
List of integers representing bin boundaries
Returns
-------
None
'''
counts = pd.cut(df.loc(axis=1)[:,:,'BG'].unstack(), bins=bins).value_counts(sort=False)
x = counts.index.values.astype(str) # labels
y = counts / counts.sum() * 100

fig = plt.figure(figsize=(10,10))
gs = gridspec.GridSpec(1,1)
bg_counts = plt.subplot(gs[:,:])
bg_counts.grid(axis='y', which='both')
bg_counts.bar(x,y,width=0.95)
bg_counts.set_title("Glucose Values by Count")
bg_counts.set_ylabel("Percentage of vals in range")
bg_counts.yaxis.set_major_formatter(mtick.PercentFormatter())
bg_counts.set_xlabel("Range")
for label in bg_counts.xaxis.get_ticklabels():
label.set_rotation(90)
plt.savefig(
path + '-bg_counts.png',
dpi=DPI,
transparent=False)
plt.close(fig)

def h_gen_poincare_df(df, sample_delta, sample_interval):
'''
Generate a dataframe containing blood glucose at time t, and blood glucose at time t+1, indexed by t.
'''
bg_means = h_ts_collected(df)['bg']['Mean']
no_samples = len(bg_means.index.values)
bg_t1 = [bg_means[i+sample_delta] for i in range(0, no_samples - sample_delta)]
bg_t = [bg_means[i] for i in range(0, no_samples - sample_delta)]
indices = [i for i in bg_means.index.values[:(no_samples - sample_delta)]]
delta_col_name = str('t_' + sample_delta)
return pd.DataFrame([bg_t, bg_t1], index=indices, columns=['t_0', 't_' + str(sample_delta)])


def tabulate(df, low, high, save):
'''
Generate a table of useful numerical measures of controller performance. The range specified by params low and high is the euglycemic range.
Parameters
----------
df: pandas DataFrame
low: int
The limit of low blood glucose
high: int
The limit of high blood glucose
'''
bgs = df[:,:,"BG"]
print(bgs)
# Time in percentage
# No. of simulations with BG by bin
# CVGA zone count
# mean, max, min, std
# LBGI mean, max, min, std
# Total insulin dose (mean)




if __name__ == '__main__':
dfs = []
# get filenames of dataframes
for (dirpath, dirnames, filenames) in walk('./dfs/'):
dfs.extend(filenames)
for df in dfs:
data = pd.read_pickle('./dfs/' + df)
figname = df[:-4]
bg_ts(data, './results/' + figname)
# bins = [x for x in h_gen_bg_bins(40,20,400)]
# bg_counts(data, bins, './results/' + figname)
Loading

0 comments on commit f3c69ff

Please sign in to comment.