Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 63 additions & 17 deletions epi_inference/reconstruction/recon_stochastic_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import sys
import datetime
#import pandas as pd
import pandas as pd
from pyutilib.misc import timing

from ..engine.task import Task
Expand All @@ -11,11 +11,11 @@

from ..util import load_population, save_results
from ..collect.misc import load_collect
from ..reconstruction.stochastic import stochastic_reconstruction
from ..reconstruction.stochastic import stochastic_reconstruction, stochastic_reconstruction_from_daily_transmissions
from ..reconstruction.common import reported_cases_from_cumulative


def run_county(county, df, population, CONFIG, warnings):
def run_county(county, reported_cases_df, population, CONFIG, warnings, alternate_transmissions_df=None):
#
# Initialize results dictionary
#
Expand All @@ -26,23 +26,60 @@ def run_county(county, df, population, CONFIG, warnings):
#
# Get the cumulative cases
#
cumulative_reported_cases = df[county].to_list()
cumulative_reported_cases = reported_cases_df[county].to_list()

# reconstruct the states
Cdates = [datetime.date.fromisoformat(day) for day in df.index.to_list()]
#
# Get the reported cases per day from the cumulative reported cases
Cdates = [datetime.date.fromisoformat(day) for day in reported_cases_df.index.to_list()]
reported_cases_per_day = \
reported_cases_from_cumulative(dates=Cdates,
cumulative_reported_cases=cumulative_reported_cases)

#
# Setup arguments for stochastic_reconstruction
args = {'dates':reported_cases_per_day.dates,
'reported_cases_per_day':reported_cases_per_day.values,
'population':population,
'n_steps_per_day':CONFIG['n_steps_per_day']}
for option in ['reporting_delay_mean', 'reporting_delay_dev', 'reporting_multiplier', 'fixed_incubation', 'infectious_lower', 'infectious_upper', 'seed']:
if option in CONFIG:
args[option] = CONFIG[option]
res = stochastic_reconstruction(**args)
#
if 'alternate_transmissions_file' in CONFIG:
assert alternate_transmissions_df is not None
# merge the reported cases for the county in with the transmissions

dates = [datetime.date.fromisoformat(day) for day in alternate_transmissions_df.index.to_list()]
args = {'dates':dates,
'transmissions': alternate_transmissions_df[county].to_list(),
'population':population,
'n_steps_per_day':CONFIG['n_steps_per_day']}
for option in ['fixed_incubation', 'infectious_lower', 'infectious_upper', 'seed']:
if option in CONFIG:
args[option] = CONFIG[option]
res = stochastic_reconstruction_from_daily_transmissions(**args)

# add the reported cases to the results (need to match the dates - this could be done more efficiently
date_set = set(dates)
# get the reported cases for each day in dates
reported_cases_dict = {d:v for d,v in zip(reported_cases_per_day.dates, reported_cases_per_day.values) if d in date_set}
original_reported_cases_for_dates = list()
found_first = False
for d in dates:
# check that all the transmission dates are in reported cases dict
if d not in reported_cases_dict:
# reported cases may start later than transmissions
# we allow this and put zeros on the front, but
# throw an error if a date is missing after the first
# coinciding dates
assert found_first is False
print(d, 'in dates, but not in reported_cases_dict')
original_reported_cases_for_dates.append(0)
else:
original_reported_cases_for_dates.append(reported_cases_dict[d])
res.orig_rep_cases = original_reported_cases_for_dates
else:
args = {'dates':reported_cases_per_day.dates,
'reported_cases_per_day':reported_cases_per_day.values,
'population':population,
'n_steps_per_day':CONFIG['n_steps_per_day']}
for option in ['reporting_delay_mean', 'reporting_delay_dev', 'reporting_multiplier', 'fixed_incubation', 'infectious_lower', 'infectious_upper', 'seed']:
if option in CONFIG:
args[option] = CONFIG[option]
res = stochastic_reconstruction(**args)

results['dates'] = res.dates
results['transmissions'] = res.transmissions
Expand All @@ -65,23 +102,32 @@ def run(CONFIG, warnings):
#
# Load the case data
#
df = load_collect(CONFIG['input_csv'])
reported_df = load_collect(CONFIG['input_csv'])
#
# Load transmissions file if it exists
#
transmissions_file = CONFIG.get('alternate_transmissions_file', None)
transmissions_df = None
if transmissions_file is not None:
transmissions_df = pd.read_csv(transmissions_file, index_col='Date')

#
# Perform construction
#
results = {}
if 'county' in CONFIG:
counties = [CONFIG['county']]
else:
counties = df.keys()
counties = reported_df.keys()

if CONFIG['verbose']:
timing.tic()
for t in counties:
if t not in population_df[CONFIG['population_csv']['population']]:
warnings.append("WARNING: county %s does not have population data available" % str(t))
continue
results[t] = run_county(t, df, population_df[CONFIG['population_csv']['population']][t], CONFIG, warnings)
results[t] = run_county(t, reported_df, population_df[CONFIG['population_csv']['population']][t],
CONFIG, warnings, alternate_transmissions_df=transmissions_df)
if CONFIG['verbose']:
timing.toc("Serial Execution")
#
Expand Down
132 changes: 131 additions & 1 deletion epi_inference/reconstruction/stochastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def stochastic_reconstruction(*, dates, reported_cases_per_day, population, n_st
# for the population value specified
# Todo: flag a warning
Stout = S[t]

S[t+1] = S[t] - Stout
Etout = np.random.binomial(E[t], prob_E[t])
E[t+1] = E[t] + Stout - Etout
Expand Down Expand Up @@ -195,6 +195,136 @@ def stochastic_reconstruction(*, dates, reported_cases_per_day, population, n_st
# compartments on the dates in tdates
return Bunch(dates=t_daily_dates, S=S, E=E, I1=I1, I2=I2, I3=I3, R=R, transmissions=T, orig_rep_cases=output_reported_cases)

def stochastic_reconstruction_from_daily_transmissions(*, dates, transmissions, population, n_steps_per_day,
fixed_incubation=5.2,
infectious_lower=2.6, infectious_upper=6,
seed=0):
"""
This function computes a reconstruction of the SEIIIR compartments based on the transmissions provided.
The transmissions in time cause movement from S->E, and then a discrete-time SEIIIR model is used
to populate the compartments.

Parameters
----------
dates : list
list of datetime objects corresponding to the dates of the transmissions
transmissions : list
list of the transmissions per day
population : float
population of this node (county)
n_steps_per_day : int
the number of timesteps to take in one day (e.g., value of 4 means the delta_t = 0.25 days)
fixed_incubation : float
the incubation period in days
infectious_lower : float
the infectious period is drawn from a uniform distribution between infectious_lower and infectious_upper
infectious_upper : float
the infectious period is drawn from a uniform distribution between infectious_lower and infectious_upper
seed: int
the seed for the random number generator

Returns
-------
dict : a dict of the dates and the compartments
"""
if seed:
np.random.seed(seed)

n_r_days = len(dates)
assert len(dates) == len(transmissions)

# create a vector representing the transmissions by timestep instead of by day
tcases_timestep = [0]*n_r_days*n_steps_per_day
# allocate cases "evenly" to the timesteps per day
for day_idx in range(len(transmissions)):
step_in_day = 0
for i in range(transmissions[day_idx]):
tcases_timestep[day_idx*n_steps_per_day + step_in_day] += 1
step_in_day += 1
if step_in_day >= n_steps_per_day:
step_in_day = 0

# todo: add this back in - needed for downstream
#output_reported_cases = [0]*padding_days
#output_reported_cases.extend(reported_cases_per_day[: -int_delay])

# use these transmissions to generate a single stochastic reconstruction
# of each of the compartments (SEIIIR)
S = np.zeros(len(tcases_timestep))
S[0] = population
E = np.zeros(len(tcases_timestep))
I1 = np.zeros(len(tcases_timestep))
I2 = np.zeros(len(tcases_timestep))
I3 = np.zeros(len(tcases_timestep))
R = np.zeros(len(tcases_timestep))

# Approach 1:
# - sigma is fixed at 1/5.2 days
# - serial interval is drawn from uniform 6.5-8.2, and used
# to compute gamma
# It might be better to just draw sigma and gamma from separate
# distributions here.
# Here, the serial interval is drawn once and that value is used
# for the entire simulation, however, it could be drawn for each day
# as well.
# Note: These are drawn for every day, but they are the same for each day
# - this is done so the code is ready for different values on each day if
# we decide to do that.
sigma = 1.0/fixed_incubation*np.ones(len(tcases_timestep)) # days
infectious_period = np.random.uniform(infectious_lower, infectious_upper)*np.ones(len(tcases_timestep)) # CDL , size=len(tcases_timestep))
gamma = np.reciprocal(infectious_period)

prob_E = 1-np.exp(-1.0/n_steps_per_day*sigma)
prob_III = 1-np.exp(-1.0/n_steps_per_day*3*gamma)

# loop through all of the days and compute the compartments
# with the stochastic simulations
for t in range(len(tcases_timestep)-1):
Stout = tcases_timestep[t]
if Stout >= S[t]:
# reconstruction indicates the number of infections
# exceed the population - flag this for error reporting
# means the reported cases or reporting factor are too large
# for the population value specified
# Todo: flag a warning
Stout = S[t]

S[t+1] = S[t] - Stout
Etout = np.random.binomial(E[t], prob_E[t])
E[t+1] = E[t] + Stout - Etout
I1tout = np.random.binomial(I1[t], prob_III[t])
I1[t+1] = I1[t] + Etout - I1tout
I2tout = np.random.binomial(I2[t], prob_III[t])
I2[t+1] = I2[t] + I1tout - I2tout
I3tout = np.random.binomial(I3[t], prob_III[t])
I3[t+1] = I3[t] + I2tout - I3tout
R[t+1] = R[t] + I3tout

# now bring these vectors back to days
S = [S[t] for t in range(len(S)) if t % n_steps_per_day == 0]
E = [E[t] for t in range(len(E)) if t % n_steps_per_day == 0]
I1 = [I1[t] for t in range(len(I1)) if t % n_steps_per_day == 0]
I2 = [I2[t] for t in range(len(I2)) if t % n_steps_per_day == 0]
I3 = [I3[t] for t in range(len(I3)) if t % n_steps_per_day == 0]
R = [R[t] for t in range(len(R)) if t % n_steps_per_day == 0]
T = [None]*len(S)
for day_idx in range(len(T)):
T[day_idx] = 0
for t in range(n_steps_per_day):
T[day_idx] += tcases_timestep[t+day_idx*n_steps_per_day]
assert len(dates) == len(T)
assert len(dates) == len(S)

# return the output
# - dates: dates for all the other compartments
# - transmissions: transmissions (reportable cases at the time
# of the initial transmission
# - S,E,I1,I2,I3,R: numbers of individuals in each of the
# compartments on the dates in tdates
# orig_rep_cases added in the workflow
return Bunch(dates=dates, S=S, E=E, I1=I1, I2=I2, I3=I3, R=R, transmissions=T) #, orig_rep_cases=output_reported_cases)


def np_stochastic_reconstruction(*, dates,
reported_cases_per_day,
counties,
Expand Down
Loading