Skip to content

Depression play

Asif Tamuri edited this page Jun 13, 2018 · 8 revisions
from datetime import datetime

from pandas import Timestamp as Date
from pandas.tseries.offsets import DateOffset
import pandas as pd

import numpy as np

# Number of simulated people created by get_population_props().
population_size = 10_000

# Shared random number generator. No seed is given, so every run draws a
# different random stream.
rng = np.random.RandomState(seed=None)

def get_population_props(size=None, random_state=None):
    """Build the initial population table, one row per simulated person.

    Everyone starts not depressed, never depressed and not pregnant. Sex,
    date of birth, hypertension and chronic back pain are drawn at random.
    The depression date and resolution-probability columns start empty
    (NaT / NaN).

    Columns are created with concrete dtypes (bool, datetime64, float) rather
    than ``object`` so that boolean operators such as ``~`` act as logical
    negation on them.

    Parameters
    ----------
    size : int, optional
        Number of people to create. Defaults to the module-level
        ``population_size``.
    random_state : numpy.random.RandomState, optional
        Source of randomness. Defaults to the module-level ``rng``.

    Returns
    -------
    pandas.DataFrame
        Indexed ``0 .. size - 1`` with the columns named in ``properties``.
    """
    # NOTE: 'has_hyptension' is misspelled, but it is the column name the rest
    # of the file reads, so it is kept as-is.
    properties = ['is_depressed', 'date_init_depression', 'date_depression_resolved',
                  'ever_depressed', 'prob_3m_resol_depression', 'is_pregnant', 'female',
                  'male', 'date_of_birth', 'has_hyptension', 'has_chronic_back_pain']

    n = population_size if size is None else size
    gen = rng if random_state is None else random_state

    female = gen.choice([True, False], replace=True, size=n)

    # RandomState.randint excludes its upper bound, so 13 and 29 are needed to
    # include December and the 28th. Days stop at 28 so that every
    # (year, month, day) combination is a valid calendar date.
    birth_parts = pd.DataFrame({
        'year': gen.randint(1950, 2010, size=n),
        'month': gen.randint(1, 13, size=n),
        'day': gen.randint(1, 29, size=n),
    })
    date_of_birth = pd.to_datetime(birth_parts).values

    no_date = np.full(n, np.datetime64('NaT'), dtype='datetime64[ns]')

    data = {
        'is_depressed': np.zeros(n, dtype=bool),
        'date_init_depression': no_date,
        'date_depression_resolved': no_date.copy(),
        'ever_depressed': np.zeros(n, dtype=bool),
        'prob_3m_resol_depression': np.full(n, np.nan),
        'is_pregnant': np.zeros(n, dtype=bool),
        'female': female,
        'male': ~female,
        'date_of_birth': date_of_birth,
        # Each chronic condition is present in roughly 10% of the population.
        'has_hyptension': gen.choice([True, False], replace=True, size=n, p=[0.1, 0.9]),
        'has_chronic_back_pain': gen.choice([True, False], replace=True, size=n, p=[0.1, 0.9]),
    }
    return pd.DataFrame(data, index=range(n), columns=properties)

# Model parameters read by single_step().
params = {
    # Baseline probability assigned to each not-depressed person per step.
    'base_3m_prob_depression': 0.0001,

    # Multipliers applied on top of the baseline probability.
    # NOTE: 'rr_depression_low_ses' is not referenced by the code in this file.
    'rr_depression_low_ses': 3,
    'rr_depression_chron_cond': 1.25,
    'rr_depression_pregnancy': 3,
    'rr_depression_female': 1.5,
    'rr_depression_prev_episode': 50,
    'rr_depression_age_15_20': 1,
    'rr_depression_age_60plus': 3,

    # One of these is drawn at random for every newly depressed person and
    # stored as their per-step probability of resolution.
    'depression_resolution_rates': [0.2, 0.3, 0.5, 0.7, 0.95],

    # Multiplier on the resolution probability for people with a chronic
    # condition.
    'rr_resol_depress_chron_cond': 0.75,
}

def single_step(df, now, parameters=None, random_state=None):
    """Advance the population by one time step, updating ``df`` in place.

    Two things happen, both decided from the state at the start of the step:

    1. Onset: every person who is not depressed gets a probability equal to
       the baseline multiplied by the relative risk of each factor that
       applies to them, and becomes depressed if that probability exceeds a
       uniform random draw. New cases get an onset date of ``now``, a cleared
       resolution date and a randomly chosen resolution probability.
    2. Resolution: every person who was depressed at the start of the step
       recovers if their stored resolution probability (reduced for people
       with a chronic condition) exceeds a uniform random draw. Recovered
       people get a resolution date of ``now`` and a cleared onset date.

    Parameters
    ----------
    df : pandas.DataFrame
        Population table with the columns produced by
        ``get_population_props``. Modified in place.
    now : pandas.Timestamp
        Date of this step.
    parameters : dict, optional
        Model parameters. Defaults to the module-level ``params``.
    random_state : numpy.random.RandomState, optional
        Source of randomness. Defaults to the module-level ``rng``.

    Returns
    -------
    None
    """
    p = params if parameters is None else parameters
    gen = rng if random_state is None else random_state

    # Snapshot both groups up front so that people who become depressed in
    # this step are not also considered for recovery in the same step.
    # astype(bool) guards against object-dtype columns, on which ``~`` is not
    # guaranteed to behave as logical negation.
    depressed = df['is_depressed'].astype(bool)
    not_depressed = ~depressed

    ever_depressed = df['ever_depressed'].astype(bool)
    is_pregnant = df['is_pregnant'].astype(bool)
    is_female = df['female'].astype(bool)
    date_of_birth = pd.to_datetime(df['date_of_birth'])

    # Either condition on its own counts as having a chronic condition.
    has_chronic_condition = (df['has_hyptension'].astype(bool)
                             | df['has_chronic_back_pain'].astype(bool))

    ago_15yr = now - DateOffset(years=15)
    ago_20yr = now - DateOffset(years=20)
    ago_60yr = now - DateOffset(years=60)
    aged_15_to_20 = date_of_birth.between(ago_20yr, ago_15yr)
    # Aged 60 or over means born on or before the date 60 years ago.
    aged_60_plus = date_of_birth <= ago_60yr

    # Probability is 0 for people already depressed. The Series is created as
    # float so that fractional probabilities can be stored without a dtype
    # change.
    effective_prob_depression = pd.Series(0.0, index=df.index)
    effective_prob_depression.loc[not_depressed] = p['base_3m_prob_depression']
    relative_risks = (
        (is_pregnant, 'rr_depression_pregnancy'),
        (ever_depressed, 'rr_depression_prev_episode'),
        (aged_15_to_20, 'rr_depression_age_15_20'),
        (aged_60_plus, 'rr_depression_age_60plus'),
        (is_female, 'rr_depression_female'),
        (has_chronic_condition, 'rr_depression_chron_cond'),
    )
    for exposed, key in relative_risks:
        effective_prob_depression.loc[not_depressed & exposed] *= p[key]

    # Onset: one uniform draw per not-depressed person.
    draws = gen.rand(int(not_depressed.sum()))
    onset = effective_prob_depression.loc[not_depressed] > draws
    new_depressed = onset[onset].index
    print('\tnew depressed:', len(new_depressed))
    df.loc[new_depressed, 'is_depressed'] = True
    df.loc[new_depressed, 'ever_depressed'] = True
    df.loc[new_depressed, 'date_init_depression'] = now
    df.loc[new_depressed, 'date_depression_resolved'] = pd.NaT
    df.loc[new_depressed, 'prob_3m_resol_depression'] = gen.choice(
        p['depression_resolution_rates'], size=len(new_depressed))

    # Continuation or resolution of depression, for those depressed at the
    # start of the step only. astype(float) yields an independent Series, so
    # the chronic-condition adjustment does not alter the stored probability.
    effective_prob_recover = df.loc[depressed, 'prob_3m_resol_depression'].astype(float)
    chronic_among_depressed = has_chronic_condition.loc[effective_prob_recover.index]
    effective_prob_recover.loc[chronic_among_depressed] *= p['rr_resol_depress_chron_cond']
    recovered = effective_prob_recover > gen.rand(len(effective_prob_recover))
    resolved_depress = recovered[recovered].index
    df.loc[resolved_depress, 'is_depressed'] = False
    df.loc[resolved_depress, 'date_depression_resolved'] = now
    df.loc[resolved_depress, 'date_init_depression'] = pd.NaT
    print('\tno longer depressed', len(resolved_depress))
    print('\ttotal depressed', int(df['is_depressed'].astype(bool).sum()))

# Driver: create the population, then run ten steps three months apart,
# starting from today's date. The date of each step is printed after the
# step's own summary lines.
population = get_population_props()
now = pd.to_datetime('today')
for timestep in range(10):
    single_step(population, now)
    print(now)
    now += pd.DateOffset(months=3)
Clone this wiki locally