Skip to content

Depression play

Asif Tamuri edited this page Jun 14, 2018 · 8 revisions

This is a standalone implementation of the depression module, written to explore different approaches.

from datetime import datetime

from pandas import Timestamp as Date
from pandas.tseries.offsets import DateOffset
import pandas as pd

import numpy as np

# Number of individuals (rows) in the dummy population DataFrame.
population_size = 10_000

def get_population_props():
    """Build a dummy population DataFrame ready for the depression module.

    The frame has one row per person (index named ``person``) and
    ``population_size`` rows in total.  Sex, date of birth, hypertension and
    chronic back pain are drawn from the module-level ``rng``; nobody starts
    out depressed or pregnant.

    Returns:
        A ``pandas.DataFrame`` with boolean flag columns, datetime columns for
        the depression dates and date of birth, and a float
        ``prob_3m_resol_depression`` column that is NaN until a person
        becomes depressed.
    """
    size = population_size
    df = pd.DataFrame(index=pd.RangeIndex(size, name='person'))

    # Assign whole columns directly instead of writing through ``.loc`` into a
    # pre-built all-object frame: that way each column gets a real dtype
    # (bool / datetime64 / float).  With object dtype, ``~column`` is an
    # element-wise integer invert rather than a logical NOT.
    df['is_depressed'] = False
    df['date_init_depression'] = pd.NaT
    df['date_depression_resolved'] = pd.NaT
    df['ever_depressed'] = False
    df['prob_3m_resol_depression'] = np.nan
    df['is_pregnant'] = False
    df['female'] = rng.choice([True, False], replace=True, size=size)
    df['male'] = ~df['female']

    # RandomState.randint excludes its upper bound, so 13 and 29 are needed to
    # make December and the 28th reachable.  Days stop at 28 so that every
    # (year, month, day) combination is a valid date.  Birth years span
    # 1950-2009.
    df['date_of_birth'] = pd.to_datetime([
        datetime(rng.randint(1950, 2010), rng.randint(1, 13), rng.randint(1, 29))
        for _ in range(size)
    ])

    # Each chronic condition is present in roughly 10% of the population.
    df['has_hyptension'] = rng.choice([True, False], replace=True, size=size, p=[0.1, 0.9])
    df['has_chronic_back_pain'] = rng.choice([True, False], replace=True, size=size, p=[0.1, 0.9])
    return df

# Parameters of the depression module.  ``rr_*`` entries are relative-risk
# multipliers applied to a base probability.
params = dict(
    # Onset: base probability per 3-month step and its multipliers.
    base_3m_prob_depression=0.0001,
    rr_depression_low_ses=3,
    rr_depression_chron_cond=1.25,
    rr_depression_pregnancy=3,
    rr_depression_female=1.5,
    rr_depression_prev_episode=50,
    rr_depression_age_15_20=1,
    rr_depression_age_60plus=3,
    # Resolution: candidate per-step resolution probabilities (one is picked
    # at random for each new episode) and the chronic-condition multiplier.
    depression_resolution_rates=[0.2, 0.3, 0.5, 0.7, 0.95],
    rr_resol_depress_chron_cond=0.75,
)

def single_step(df, now):
    """Advance the depression module by one step, updating ``df`` in place.

    People who are not depressed at the start of the step may become
    depressed; people who are depressed at the start of the step may recover.
    Someone who becomes depressed in this step cannot also recover in it,
    because both groups are fixed before any state is changed.

    Random numbers come from the module-level ``rng`` and all rates from the
    module-level ``params``.  ``rr_depression_low_ses`` is not applied here.

    Args:
        df: population DataFrame as produced by ``get_population_props``.
        now: timestamp of this step; used for the age bands and written to the
            date columns.

    Returns:
        None.  Summary counts are printed.
    """
    # Snapshot the status at the start of the step.  ``astype(bool)`` also
    # guards against object-dtype flag columns, for which ``~`` would not be a
    # logical NOT.
    depressed = df.is_depressed.astype(bool)
    not_depressed = ~depressed

    ago_15yr = now - DateOffset(years=15)
    ago_20yr = now - DateOffset(years=20)
    ago_60yr = now - DateOffset(years=60)
    date_of_birth = pd.to_datetime(df.date_of_birth)

    # NOTE(review): a person is treated as having a chronic condition if they
    # have EITHER hypertension or chronic back pain; the previous code
    # required both.  Confirm this matches the intended model.
    has_chronic_condition = (
        df.has_hyptension.astype(bool) | df.has_chronic_back_pain.astype(bool)
    )

    # (who the multiplier applies to, name of the multiplier in ``params``)
    risk_multipliers = [
        (df.is_pregnant.astype(bool), 'rr_depression_pregnancy'),
        # Raised risk applies to people WITH a previous episode.
        (df.ever_depressed.astype(bool), 'rr_depression_prev_episode'),
        (date_of_birth.between(ago_20yr, ago_15yr), 'rr_depression_age_15_20'),
        # Aged 60 or over means born on or before the date 60 years ago.
        (date_of_birth <= ago_60yr, 'rr_depression_age_60plus'),
        (df.female.astype(bool), 'rr_depression_female'),
        (has_chronic_condition, 'rr_depression_chron_cond'),
    ]

    # Onset probability, computed only for people not currently depressed.
    effective_prob_depression = pd.Series(
        params['base_3m_prob_depression'], index=df.index[not_depressed.values]
    )
    for applies, rr_name in risk_multipliers:
        # Restrict each population-wide mask to the at-risk rows so that its
        # index matches the probability series exactly.
        effective_prob_depression.loc[applies[not_depressed]] *= params[rr_name]

    onset = effective_prob_depression > rng.rand(len(effective_prob_depression))
    new_depressed = onset[onset].index
    print('\tnew depressed:', len(new_depressed))
    df.loc[new_depressed, 'is_depressed'] = True
    df.loc[new_depressed, 'ever_depressed'] = True
    df.loc[new_depressed, 'date_init_depression'] = now
    df.loc[new_depressed, 'date_depression_resolved'] = pd.NaT
    # Each new episode gets one of the candidate resolution rates at random.
    df.loc[new_depressed, 'prob_3m_resol_depression'] = rng.choice(
        params['depression_resolution_rates'], size=len(new_depressed)
    )

    # Continuation or resolution, for those depressed at the start of the step.
    # ``astype`` returns a new Series, so the chronic-condition adjustment
    # below is per-step and is never written back to the stored rate.
    effective_prob_recover = df.loc[depressed, 'prob_3m_resol_depression'].astype(float)
    effective_prob_recover.loc[has_chronic_condition[depressed]] *= (
        params['rr_resol_depress_chron_cond']
    )
    recovered = effective_prob_recover > rng.rand(len(effective_prob_recover))
    resolved_depress = recovered[recovered].index
    df.loc[resolved_depress, 'is_depressed'] = False
    df.loc[resolved_depress, 'date_depression_resolved'] = now
    df.loc[resolved_depress, 'date_init_depression'] = pd.NaT
    print('\tno longer depressed', len(resolved_depress))
    print('\ttotal depressed', int(df.is_depressed.astype(bool).sum()))

# Run the depression module for three consecutive 3-month steps, starting
# today.  ``rng`` is the shared random source read by the functions above; the
# fixed seed makes the random draws repeatable.
rng = np.random.RandomState(seed=123456789)
population = get_population_props()
now = pd.to_datetime('today')
for timestep in range(3):
    print(now)
    single_step(population, now)
    now += pd.DateOffset(months=3)
Clone this wiki locally