Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extensions to demographics.py #902

Merged
merged 16 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions .github/workflows/deploy_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
uses: actions/checkout@v4
with:
persist-credentials: false

Expand All @@ -26,14 +26,12 @@ jobs:
shell: bash -l {0}
run: |
pip install -e .
pip install jupyter-book>=0.11.3
pip install sphinxcontrib-bibtex>=2.0.0
python -m ipykernel install --user --name=ogcore-dev
cd docs
jb build ./book

- name: Deploy
uses: JamesIves/github-pages-deploy-action@releases/v3
uses: JamesIves/github-pages-deploy-action@v4
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BRANCH: gh-pages # The branch the action should deploy to.
Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/docs_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
uses: actions/checkout@v4 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
with:
persist-credentials: false

Expand All @@ -23,8 +23,6 @@ jobs:
shell: bash -l {0}
run: |
pip install -e .
pip install jupyter-book>=0.11.3
pip install sphinxcontrib-bibtex>=2.0.0
python -m ipykernel install --user --name=ogcore-dev
cd docs
jb build ./book
9 changes: 9 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ channels:
- conda-forge
dependencies:
- python>=3.7.7, <3.12
- numpy
- ipython
- setuptools
- scipy>=1.7.1
Expand All @@ -12,6 +13,13 @@ dependencies:
- dask-core>=2.30.0
- distributed>=2.30.1
- paramtools>=0.15.0
- sphinx
- sphinx-argparse
- sphinxcontrib-bibtex>=2.0.0
- sphinx-math-dollar
- pydata-sphinx-theme
- jupyter-book>=0.11.3
- jupyter
- pytest>=6.0
- pytest-xdist
- pylint
Expand All @@ -22,3 +30,4 @@ dependencies:
- pip
- pip:
- pygam
- linecheck
188 changes: 142 additions & 46 deletions ogcore/demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,19 @@ def get_pop(
S=80,
min_age=0,
max_age=99,
infer_pop=False,
fert_rates=None,
mort_rates=None,
infmort_rates=None,
imm_rates=None,
initial_pop=None,
pre_pop_dist=None,
country_id=UN_COUNTRY_CODE,
start_year=START_YEAR,
end_year=END_YEAR,
):
"""
Retrives the population distribution data from the UN data API
Retrieves the population distribution data from the UN data API

Args:
E (int): number of model periods in which agent is not
Expand All @@ -267,6 +274,20 @@ def get_pop(
min_age (int): age in years at which agents are born, >= 0
max_age (int): age in years at which agents die with certainty,
>= 4, < 100 (max age in UN data is 99, 100+ i same group)
infer_pop (bool): =True if want to infer the population
from the given fertility, mortality, and immigration rates
fert_rates (Numpy array): fertility rates for each year of data
and model age
mort_rates (Numpy array): mortality rates for each year of data
and model age
infmort_rates (Numpy array): infant mortality rates for each year
of data
imm_rates (Numpy array): immigration rates for reach year of data
and model age
initial_pop_data (Pandas DataFrame): initial population data
for the first year of model calibration (start_year)
pre_pop_dist (Numpy array): population distribution for the year
before the initial year for calibration
country_id (str): country id for UN data
start_year (int): start year data
end_year (int): end year for data
Expand All @@ -279,35 +300,85 @@ def get_pop(
# Generate time path of the nonstationary population distribution
# Get path up to end of data year
pop_2D = np.zeros((end_year + 1 - start_year + 1, E + S))
# Read UN data
for y in range(start_year, end_year + 2):
pop_data = get_un_data(
if infer_pop:
if pre_pop_dist is None:
pre_pop_data = get_un_data(
"47",
country_id=country_id,
start_year=start_year - 1,
end_year=start_year - 1,
)
pre_pop_sample = pre_pop_data[
(pre_pop_data["age"] >= min_age)
& (pre_pop_data["age"] <= max_age)
]
pre_pop = pre_pop_sample.value.values
else:
pre_pop = pre_pop_dist
if initial_pop is None:
initial_pop_data = get_un_data(
"47",
country_id=country_id,
start_year=start_year,
end_year=start_year,
)
initial_pop_sample = initial_pop_data[
(pre_pop_data["age"] >= min_age)
& (pre_pop_data["age"] <= max_age)
]
initial_pop = initial_pop_sample.value.values
# Check that have all necessary inputs to infer the population
# distribution
assert not [
x
for x in (fert_rates, mort_rates, infmort_rates, imm_rates)
if x is None
]
len_pop_dist = end_year + 1 - start_year
pop_2D = np.zeros((len_pop_dist, E + S))
# set initial population distribution in the counterfactual to
# the first year of the user provided distribution
pop_2D[0, :] = initial_pop
for t in range(1, len_pop_dist):
# find newborns next period
newborns = np.dot(fert_rates[t - 1, :], pop_2D[t - 1, :])
pop_2D[t, 0] = (1 - infmort_rates[t - 1]) * newborns + imm_rates[
t - 1, 0
] * pop_2D[t - 1, 0]
pop_2D[t, 1:] = (
pop_2D[t - 1, :-1] * (1 - mort_rates[t - 1, :-1])
+ pop_2D[t - 1, 1:] * imm_rates[t - 1, 1:]
)
else:
# Read UN data
for y in range(start_year, end_year + 2):
pop_data = get_un_data(
"47",
country_id=country_id,
start_year=y,
end_year=y,
)
pop_data_sample = pop_data[
(pop_data["age"] >= min_age) & (pop_data["age"] <= max_age)
]
pop = pop_data_sample.value.values
# Generate the current population distribution given that E+S might
# be less than max_age-min_age+1
# age_per_EpS = np.arange(1, E + S + 1)
pop_EpS = pop_rebin(pop, E + S)
pop_2D[y - start_year, :] = pop_EpS
# get population distribution one year before initial year for
# calibration of omega_S_preTP
pre_pop_data = get_un_data(
"47",
country_id=country_id,
start_year=y,
end_year=y,
start_year=start_year - 1,
end_year=start_year - 1,
)
pop_data_sample = pop_data[
(pop_data["age"] >= min_age) & (pop_data["age"] <= max_age)
pre_pop_sample = pre_pop_data[
(pre_pop_data["age"] >= min_age) & (pre_pop_data["age"] <= max_age)
]
pop = pop_data_sample.value.values
# Generate the current population distribution given that E+S might
# be less than max_age-min_age+1
# age_per_EpS = np.arange(1, E + S + 1)
pop_EpS = pop_rebin(pop, E + S)
pop_2D[y - start_year, :] = pop_EpS
# get population distribution one year before initial year for
# calibration of omega_S_preTP
pre_pop_data = get_un_data(
"47",
country_id=country_id,
start_year=start_year - 1,
end_year=start_year - 1,
)
pre_pop_sample = pre_pop_data[
(pre_pop_data["age"] >= min_age) & (pre_pop_data["age"] <= max_age)
]
pre_pop = pre_pop_sample.value.values
pre_pop = pre_pop_sample.value.values

return pop_2D, pre_pop

Expand Down Expand Up @@ -518,6 +589,7 @@ def get_pop_objs(
mort_rates=None,
infmort_rates=None,
imm_rates=None,
infer_pop=False,
pop_dist=None,
pre_pop_dist=None,
country_id=UN_COUNTRY_CODE,
Expand Down Expand Up @@ -546,6 +618,7 @@ def get_pop_objs(
length T0
imm_rates (array_like): user provided immigration rates, dimensions
are T0 x E+S
infer_pop (bool): =True if want to infer the population
pop_dist (array_like): user provided population distribution,
dimensions are T0+1 x E+S
pre_pop_dist (array_like): user provided population distribution
Expand All @@ -555,7 +628,7 @@ def get_pop_objs(
initial_data_year (int): initial year of data to use
(not relevant if have user provided data)
final_data_year (int): final year of data to use,
T0=intial_year-final_year + 1
T0=initial_year-final_year + 1
pop_dist (array_like): user provided population distribution, last
dimension is of length E+S
GraphDiag (bool): =True if want graphical output and printed
Expand Down Expand Up @@ -623,7 +696,7 @@ def get_pop_objs(
fert_rates,
np.tile(
fert_rates[-1, :].reshape(1, E + S),
(T - fert_rates.shape[0], 1),
(T + S - fert_rates.shape[0], 1),
),
),
axis=0,
Expand Down Expand Up @@ -653,31 +726,55 @@ def get_pop_objs(
mort_rates,
np.tile(
mort_rates[-1, :].reshape(1, E + S),
(T - mort_rates.shape[0], 1),
(T + S - mort_rates.shape[0], 1),
),
),
axis=0,
)
infmort_rates = np.concatenate(
(
infmort_rates,
np.tile(infmort_rates[-1], (T - infmort_rates.shape[0])),
np.tile(infmort_rates[-1], (T + S - infmort_rates.shape[0])),
)
)
mort_rates_S = mort_rates[:, E:]
# Get population distribution if not provided
if pop_dist is None:
pop_2D, pre_pop = get_pop(
E,
S,
min_age,
max_age,
country_id,
initial_data_year,
final_data_year,
)
# or if just provide initial pop and infer_pop=True
if (pop_dist is None) or (pop_dist is not None and infer_pop is True):
if infer_pop:
if pop_dist is not None:
initial_pop = pop_dist[0, :].reshape(1, pop_dist.shape[-1])
else:
initial_pop = None
pop_2D, pre_pop = get_pop(
E,
S,
min_age,
max_age,
infer_pop,
fert_rates,
mort_rates,
infmort_rates,
imm_rates,
initial_pop,
pre_pop_dist,
country_id,
initial_data_year,
final_data_year,
)
else:
pop_2D, pre_pop = get_pop(
E,
S,
min_age,
max_age,
country_id=country_id,
start_year=initial_data_year,
end_year=final_data_year,
)
else:
# Check first dims of pop_dist as input by user
print("T0 = ", T0)
assert pop_dist.shape[0] == T0 + 1 # population needs to be
# one year longer in order to find immigration rates
assert pop_dist.shape[-1] == E + S
Expand Down Expand Up @@ -719,14 +816,14 @@ def get_pop_objs(
imm_rates_orig,
np.tile(
imm_rates_orig[-1, :].reshape(1, E + S),
(T - imm_rates_orig.shape[0], 1),
(T + S - imm_rates_orig.shape[0], 1),
),
),
axis=0,
)
# If the population distribution was given, check it for consistency
# with the fertility, mortality, and immigration rates
if pop_dist is not None:
if pop_dist is not None and not infer_pop:
len_pop_dist = pop_dist.shape[0]
pop_counter_2D = np.zeros((len_pop_dist, E + S))
# set initial population distribution in the counterfactual to
Expand Down Expand Up @@ -817,7 +914,7 @@ def get_pop_objs(
imm_rates_orig[:fixper, E:],
np.tile(
imm_rates_adj[E:].reshape(1, S),
(T - fixper, 1),
(T + S - fixper, 1),
),
),
axis=0,
Expand Down Expand Up @@ -968,13 +1065,12 @@ def get_pop_objs(
path=OUTPUT_DIR,
)

# return omega_path_S, g_n_SS, omega_SSfx, survival rates,
# mort_rates_S, and g_n_path
# Return objects in a dictionary
pop_dict = {
"omega": omega_path_S,
"g_n_ss": g_n_SS,
"omega_SS": omega_SSfx[-S:] / omega_SSfx[-S:].sum(),
"rho": [mort_rates_S],
"rho": mort_rates_S,
"g_n": g_n_path,
"imm_rates": imm_rates_mat,
"omega_S_preTP": omega_S_preTP,
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
include_packages=True,
python_requires=">=3.7.7, <3.12",
install_requires=[
"numpy",
"scipy>=1.7.1",
"pandas>=1.2.5",
"matplotlib",
Expand Down
Loading
Loading