Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to demographics.py for changes in UN API #936

Merged
merged 15 commits into from
Jun 11, 2024
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ regression/OUTPUT_BASELINE/*
regression/OUTPUT_REFORM*
.vscode/
*default.profraw
*un_api_token.txt
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.11.8] - 2024-06-09 01:00:00

### Added

- Updates the `demographics.py` module to accept a token for access to the UN World Population Prospects database and, when the UN API request fails, to download the data from the [Population-Data](https://github.com/EAPD-DRB/Population-Data) repository instead.

## [0.11.7] - 2024-06-07 01:00:00

Expand Down Expand Up @@ -233,6 +238,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Any earlier versions of OG-USA can be found in the [`OG-Core`](https://github.com/PSLmodels/OG-Core) repository [release history](https://github.com/PSLmodels/OG-Core/releases) from [v.0.6.4](https://github.com/PSLmodels/OG-Core/releases/tag/v0.6.4) (Jul. 20, 2021) or earlier.


[0.11.8]: https://github.com/PSLmodels/OG-Core/compare/v0.11.7...v0.11.8
[0.11.7]: https://github.com/PSLmodels/OG-Core/compare/v0.11.6...v0.11.7
[0.11.6]: https://github.com/PSLmodels/OG-Core/compare/v0.11.5...v0.11.6
[0.11.5]: https://github.com/PSLmodels/OG-Core/compare/v0.11.4...v0.11.5
Expand Down
2 changes: 1 addition & 1 deletion ogcore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
from ogcore.txfunc import *
from ogcore.utils import *

__version__ = "0.11.7"
__version__ = "0.11.8"
153 changes: 106 additions & 47 deletions ogcore/demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,24 @@ def get_un_data(
+ "?format=csv"
)

# Check for a file named "un_api_token.txt" in the current directory
if os.path.exists(os.path.join("un_api_token.txt")):
with open(os.path.join("un_api_token.txt"), "r") as file:
UN_TOKEN = file.read().strip()
else: # if file not exist, prompt user for token
UN_TOKEN = input(
"Please enter your UN API token (press return if you do not have one): "
)
# write the UN_TOKEN to a file to find in the future
with open(os.path.join("un_api_token.txt"), "w") as file:
file.write(UN_TOKEN)

# get data from url
response = get_legacy_session().get(target)
payload = {}
headers = {"Authorization": "Bearer " + UN_TOKEN}
response = get_legacy_session().get(target, headers=headers, data=payload)
# Check if the request was successful before processing
if response.status_code == 200:

# if want to download the data
# with open("downloaded_datan.csv", "wb") as f:
# f.write(response.content)
# df = pd.read_csv("downloaded_datan.csv")
# else
# print("TARGET: ", target)
csvStringIO = StringIO(response.text)
df = pd.read_csv(csvStringIO, sep="|", header=1)

Expand All @@ -93,10 +100,40 @@ def get_un_data(
df.year = df.year.astype(int)
df = df[df.age < 100] # need to drop 100+ age category
else:
# Read from UN GH Repo:
print(
f"Failed to retrieve population data. HTTP status code: {response.status_code}"
f"Failed to retrieve population data from UN. Reading "
+ " from https://github.com/EAPD-DRB/Population-Data "
+ "instead of UN WPP API"
)
country_dict = {
"840": "USA",
"710": "ZAF",
"458": "MYS",
"356": "IND",
"826": "UK",
}
un_variable_dict = {
"68": "fertility_rates",
"80": "mortality_rates",
"47": "population",
}
country = country_dict[country_id]
variable = un_variable_dict[variable_code]
url = (
"https://raw.githubusercontent.com/EAPD-DRB/"
+ "Population-Data/main/"
+ "Data/{c}/UN_{v}_data.csv".format(c=country, v=variable)
)
assert False
df = pd.read_csv(url)
# keep just the years requested
df = df[(df.year >= start_year) & (df.year <= end_year)]

# Do we still want to keep the status code for failures?
# print(
# f"Failed to retrieve population data. HTTP status code: {response.status_code}"
# )
# assert False

return df

Expand Down Expand Up @@ -136,11 +173,15 @@ def get_fert(
"""
# initialize fert rates array
fert_rates_2D = np.zeros((end_year + 1 - start_year, totpers))
# Read UN data, 1 year at a time
# Read UN data
df = get_un_data(
"68", country_id=country_id, start_year=start_year, end_year=end_year
)
# Clean and rebin data
for y in range(start_year, end_year + 1):
df = get_un_data("68", country_id=country_id, start_year=y, end_year=y)
df_y = df[(df.age >= min_age) & (df.age <= max_age) & (df.year == y)]
# put in vector
fert_rates = df.value.values
fert_rates = df_y.value.values
# fill in with zeros for ages < 15 and > 49
# NOTE: this assumes min_age < 15 and max_age > 49
fert_rates = np.append(fert_rates, np.zeros(max_age - 49))
Expand All @@ -163,7 +204,7 @@ def get_fert(

# Create plots if needed
if graph:
if plot_path:
if plot_path is not None:
pp.plot_fert_rates(
[fert_rates_2D],
start_year=start_year,
Expand Down Expand Up @@ -219,10 +260,14 @@ def get_mort(
mort_rates_2D = np.zeros((end_year + 1 - start_year, totpers))
infmort_rate_vec = np.zeros(end_year + 1 - start_year)
# Read UN data
df = get_un_data(
"80", country_id=country_id, start_year=start_year, end_year=end_year
)
# Clean and rebin data
for y in range(start_year, end_year + 1):
df = get_un_data("80", country_id=country_id, start_year=y, end_year=y)
df_y = df[(df.age >= min_age) & (df.age <= max_age) & (df.year == y)]
# put in vector
mort_rates_data = df.value.values
mort_rates_data = df_y.value.values
# In UN data, mortality rates for 0 year olds are the infant
# mortality rates
infmort_rate = mort_rates_data[0]
Expand All @@ -249,7 +294,7 @@ def get_mort(

# Create plots if needed
if graph:
if plot_path:
if plot_path is not None:
pp.plot_mort_rates_data(
mort_rates_2D,
start_year,
Expand Down Expand Up @@ -322,7 +367,7 @@ def get_pop(
"""
# Generate time path of the nonstationary population distribution
# Get path up to end of data year
pop_2D = np.zeros((end_year + 1 - start_year + 1, E + S))
pop_2D = np.zeros((end_year + 2 - start_year, E + S))
if infer_pop:
if pre_pop_dist is None:
pre_pop_data = get_un_data(
Expand All @@ -331,11 +376,17 @@ def get_pop(
start_year=start_year - 1,
end_year=start_year - 1,
)
if download_path:
pre_pop_data.to_csv(
os.path.join(download_path, "raw_pre_pop_data_UN.csv"),
index=False,
)
pre_pop_sample = pre_pop_data[
(pre_pop_data["age"] >= min_age)
& (pre_pop_data["age"] <= max_age)
]
pre_pop = pre_pop_sample.value.values
pre_pop_dist = pop_rebin(pre_pop, E + S)
else:
pre_pop = pre_pop_dist
if initial_pop is None:
Expand All @@ -350,6 +401,7 @@ def get_pop(
& (pre_pop_data["age"] <= max_age)
]
initial_pop = initial_pop_sample.value.values
initial_pop = pop_rebin(initial_pop, E + S)
# Check that we have all necessary inputs to infer the population
# distribution
assert not [
Expand All @@ -374,22 +426,27 @@ def get_pop(
)
else:
# Read UN data
pop_data = get_un_data(
"47",
country_id=country_id,
start_year=start_year,
end_year=end_year
+ 2, # note go to + 2 because needed to infer immigration for end_year
)
# Clean and rebin data
for y in range(start_year, end_year + 2):
pop_data = get_un_data(
"47",
country_id=country_id,
start_year=y,
end_year=y,
)
pop_data_sample = pop_data[
(pop_data["age"] >= min_age) & (pop_data["age"] <= max_age)
(pop_data["age"] >= min_age)
& (pop_data["age"] <= max_age)
& (pop_data["year"] == y)
]
pop = pop_data_sample.value.values
# Generate the current population distribution given that E+S might
# be less than max_age-min_age+1
# age_per_EpS = np.arange(1, E + S + 1)
pop_EpS = pop_rebin(pop, E + S)
pop_2D[y - start_year, :] = pop_EpS

# get population distribution one year before initial year for
# calibration of omega_S_preTP
pre_pop_data = get_un_data(
Expand Down Expand Up @@ -527,25 +584,27 @@ def get_imm_rates(
assert fert_rates.shape == mort_rates.shape
assert infmort_rates is not None
assert infmort_rates.shape[0] == mort_rates.shape[0]
# Read UN data
for y in range(start_year, end_year + 1):
if pop_dist is None:
# need to read UN population data by age for each year
df = get_un_data(
"47", country_id=country_id, start_year=y, end_year=y
)
pop_t = df[(df.age < 100) & (df.age >= 0)].value.values
if pop_dist is None:
# need to read UN population data
df = get_un_data(
"47",
country_id=country_id,
start_year=start_year,
end_year=end_year + 2,
)
pop_dist = np.zeros((end_year + 2 - start_year, totpers))
for y in range(start_year, end_year + 1):
pop_t = df[
(df.age < 100) & (df.age >= 0) & (df.year == y)
].value.values
pop_t = pop_rebin(pop_t, totpers)
df = get_un_data(
"47", country_id=country_id, start_year=y + 1, end_year=y + 1
)
pop_tp1 = df[(df.age < 100) & (df.age >= 0)].value.values
pop_tp1 = pop_rebin(pop_tp1, totpers)
else:
# Make sure shape conforms
assert pop_dist.shape[1] == mort_rates.shape[1]
pop_t = pop_dist[y - start_year, :]
pop_tp1 = pop_dist[y - start_year + 1, :]
pop_dist[y - start_year, :] = pop_t
# Make sure shape conforms
assert pop_dist.shape[1] == mort_rates.shape[1]
assert pop_dist.shape[0] == end_year - start_year + 2
for y in range(start_year, end_year + 1):
pop_t = pop_dist[y - start_year, :]
pop_tp1 = pop_dist[y + 1 - start_year, :]
# initialize imm_rate vector
imm_rates = np.zeros(totpers)
# back out imm rates by age for each year
Expand All @@ -570,7 +629,7 @@ def get_imm_rates(

# Create plots if needed
if graph:
if plot_path:
if plot_path is not None:
pp.plot_imm_rates(
imm_rates_2D,
start_year,
Expand Down Expand Up @@ -640,7 +699,7 @@ def get_pop_objs(
pre_pop_dist=None,
country_id=UN_COUNTRY_CODE,
initial_data_year=START_YEAR - 1,
final_data_year=START_YEAR + 2, # as default data year goes until T1
final_data_year=START_YEAR + 2,
GraphDiag=True,
download_path=None,
):
Expand Down Expand Up @@ -710,8 +769,8 @@ def get_pop_objs(
final_data_year,
)
assert E + S <= max_age - min_age + 1
assert initial_data_year >= 2011 and initial_data_year <= 2100
assert final_data_year >= 2011 and final_data_year <= 2100
assert initial_data_year >= 2011 and initial_data_year <= 2100 - 1
assert final_data_year >= 2011 and final_data_year <= 2100 - 1
# Ensure that the last year of data used is before SS transition assumed
# Really, it will need to be well before this
assert final_data_year > initial_data_year
Expand Down
1 change: 0 additions & 1 deletion ogcore/txfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,6 @@ def txfunc_est(
phi1_init = 1.0
phi2_init = 1.0
params_init = np.array([phi0_init, phi1_init, phi2_init])
print("Initial phi0, phi1, phi2: ", params_init)
tx_objs = (
np.array([None]),
X,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="ogcore",
version="0.11.7",
version="0.11.8",
author="Jason DeBacker and Richard W. Evans",
license="CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
description="A general equilibribum overlapping generations model for fiscal policy analysis",
Expand Down
4 changes: 4 additions & 0 deletions tests/test_demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def test_get_pop_objs_read_UN_data():
GraphDiag=False,
)

assert isinstance(pop_dict, dict)


def test_get_pop_objs():
"""
Expand Down Expand Up @@ -286,6 +288,8 @@ def test_get_imm_rates():
mort_rates=mort_rates,
infmort_rates=infmort_rates,
pop_dist=pop_dist,
start_year=2024,
end_year=2025,
graph=True,
)
assert imm_rates.shape[1] == S
Expand Down
Loading