PSLmodels · rickecon · Jun 11, 2024 · May 31, 2024 · May 31, 2024 · May 31, 2024
diff --git a/.gitignore b/.gitignore
@@ -55,3 +55,4 @@ regression/OUTPUT_BASELINE/*
 regression/OUTPUT_REFORM*
 .vscode/
 *default.profraw
+*un_api_token.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.11.8] - 2024-06-09 01:00:00
+
+### Added
+
+- Updates the `demographics.py` module to accept a token for UN World Population Prospects database access and, when the UN API request fails, to download data from the [Population-Data](https://github.com/EAPD-DRB/Population-Data) repository instead.
 
 ## [0.11.7] - 2024-06-07 01:00:00
 
@@ -233,6 +238,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Any earlier versions of OG-USA can be found in the [`OG-Core`](https://github.com/PSLmodels/OG-Core) repository [release history](https://github.com/PSLmodels/OG-Core/releases) from [v.0.6.4](https://github.com/PSLmodels/OG-Core/releases/tag/v0.6.4) (Jul. 20, 2021) or earlier.
 
 
+[0.11.8]: https://github.com/PSLmodels/OG-Core/compare/v0.11.7...v0.11.8
 [0.11.7]: https://github.com/PSLmodels/OG-Core/compare/v0.11.6...v0.11.7
 [0.11.6]: https://github.com/PSLmodels/OG-Core/compare/v0.11.5...v0.11.6
 [0.11.5]: https://github.com/PSLmodels/OG-Core/compare/v0.11.4...v0.11.5

diff --git a/ogcore/__init__.py b/ogcore/__init__.py
@@ -20,4 +20,4 @@
 from ogcore.txfunc import *
 from ogcore.utils import *
 
-__version__ = "0.11.7"
+__version__ = "0.11.8"
diff --git a/ogcore/demographics.py b/ogcore/demographics.py
@@ -66,17 +66,24 @@ def get_un_data(
         + "?format=csv"
     )
 
+    # Check for a file named "un_api_token.txt" in the current directory
+    if os.path.exists(os.path.join("un_api_token.txt")):
+        with open(os.path.join("un_api_token.txt"), "r") as file:
+            UN_TOKEN = file.read().strip()
+    else:  # if file not exist, prompt user for token
+        UN_TOKEN = input(
+            "Please enter your UN API token (press return if you do not have one): "
+        )
+        # write the UN_TOKEN to a file to find in the future
+        with open(os.path.join("un_api_token.txt"), "w") as file:
+            file.write(UN_TOKEN)
+
     # get data from url
-    response = get_legacy_session().get(target)
+    payload = {}
+    headers = {"Authorization": "Bearer " + UN_TOKEN}
+    response = get_legacy_session().get(target, headers=headers, data=payload)
     # Check if the request was successful before processing
     if response.status_code == 200:
-
-        # if want to download the data
-        # with open("downloaded_datan.csv", "wb") as f:
-        #     f.write(response.content)
-        # df = pd.read_csv("downloaded_datan.csv")
-        # else
-        # print("TARGET: ", target)
         csvStringIO = StringIO(response.text)
         df = pd.read_csv(csvStringIO, sep="|", header=1)
 
@@ -93,10 +100,40 @@ def get_un_data(
         df.year = df.year.astype(int)
         df = df[df.age < 100]  # need to drop 100+ age category
     else:
+        # Fall back to reading from the EAPD-DRB Population-Data GitHub repo:
         print(
-            f"Failed to retrieve population data. HTTP status code: {response.status_code}"
+            f"Failed to retrieve population data from UN. Reading "
+            + " from https://github.com/EAPD-DRB/Population-Data "
+            + "instead of UN WPP API"
+        )
+        country_dict = {
+            "840": "USA",
+            "710": "ZAF",
+            "458": "MYS",
+            "356": "IND",
+            "826": "UK",
+        }
+        un_variable_dict = {
+            "68": "fertility_rates",
+            "80": "mortality_rates",
+            "47": "population",
+        }
+        country = country_dict[country_id]
+        variable = un_variable_dict[variable_code]
+        url = (
+            "https://raw.githubusercontent.com/EAPD-DRB/"
+            + "Population-Data/main/"
+            + "Data/{c}/UN_{v}_data.csv".format(c=country, v=variable)
         )
-        assert False
+        df = pd.read_csv(url)
+        # keep just the years requested
+        df = df[(df.year >= start_year) & (df.year <= end_year)]
+
+        # Do we still want to keep the status code for failures?
+        # print(
+        #     f"Failed to retrieve population data. HTTP status code: {response.status_code}"
+        # )
+        # assert False
 
     return df
 
@@ -136,11 +173,15 @@ def get_fert(
     """
     # initialize fert rates array
     fert_rates_2D = np.zeros((end_year + 1 - start_year, totpers))
-    # Read UN data, 1 year at a time
+    # Read UN data
+    df = get_un_data(
+        "68", country_id=country_id, start_year=start_year, end_year=end_year
+    )
+    # Clean and rebin data
     for y in range(start_year, end_year + 1):
-        df = get_un_data("68", country_id=country_id, start_year=y, end_year=y)
+        df_y = df[(df.age >= min_age) & (df.age <= max_age) & (df.year == y)]
         # put in vector
-        fert_rates = df.value.values
+        fert_rates = df_y.value.values
         # fill in with zeros for ages  < 15 and > 49
         # NOTE: this assumes min_year < 15 and max_age > 49
         fert_rates = np.append(fert_rates, np.zeros(max_age - 49))
@@ -163,7 +204,7 @@ def get_fert(
 
     # Create plots if needed
     if graph:
-        if plot_path:
+        if plot_path is not None:
             pp.plot_fert_rates(
                 [fert_rates_2D],
                 start_year=start_year,
@@ -219,10 +260,14 @@ def get_mort(
     mort_rates_2D = np.zeros((end_year + 1 - start_year, totpers))
     infmort_rate_vec = np.zeros(end_year + 1 - start_year)
     # Read UN data
+    df = get_un_data(
+        "80", country_id=country_id, start_year=start_year, end_year=end_year
+    )
+    # Clean and rebin data
     for y in range(start_year, end_year + 1):
-        df = get_un_data("80", country_id=country_id, start_year=y, end_year=y)
+        df_y = df[(df.age >= min_age) & (df.age <= max_age) & (df.year == y)]
         # put in vector
-        mort_rates_data = df.value.values
+        mort_rates_data = df_y.value.values
         # In UN data, mortality rates for 0 year olds are the infant
         # mortality rates
         infmort_rate = mort_rates_data[0]
@@ -249,7 +294,7 @@ def get_mort(
 
     # Create plots if needed
     if graph:
-        if plot_path:
+        if plot_path is not None:
             pp.plot_mort_rates_data(
                 mort_rates_2D,
                 start_year,
@@ -322,7 +367,7 @@ def get_pop(
     """
     # Generate time path of the nonstationary population distribution
     # Get path up to end of data year
-    pop_2D = np.zeros((end_year + 1 - start_year + 1, E + S))
+    pop_2D = np.zeros((end_year + 2 - start_year, E + S))
     if infer_pop:
         if pre_pop_dist is None:
             pre_pop_data = get_un_data(
@@ -331,11 +376,17 @@ def get_pop(
                 start_year=start_year - 1,
                 end_year=start_year - 1,
             )
+            if download_path:
+                pre_pop_data.to_csv(
+                    os.path.join(download_path, "raw_pre_pop_data_UN.csv"),
+                    index=False,
+                )
             pre_pop_sample = pre_pop_data[
                 (pre_pop_data["age"] >= min_age)
                 & (pre_pop_data["age"] <= max_age)
             ]
             pre_pop = pre_pop_sample.value.values
+            pre_pop_dist = pop_rebin(pre_pop, E + S)
         else:
             pre_pop = pre_pop_dist
         if initial_pop is None:
@@ -350,6 +401,7 @@ def get_pop(
                 & (pre_pop_data["age"] <= max_age)
             ]
             initial_pop = initial_pop_sample.value.values
+            initial_pop = pop_rebin(initial_pop, E + S)
         # Check that have all necessary inputs to infer the population
         # distribution
         assert not [
@@ -374,22 +426,27 @@ def get_pop(
             )
     else:
         # Read UN data
+        pop_data = get_un_data(
+            "47",
+            country_id=country_id,
+            start_year=start_year,
+            end_year=end_year
+            + 2,  # note go to + 2 because needed to infer immigration for end_year
+        )
+        # Clean and rebin data
         for y in range(start_year, end_year + 2):
-            pop_data = get_un_data(
-                "47",
-                country_id=country_id,
-                start_year=y,
-                end_year=y,
-            )
             pop_data_sample = pop_data[
-                (pop_data["age"] >= min_age) & (pop_data["age"] <= max_age)
+                (pop_data["age"] >= min_age)
+                & (pop_data["age"] <= max_age)
+                & (pop_data["year"] == y)
             ]
             pop = pop_data_sample.value.values
             # Generate the current population distribution given that E+S might
             # be less than max_age-min_age+1
             # age_per_EpS = np.arange(1, E + S + 1)
             pop_EpS = pop_rebin(pop, E + S)
             pop_2D[y - start_year, :] = pop_EpS
+
         # get population distribution one year before initial year for
         # calibration of omega_S_preTP
         pre_pop_data = get_un_data(
@@ -527,25 +584,27 @@ def get_imm_rates(
         assert fert_rates.shape == mort_rates.shape
         assert infmort_rates is not None
         assert infmort_rates.shape[0] == mort_rates.shape[0]
-    # Read UN data
-    for y in range(start_year, end_year + 1):
-        if pop_dist is None:
-            # need to read UN population data by age for each year
-            df = get_un_data(
-                "47", country_id=country_id, start_year=y, end_year=y
-            )
-            pop_t = df[(df.age < 100) & (df.age >= 0)].value.values
+    if pop_dist is None:
+        # need to read UN population data
+        df = get_un_data(
+            "47",
+            country_id=country_id,
+            start_year=start_year,
+            end_year=end_year + 2,
+        )
+        pop_dist = np.zeros((end_year + 2 - start_year, totpers))
+        for y in range(start_year, end_year + 1):
+            pop_t = df[
+                (df.age < 100) & (df.age >= 0) & (df.year == y)
+            ].value.values
             pop_t = pop_rebin(pop_t, totpers)
-            df = get_un_data(
-                "47", country_id=country_id, start_year=y + 1, end_year=y + 1
-            )
-            pop_tp1 = df[(df.age < 100) & (df.age >= 0)].value.values
-            pop_tp1 = pop_rebin(pop_tp1, totpers)
-        else:
-            # Make sure shape conforms
-            assert pop_dist.shape[1] == mort_rates.shape[1]
-            pop_t = pop_dist[y - start_year, :]
-            pop_tp1 = pop_dist[y - start_year + 1, :]
+            pop_dist[y - start_year, :] = pop_t
+    # Make sure shape conforms
+    assert pop_dist.shape[1] == mort_rates.shape[1]
+    assert pop_dist.shape[0] == end_year - start_year + 2
+    for y in range(start_year, end_year + 1):
+        pop_t = pop_dist[y - start_year, :]
+        pop_tp1 = pop_dist[y + 1 - start_year, :]
         # initialize imm_rate vector
         imm_rates = np.zeros(totpers)
         # back out imm rates by age for each year
@@ -570,7 +629,7 @@ def get_imm_rates(
 
     # Create plots if needed
     if graph:
-        if plot_path:
+        if plot_path is not None:
             pp.plot_imm_rates(
                 imm_rates_2D,
                 start_year,
@@ -640,7 +699,7 @@ def get_pop_objs(
     pre_pop_dist=None,
     country_id=UN_COUNTRY_CODE,
     initial_data_year=START_YEAR - 1,
-    final_data_year=START_YEAR + 2,  # as default data year goes until T1
+    final_data_year=START_YEAR + 2,
     GraphDiag=True,
     download_path=None,
 ):
@@ -710,8 +769,8 @@ def get_pop_objs(
         final_data_year,
     )
     assert E + S <= max_age - min_age + 1
-    assert initial_data_year >= 2011 and initial_data_year <= 2100
-    assert final_data_year >= 2011 and final_data_year <= 2100
+    assert initial_data_year >= 2011 and initial_data_year <= 2100 - 1
+    assert final_data_year >= 2011 and final_data_year <= 2100 - 1
     # Ensure that the last year of data used is before SS transition assumed
     # Really, it will need to be well before this
     assert final_data_year > initial_data_year

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
@@ -806,7 +806,6 @@ def txfunc_est(
             phi1_init = 1.0
             phi2_init = 1.0
             params_init = np.array([phi0_init, phi1_init, phi2_init])
-        print("Initial phi0, phi1, phi2: ", params_init)
         tx_objs = (
             np.array([None]),
             X,

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="ogcore",
-    version="0.11.7",
+    version="0.11.8",
     author="Jason DeBacker and Richard W. Evans",
     license="CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
     description="A general equilibribum overlapping generations model for fiscal policy analysis",

diff --git a/tests/test_demographics.py b/tests/test_demographics.py
@@ -52,6 +52,8 @@ def test_get_pop_objs_read_UN_data():
         GraphDiag=False,
     )
 
+    assert isinstance(pop_dict, dict)
+
 
 def test_get_pop_objs():
     """
@@ -286,6 +288,8 @@ def test_get_imm_rates():
         mort_rates=mort_rates,
         infmort_rates=infmort_rates,
         pop_dist=pop_dist,
+        start_year=2024,
+        end_year=2025,
         graph=True,
     )
     assert imm_rates.shape[1] == S