Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OBR March EFO update #840

Merged
merged 4 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
- bump: minor
changes:
added:
- Add test cases for housing benefit
changed:
- OBR forecast update.

14 changes: 0 additions & 14 deletions policyengine_uk/data/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,8 @@
FRS_2020_21,
FRS_2021_22,
RawFRS_2021_22,
PooledFRS_2018_20,
PooledFRS_2019_21,
SPIEnhancedFRS_2019_20,
SPIEnhancedPooledFRS_2018_20,
SPIEnhancedPooledFRS_2019_21,
CalibratedFRS_2019_20,
CalibratedFRS_2019_21,
CalibratedSPIEnhancedFRS_2019_20,
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
EnhancedFRS,
UKMOD_FRS_2018,
Expand All @@ -24,15 +17,8 @@
FRS_2020_21,
FRS_2021_22,
RawFRS_2021_22,
PooledFRS_2018_20,
PooledFRS_2019_21,
SPIEnhancedFRS_2019_20,
SPIEnhancedPooledFRS_2018_20,
SPIEnhancedPooledFRS_2019_21,
CalibratedFRS_2019_20,
CalibratedFRS_2019_21,
CalibratedSPIEnhancedFRS_2019_20,
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
EnhancedFRS,
UKMOD_FRS_2018,
Expand Down
8 changes: 1 addition & 7 deletions policyengine_uk/data/datasets/frs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,12 @@
from .frs import FRS, FRS_2018_19, FRS_2019_20, FRS_2020_21, FRS_2021_22
from .spi_enhanced_frs import (
SPIEnhancedFRS,
SPIEnhancedFRS_2019_20,
SPIEnhancedPooledFRS_2018_20,
SPIEnhancedPooledFRS_2019_21,
)
from .calibration.calibrated_frs import (
CalibratedFRS,
CalibratedFRS_2019_20,
CalibratedFRS_2019_21,
CalibratedSPIEnhancedFRS_2019_20,
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
)
from .stacked_frs import StackedFRS, PooledFRS_2018_20, PooledFRS_2019_21
from .stacked_frs import StackedFRS, PooledFRS_2019_21
from .enhanced_frs import EnhancedFRS
from .ukmod import UKMOD_FRS_2018
5 changes: 3 additions & 2 deletions policyengine_uk/data/datasets/frs/calibration/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,8 @@ def calibrate(
starting_loss = loss.item()
loss.backward()
optimizer.step()
if i % 5 == 0:
if i % 250 == 0:
yield adjusted_weights.detach().numpy()
current_loss = loss.item()
progress_bar.set_description_str(
f"Calibrating weights | Loss = {current_loss:,.3f}"
Expand Down Expand Up @@ -617,4 +618,4 @@ def calibrate(

print(f"Loss reduction: {loss_reduction:.3%}")

return adjusted_weights.detach().numpy()
yield adjusted_weights.detach().numpy()
65 changes: 13 additions & 52 deletions policyengine_uk/data/datasets/frs/calibration/calibrated_frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from ..stacked_frs import PooledFRS_2019_21
from ..uprated_frs import UpratedFRS
from ..spi_enhanced_frs import (
SPIEnhancedFRS_2019_20,
SPIEnhancedPooledFRS_2018_20,
SPIEnhancedPooledFRS_2019_21,
)

Expand All @@ -27,7 +25,7 @@ class CalibratedFRS(Dataset):
input_dataset: Type[Dataset]
time_period: int
epochs: int = None
learning_rate: float = 5e2
learning_rate: float = 1e3
min_loss: float = 0.035
log_dir: str = "."
time_period: str = None
Expand Down Expand Up @@ -63,73 +61,36 @@ def generate(self):
from .calibrate import calibrate

new_data = {}
input_dataset = self.input_dataset()
input_dataset = self.input_dataset(require=True)
data = input_dataset.load()
for year in range(self.time_period, self.time_period + self.num_years):
year = str(year)
adjusted_weights = calibrate(
self.input_dataset.name,
time_period=year,
training_log_path="calibration_log_cps.csv.gz",
overwrite_existing_log=year == str(self.time_period),
)
for variable in input_dataset.variables:
if variable not in new_data:
new_data[variable] = {}
if variable == "household_weight":
new_data[variable][year] = adjusted_weights
pass
elif "_weight" not in variable and (
(year == str(self.time_period)) or ("_id" in variable)
):
new_data[variable][year] = data[variable][...]

self.save_dataset(new_data)


CalibratedFRS_2019_20 = CalibratedFRS.from_dataset(
FRS_2019_20,
"calibrated_frs_2019",
"Calibrated FRS 2019-20",
new_num_years=1,
)


CalibratedFRS_2020_21 = CalibratedFRS.from_dataset(
UpratedFRS.from_dataset(FRS_2020_21, out_year=2023),
"calibrated_frs_2020",
"Calibrated FRS 2020-21",
new_num_years=1,
log_folder=".",
)

CalibratedFRS_2019_21 = CalibratedFRS.from_dataset(
PooledFRS_2019_21,
"calibrated_frs_2019_21",
"Calibrated FRS 2019-21",
new_num_years=2,
log_folder=".",
)

adjusted_weights = calibrate(
self.input_dataset.name,
time_period=year,
training_log_path="calibration_log.csv.gz",
overwrite_existing_log=year == str(self.time_period),
)
for partial_weights in adjusted_weights:
new_data["household_weight"][year] = partial_weights

CalibratedSPIEnhancedFRS_2019_20 = CalibratedFRS.from_dataset(
SPIEnhancedFRS_2019_20,
"calibrated_spi_enhanced_frs_2019",
"Calibrated SPI-enhanced FRS 2019-20",
new_num_years=1,
)
self.save_dataset(new_data)

CalibratedSPIEnhancedPooledFRS_2018_20 = CalibratedFRS.from_dataset(
SPIEnhancedPooledFRS_2018_20,
"calibrated_spi_enhanced_pooled_frs_2018_20",
"Calibrated SPI-enhanced FRS 2018-20",
log_folder=".",
new_num_years=3,
)

CalibratedSPIEnhancedPooledFRS_2019_21 = CalibratedFRS.from_dataset(
SPIEnhancedPooledFRS_2019_21,
"calibrated_spi_enhanced_pooled_frs_2019_21",
"Calibrated SPI-enhanced FRS 2019-21",
log_folder=".",
new_num_years=5,
new_num_years=7,
)
19 changes: 6 additions & 13 deletions policyengine_uk/data/datasets/frs/enhanced_frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import numpy as np
from typing import Type
from ..utils import STORAGE_FOLDER
from .stacked_frs import PooledFRS_2018_20
from .frs import FRS_2019_20
from .calibration.calibrated_frs import CalibratedSPIEnhancedPooledFRS_2019_21
import yaml
Expand Down Expand Up @@ -65,15 +64,15 @@ def generate(self):
frs_household_weight = simulation.calculate("household_weight").values
for imputation_model, targets in zip(
[consumption, vat, wealth],
[consumption_targets, {}, wealth_targets],
[{}, {}, {}],
):
i += 1
predictors = imputation_model.X_columns

X_input = simulation.calculate_dataframe(
predictors, map_to="household"
)
if i == 1:
if i == 3:
# WAS doesn't sample NI -> put NI households in Wales (closest aggregate)
X_input.loc[
X_input["region"] == "NORTHERN_IRELAND", "region"
Expand All @@ -86,7 +85,7 @@ def generate(self):
target_values,
X_input,
frs_household_weight,
max_iterations=8,
max_iterations=3,
)
else:
quantiles = None
Expand All @@ -96,12 +95,7 @@ def generate(self):

for output_variable in Y_output.columns:
values = Y_output[output_variable].values
data[output_variable] = {
year: values
for year in range(
self.time_period, self.time_period + self.num_years
)
}
data[output_variable] = {self.time_period: values}

self.save_dataset(data)

Expand All @@ -110,7 +104,6 @@ def generate(self):
CalibratedSPIEnhancedPooledFRS_2019_21,
"enhanced_frs",
"Enhanced FRS",
new_time_period=2023,
new_num_years=5,
new_url="release://policyengine/non-public-microdata/uk-2023-dec-calibration/enhanced_frs.h5",
new_num_years=7,
new_url="release://policyengine/non-public-microdata/uk-2024-march-efo/enhanced_frs.h5",
)
16 changes: 6 additions & 10 deletions policyengine_uk/data/datasets/frs/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,32 +110,28 @@ def generate(self):
frs.close()


FRS_2019_20 = FRS.from_dataset(
RawFRS_2019_20,
"frs_2019",
"FRS 2019-20",
new_url="release://policyengine/non-public-microdata/2023-q2-calibration/frs_2019.h5",
)

FRS_2018_19 = FRS.from_dataset(
RawFRS_2018_19,
"frs_2018",
"FRS 2018-19",
new_url="release://policyengine/non-public-microdata/2023-q2-calibration/frs_2018.h5",
)

FRS_2019_20 = FRS.from_dataset(
RawFRS_2019_20,
"frs_2019",
"FRS 2019-20",
)

FRS_2020_21 = FRS.from_dataset(
RawFRS_2020_21,
"frs_2020",
"FRS 2020-21",
new_url="release://policyengine/non-public-microdata/2023-q2-calibration/frs_2020.h5",
)

FRS_2021_22 = FRS.from_dataset(
RawFRS_2021_22,
"frs_2021",
"FRS 2021-22",
new_url="release://policyengine/non-public-microdata/2023-dec-calibration/frs_2021.h5",
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@

sim = Microsimulation()

total_income = sim.calculate("total_income", 2023)
total_income = sim.calculate("total_income", 2021)
cgt_revenue = system.parameters.calibration.programs.capital_gains.total

lower_income_bounds = list(splines)
uprating_from_2017 = cgt_revenue("2023-01-01") / cgt_revenue("2017-01-01")
uprating_from_2017 = cgt_revenue("2021-01-01") / cgt_revenue("2017-01-01")


def impute_capital_gains(total_income: float, age: float) -> float:
Expand Down
14 changes: 1 addition & 13 deletions policyengine_uk/data/datasets/frs/spi_enhanced_frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from typing import Type
from ..utils import STORAGE_FOLDER
from .stacked_frs import PooledFRS_2018_20, PooledFRS_2019_21
from .stacked_frs import PooledFRS_2019_21
from .frs import FRS_2019_20
from .uprated_frs import UpratedFRS

Expand Down Expand Up @@ -121,24 +121,12 @@ def generate(self):
self.save_dataset(new_values)


SPIEnhancedPooledFRS_2018_20 = SPIEnhancedFRS.from_dataset(
PooledFRS_2018_20,
"spi_enhanced_pooled_frs_2018_20",
"SPI-enhanced FRS 2018-20",
)

SPIEnhancedPooledFRS_2019_21 = SPIEnhancedFRS.from_dataset(
PooledFRS_2019_21,
"spi_enhanced_pooled_frs_2019_21",
"SPI-enhanced FRS 2019-21",
)

SPIEnhancedFRS_2019_20 = SPIEnhancedFRS.from_dataset(
UpratedFRS.from_dataset(FRS_2019_20),
"spi_enhanced_frs_2019",
"SPI-enhanced FRS 2019-20",
)

IMPUTATIONS = [
"employment_income",
"self_employment_income",
Expand Down
23 changes: 5 additions & 18 deletions policyengine_uk/data/datasets/frs/stacked_frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class StackedDatasetFromDataset(StackedFRS):
return StackedDatasetFromDataset

def generate(self):
sub_datasets = [dataset() for dataset in self.sub_datasets]
sub_datasets = [dataset(require=True) for dataset in self.sub_datasets]
variable_names = sub_datasets[0].variables
data = {}
for variable in variable_names:
Expand All @@ -59,27 +59,14 @@ def generate(self):
self.save_dataset(data)


PooledFRS_2018_20 = StackedFRS.from_dataset(
[
UpratedFRS.from_dataset(FRS_2018_19),
UpratedFRS.from_dataset(FRS_2019_20),
UpratedFRS.from_dataset(FRS_2020_21),
],
[0.0, 0.0, 1.0],
"pooled_frs_2018_20",
"FRS 2018-20",
2023,
)

PooledFRS_2019_21 = StackedFRS.from_dataset(
[
UpratedFRS.from_dataset(FRS_2019_20),
UpratedFRS.from_dataset(FRS_2020_21),
UpratedFRS.from_dataset(FRS_2021_22),
UpratedFRS.from_dataset(FRS_2019_20, out_year=2021),
UpratedFRS.from_dataset(FRS_2020_21, out_year=2021),
UpratedFRS.from_dataset(FRS_2021_22, out_year=2021),
],
[0.0, 0.0, 1.0],
"pooled_frs_2019_21",
"FRS 2019-21",
2023,
# new_url="release://policyengine/non-public-microdata/2023-dec-calibration/pooled_frs_2019_21.h5",
2021,
)
2 changes: 1 addition & 1 deletion policyengine_uk/data/datasets/frs/uprated_frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class UpratedFRS(Dataset):
@staticmethod
def from_dataset(
dataset: Type[Dataset],
out_year: int = 2023,
out_year: int = 2024,
):
class UpratedFRSFromDataset(UpratedFRS):
name = f"{dataset.name}_uprated_{out_year}"
Expand Down
Binary file modified policyengine_uk/data/gov/enhanced_frs_brmas.csv.gz
Binary file not shown.
Loading
Loading