Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NI validation #779

Merged
merged 9 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/python.code-snippets
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
" entity = ${4:Person}",
" definition_period = ${5:YEAR}",
" value_type = ${6:float}",
" unit = \"${7:currency-GBP}\"",
" unit = ${7:GBP}",
"",
""
],
Expand Down
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
changed:
- Validated and standardised National Insurance variables.
2 changes: 2 additions & 0 deletions policyengine_uk/data/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
EnhancedFRS,
UKMOD_FRS_2018,
)

DATASETS = [
Expand All @@ -34,4 +35,5 @@
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
EnhancedFRS,
UKMOD_FRS_2018,
]
1 change: 1 addition & 0 deletions policyengine_uk/data/datasets/frs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
)
from .stacked_frs import StackedFRS, PooledFRS_2018_20, PooledFRS_2019_21
from .enhanced_frs import EnhancedFRS
from .ukmod import UKMOD_FRS_2018
63 changes: 63 additions & 0 deletions policyengine_uk/data/datasets/frs/ukmod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pandas as pd
from policyengine_uk.data.storage import STORAGE_FOLDER
import numpy as np
from policyengine_core.data import Dataset


class UKMOD_FRS_2018(Dataset):
    """PolicyEngine dataset built from the UKMOD 2018-19 FRS files.

    ``generate`` reads the tab-separated UKMOD output (``uk_2018_std.txt``)
    and input (``uk_2018_a4.txt``) files from ``STORAGE_FOLDER``, joins them
    on ``idperson`` and stores entity IDs, age, gender and six income
    variables under the dataset's ``time_period``.
    """

    name = "ukmod_frs_2018"
    label = "UKMOD (2018-19 FRS)"
    data_format = Dataset.TIME_PERIOD_ARRAYS
    file_path = STORAGE_FOLDER / "ukmod_frs_2018.h5"
    time_period = "2018"

    def generate(self):
        """Build the dataset from the raw UKMOD files and save it.

        The two source files must exist in ``STORAGE_FOLDER``; any error
        raised by ``pandas.read_csv`` (e.g. a missing file) propagates.
        """
        ukmod_output = pd.read_csv(
            STORAGE_FOLDER / "uk_2018_std.txt", delimiter="\t"
        )
        ukmod_input = pd.read_csv(
            STORAGE_FOLDER / "uk_2018_a4.txt", delimiter="\t"
        )
        # Keep only the output columns that the input file does not already
        # provide, so the merge below produces no duplicated columns.
        output_columns = [
            column
            for column in ukmod_output.columns
            if column not in ukmod_input.columns
        ]
        # Right join: every person in the input file is kept, even when the
        # output file has no row for them.
        ukmod = pd.merge(
            ukmod_output[output_columns + ["idperson"]],
            ukmod_input,
            on="idperson",
            how="right",
        )

        data = {}

        # Add ID variables first
        data["person_id"] = ukmod.idperson
        # NOTE(review): this adds the household ID to ten times the
        # benefit-unit ID, while household IDs below are idorighh * 100. If
        # idorigbenunit is only unique within a household, two different
        # benefit units could receive the same ID - confirm against the
        # UKMOD ID scheme before relying on benefit-unit aggregates.
        data["person_benunit_id"] = person_benunit_id = (
            ukmod.idorigbenunit * 10 + ukmod.idorighh
        )
        data["person_household_id"] = person_household_id = (
            ukmod.idorighh * 100
        )
        # Everyone belongs to the single state entity with ID 1.
        data["person_state_id"] = np.ones_like(ukmod.idperson)

        data["benunit_id"] = person_benunit_id.unique()
        data["household_id"] = person_household_id.unique()
        data["state_id"] = np.array([1])

        data["age"] = ukmod.dag.values
        # dgn == 0 is mapped to FEMALE, anything else to MALE; stored as
        # byte strings.
        data["gender"] = np.where(
            ukmod.dgn == 0,
            "FEMALE",
            "MALE",
        ).astype("S")
        # UKMOD amounts are multiplied by 12 (presumably monthly values being
        # annualised - confirm against the UKMOD variable documentation).
        data["employment_income"] = ukmod.yem.values * 12
        data["self_employment_income"] = ukmod.yse.values * 12
        data["pension_income"] = ukmod.ypp.values * 12
        data["statutory_sick_pay"] = ukmod.bhlwk.values * 12
        data["statutory_maternity_pay"] = ukmod.bmact_s.values * 12
        data["statutory_paternity_pay"] = ukmod.bpact_s.values * 12

        # TIME_PERIOD_ARRAYS expects {variable: {period: values}}. The period
        # key comes from the class attribute rather than a second hard-coded
        # "2018", and a new dict is built instead of rebinding entries of the
        # dict being iterated.
        period = str(self.time_period)
        self.save_dataset(
            {variable: {period: values} for variable, values in data.items()}
        )
3 changes: 3 additions & 0 deletions policyengine_uk/data/storage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from pathlib import Path

# Directory that contains this module; data files are addressed relative to it
# (e.g. STORAGE_FOLDER / "some_file.h5").
STORAGE_FOLDER = Path(__file__).parent
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ values:
2012-01-01: 62
2014-01-01: 63
2016-01-01: 64
2018-01-01: 65
2019-01-01: 65
2020-01-01: 66
metadata:
unit: year
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ metadata:
- https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/882271/Table-a4.pdf
- https://www.gov.uk/government/publications/rates-and-allowances-national-insurance-contributions/rates-and-allowances-national-insurance-contributions
unit: currency-GBP
uprating: gov.benefit_uprating_cpi
values:
2015-04-06: 155
2016-04-06: 155
Expand All @@ -24,3 +23,4 @@ values:
title: The Social Security (Contributions) (Rates, Limits and Thresholds Amendments
and National Insurance Funds Payments) Regulations 2022(6)
value: 175
2024-01-01: 175
66 changes: 66 additions & 0 deletions policyengine_uk/tests/microsimulation/test_against_ukmod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from policyengine_uk import Microsimulation
from policyengine_uk.data.datasets import UKMOD_FRS_2018
from policyengine_uk.data.storage import STORAGE_FOLDER
import pandas as pd
import numpy as np
import pytest

# While this is True nothing below runs at import time. Set it to False only
# when the UKMOD files are available in STORAGE_FOLDER: the module then loads
# them, regenerates the dataset and builds the simulation on import.
SKIP_UKMOD_TESTS = True

if not SKIP_UKMOD_TESTS:
    ukmod_input = pd.read_csv(
        STORAGE_FOLDER / "uk_2018_a4.txt", delimiter="\t"
    )
    ukmod_output = pd.read_csv(
        STORAGE_FOLDER / "uk_2018_std.txt", delimiter="\t"
    )
    # Output columns that are not already present in the input file.
    output_columns = [
        name for name in ukmod_output.columns if name not in ukmod_input.columns
    ]
    # Right join on the person ID keeps every row of the input file.
    ukmod = ukmod_output[output_columns + ["idperson"]].merge(
        ukmod_input,
        on="idperson",
        how="right",
    )

    # Rebuild the stored dataset, then simulate on it.
    UKMOD_FRS_2018().generate()
    sim = Microsimulation(dataset="ukmod_frs_2018")


# Skip only while SKIP_UKMOD_TESTS is set, so that switching the flag off
# actually runs the test instead of loading the data and skipping anyway.
@pytest.mark.skipif(
    SKIP_UKMOD_TESTS, reason="UKMOD data not publicly shareable"
)
def test_ni_class_1():
    """Check ``ni_class_1_income`` against UKMOD's ``il_empniearns`` column.

    The UKMOD column is multiplied by 12 before comparison (presumably a
    monthly amount being annualised). Values must agree within an absolute
    tolerance of 1, on top of NumPy's default relative tolerance.
    """
    # NI Class 1 income matches.
    assert np.allclose(
        sim.calculate("ni_class_1_income").values,
        ukmod.il_empniearns.values * 12,
        atol=1,
    )


# Skip only while SKIP_UKMOD_TESTS is set, so that switching the flag off
# actually runs the test instead of loading the data and skipping anyway.
@pytest.mark.skipif(
    SKIP_UKMOD_TESTS, reason="UKMOD data not publicly shareable"
)
def test_ni_class_1_employee():
    """Check ``ni_class_1_employee`` against UKMOD's ``tscee_s`` column.

    The UKMOD column is multiplied by 12 before comparison and an absolute
    tolerance of 50 is allowed, for the reason given below.
    """
    # NI contributions are off by more because the thresholds change mid-year,
    # and PolicyEngine simulates over the full year while UKMOD simulates one
    # month.
    assert np.allclose(
        sim.calculate("ni_class_1_employee").values,
        ukmod.tscee_s.values * 12,
        atol=50,
    )


# Skip only while SKIP_UKMOD_TESTS is set, so that switching the flag off
# actually runs the test instead of loading the data and skipping anyway.
@pytest.mark.skipif(
    SKIP_UKMOD_TESTS, reason="UKMOD data not publicly shareable"
)
def test_ni_self_employed():
    """Check ``ni_self_employed`` against UKMOD's ``tscse_s`` column.

    Unlike the Class 1 tests this is a share-based check: more than 99% of
    people must have an absolute difference below 50 after the UKMOD column
    is multiplied by 12.
    """
    # NI self-employed contributions don't match entirely for people with both
    # self-employed and employment income. This might be due to a different
    # interpretation of the rules around capped NI contributions (our Class 4
    # maximum uses the legislation as a reference).

    error = np.abs(
        sim.calculate("ni_self_employed").values - ukmod.tscse_s.values * 12
    )

    # Mean of the boolean mask = share of people within the tolerance.
    assert (error < 50).mean() > 0.99

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
- name: NI Class 1 employee contributions - below PT
period: 2023
input:
ni_class_1_income: 11_000
output:
ni_class_1_employee: 0

- name: NI Class 1 employee contributions - between PT and UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 30_000
output:
ni_class_1_employee: (30_000 - 12_570) * 0.12

- name: NI Class 1 employee contributions - above UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 70000
output:
ni_class_1_employee: (50_270 - 12_570) * 0.12 + (70_000 - 50_270) * 0.0325
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
- name: NI Class 1 employee additional contributions - income below UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 30_000
output:
ni_class_1_employee_additional: 0

- name: NI Class 1 employee additional contributions - income above UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 100_000
output:
ni_class_1_employee_additional: (100_000 - 50_270) * 0.0325
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: £20k income has NI Class 1 liability
period: 2023
absolute_error_margin: 1
input:
employment_income: 20_000
output:
ni_class_1_employee_primary: (20_000 - 12_570) * 0.12
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
- name: NI Class 1 employer contributions for low income in 2023
period: 2023
absolute_error_margin: 0.01
input:
ni_class_1_income: 8_000 # Annual income below secondary threshold
output:
ni_class_1_employer: 0.00 # Expected employer contributions

- name: NI Class 1 employer contributions for moderate income in 2023
period: 2023
absolute_error_margin: 0.01
input:
ni_class_1_income: 30_000 # Annual income above secondary threshold but below upper limit
output:
ni_class_1_employer: (30_000 - 175 * 52) * 0.138 # Expected employer contributions
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
- name: NI Class 1 income sums income components.
period: 2023
input:
employment_income: 1
statutory_sick_pay: 2
statutory_maternity_pay: 4
statutory_paternity_pay: 8
output:
ni_class_1_income: 15
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
- name: Child isn't liable for NI.
period: 2023
input:
age: 15
output:
ni_liable: false

- name: Working-age adult is liable for NI.
period: 2023
input:
age: 35
output:
ni_liable: true

- name: Retired adult isn't liable for NI.
period: 2023
input:
age: 70
output:
ni_liable: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
- name: NI Class 2 - under LPL.
period: 2023
input:
self_employment_income: 5_000
output:
ni_class_2: 0

- name: NI Class 2 - over LPL.
period: 2023
input:
self_employment_income: 15_000
output:
ni_class_2: 3.15 * 52
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Class 4 NI for self-employment income of 30_000 in 2023.
# NOTE(review): the expected 1628 is consistent with
# (30_000 - 11_908) * 0.09 = 1_628.28 - confirm the lower profits limit and
# main rate against the parameter files for this period.
- name: NI Class 4 - under UPL.
period: 2023
absolute_error_margin: 1
input:
self_employment_income: 30_000
output:
ni_class_4: 1628
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Main-rate Class 4 NI for self-employment income of 100_000 in 2023.
# NOTE(review): the expected 3452 is consistent with
# (50_270 - 11_908) * 0.09 = 3_452.58, i.e. the main-rate band ending at
# 50_270 - confirm both limits and the rate against the parameter files.
- name: NI Class 4 - over UPL has maximum capped.
period: 2023
absolute_error_margin: 1
input:
self_employment_income: 100_000
output:
ni_class_4_main: 3452
Loading
Loading