From 1719e7090aaeb1ae5f277b27fd44b2d75d5b8346 Mon Sep 17 00:00:00 2001 From: Luke Hubbard <104831245+lhubbardONS@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:10:40 +0100 Subject: [PATCH] Fixing errors with standardising factor calculation without domain (#91) --- mbs_results/unsorted/selective_editing.py | 12 ++++++------ tests/create_standardising_factor_data.csv | 4 ++-- tests/test_unsorted/test_selective_editing.py | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/mbs_results/unsorted/selective_editing.py b/mbs_results/unsorted/selective_editing.py index 2c9d678a..214ec19c 100644 --- a/mbs_results/unsorted/selective_editing.py +++ b/mbs_results/unsorted/selective_editing.py @@ -94,18 +94,18 @@ def create_standardising_factor( previous_df = dataframe[(dataframe[period] == previous_period)] previous_df = previous_df[previous_df[question_code].isin([40, 49])] - previous_df["standardising_factor"] = ( + # The standardising factor is created for each record before summing for each + # domain-question grouping. + previous_df["unit_standardising_factor"] = ( previous_df[predicted_value] * previous_df[a_weight] * previous_df[o_weight] * previous_df[g_weight] ) - previous_df = previous_df.assign( - standardising_factor=lambda x: x.groupby([domain, question_code]).transform( - "sum" - )["standardising_factor"] - ).astype({"standardising_factor": "float"}) + previous_df["standardising_factor"] = previous_df.groupby([domain, question_code])[ + "unit_standardising_factor" + ].transform("sum") output_df = previous_df[ [ diff --git a/tests/create_standardising_factor_data.csv b/tests/create_standardising_factor_data.csv index 28b853e0..712deeba 100755 --- a/tests/create_standardising_factor_data.csv +++ b/tests/create_standardising_factor_data.csv @@ -10,6 +10,6 @@ reference,period,domain,question_code,predicted_value,imputation_marker,a_weight 10005,202401,17,40,1001,FIR,2.0,2.0,3.0,95,12012.0 10005,202402,19,40,532,FIR,7.0,3.0,3.0,29, 10006,202401,19,40,336,C,2.0,1.0,4.0,14,2688.0 -10006,202401,19,49,461,C,3.0,1.0,4.0,58,9084.0 -10007,202401,19,49,222,BIR,4.0,2.0,2.0,67,9084.0 +10006,202401,19,49,461,C,3.0,1.0,4.0,58,5532.0 +10007,202401,,49,222,BIR,4.0,2.0,2.0,67, 10008,202401,17,40,,,4.0,1.0,5.0,48,12012.0 diff --git a/tests/test_unsorted/test_selective_editing.py b/tests/test_unsorted/test_selective_editing.py index 8a8438d0..68211744 100755 --- a/tests/test_unsorted/test_selective_editing.py +++ b/tests/test_unsorted/test_selective_editing.py @@ -33,7 +33,8 @@ def test_create_standardising_factor( create_standardising_factor_data, ): expected_output = create_standardising_factor_data[ - create_standardising_factor_data["standardising_factor"].notna() + (create_standardising_factor_data["period"] == 202401) + & (create_standardising_factor_data["question_code"].isin([40, 49])) ] expected_output = expected_output[ [