diff --git a/data/census/filtered.py b/data/census/filtered.py index d5fd5b60..c1080e5c 100644 --- a/data/census/filtered.py +++ b/data/census/filtered.py @@ -24,9 +24,12 @@ def execute(context): initial_persons = len(df["person_id"].unique()) removed_persons = np.count_nonzero(df["household_id"].isin(remove_ids)) - # Verify with requested codes + # Filter requested codes df_codes = context.stage("data.spatial.codes") + requested_departements = df_codes["departement_id"].unique() + df = df[df["departement_id"].isin(requested_departements)] + excess_communes = set(df["commune_id"].unique()) - set(df_codes["commune_id"].unique()) if not excess_communes == {"undefined"}: raise RuntimeError("Found additional communes: %s" % excess_communes)