Skip to content

Commit

Permalink
Fix classified product migration - see HEA-154
Browse files Browse the repository at this point in the history
Includes renaming cpcv2 to cpc as the primary key,
and creating a separate cpcv2 JSONField to store
the correspondence codes from CPC v2 to v2.1,
in the same way that the hs2012 field is used.
  • Loading branch information
rhunwicks committed Jan 25, 2024
1 parent 4b66d91 commit a020b4c
Show file tree
Hide file tree
Showing 26 changed files with 9,175 additions and 294 deletions.
3 changes: 1 addition & 2 deletions apps/baseline/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@


class Migration(migrations.Migration):

initial = True

dependencies = [
("common", "0006_unitofmeasure_aliases"),
("common", "0001_initial"),
("metadata", "0001_initial"),
]

Expand Down
6 changes: 3 additions & 3 deletions apps/baseline/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def get_by_natural_key(
else:
criteria["wealth_group__community__isnull"] = True
if product:
criteria["product__cpcv2"] = product
criteria["product__cpc"] = product
else:
criteria["product__isnull"] = True

Expand Down Expand Up @@ -1759,7 +1759,7 @@ def clean(self):
if not self.crop_id.startswith("R01"):
raise ValidationError(
_(
"Crop type for Community Crop Production must have a CPCv2 code beginning R01 (Agriculture Products)" # NOQA: E501
"Crop type for Community Crop Production must have a CPC code code beginning R01 (Agriculture Products)" # NOQA: E501
)
)
super().clean()
Expand Down Expand Up @@ -1825,7 +1825,7 @@ class CommunityLivestock(common_models.Model):
def clean(self):
if not self.livestock_id.startswith("L021"):
raise ValidationError(
_("Livestock type for Community Livestock must have a CPCv2 code beginning L021 (Live animals)")
_("Livestock type for Community Livestock must have a CPC code code beginning L021 (Live animals)")
)
super().clean()

Expand Down
4 changes: 2 additions & 2 deletions apps/baseline/tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ class Meta:
community = factory.SubFactory(CommunityFactory)
crop = factory.SubFactory(
"common.tests.factories.ClassifiedProductFactory",
cpcv2=factory.Iterator([f"R019{n}" for n in range(1, 10)]),
cpc=factory.Iterator([f"R019{n}" for n in range(1, 10)]),
)
crop_purpose = factory.Iterator(["food", "cash"])
season = factory.SubFactory(SeasonFactory)
Expand All @@ -627,7 +627,7 @@ class Meta:
community = factory.SubFactory(CommunityFactory)
livestock = factory.SubFactory(
"common.tests.factories.ClassifiedProductFactory",
cpcv2=factory.Iterator([f"L021{n}" for n in range(1, 10)]),
cpc=factory.Iterator([f"L021{n}" for n in range(1, 10)]),
)
birth_interval = factory.Sequence(lambda n: n + 1)
wet_season_lactation_period = fuzzy.FuzzyInteger(1, 80)
Expand Down
12 changes: 6 additions & 6 deletions apps/baseline/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,17 +243,17 @@ def test_livelihoodstrategy_search_fields(self):
{"q": self.strategy1.livelihood_zone_baseline.livelihood_zone.name_en},
)
self.assertEqual(response.status_code, 200)
self.assertContains(response, self.strategy1.product.cpcv2)
self.assertNotContains(response, self.strategy2.product.cpcv2)
self.assertContains(response, self.strategy1.product.cpc)
self.assertNotContains(response, self.strategy2.product.cpc)

def test_livelihoodstrategy_list_filter(self):
response = self.client.get(
self.url,
{"strategy_type": self.strategy2.strategy_type},
)
self.assertEqual(response.status_code, 200)
self.assertContains(response, self.strategy2.product.cpcv2)
self.assertNotContains(response, self.strategy1.product.cpcv2)
self.assertContains(response, self.strategy2.product.cpc)
self.assertNotContains(response, self.strategy1.product.cpc)


class WealthGroupAdminTest(TestCase):
Expand Down Expand Up @@ -570,9 +570,9 @@ def setUpTestData(cls):
cls.url = "admin:baseline_communitylivestock_changelist"
cls.community1 = CommunityFactory(name="Test Community")
cls.livestockproduction1 = CommunityLivestockFactory(
community=cls.community1, livestock=ClassifiedProductFactory(cpcv2="L021001")
community=cls.community1, livestock=ClassifiedProductFactory(cpc="L021001")
)
cls.livestockproduction2 = CommunityLivestockFactory(livestock=ClassifiedProductFactory(cpcv2="L021002"))
cls.livestockproduction2 = CommunityLivestockFactory(livestock=ClassifiedProductFactory(cpc="L021002"))
cls.site = AdminSite()
activate("en")

Expand Down
6 changes: 3 additions & 3 deletions apps/baseline/tests/test_pipelines.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
from django.test import TestCase
from kiluigi.utils import submit_task

Expand All @@ -21,9 +22,9 @@ class IngestionPipelineTestCase(TestCase):
@classmethod
def setUpTestData(cls):
CountryFactory(iso3166a2="MW", iso3166a3="MWI", iso3166n3=454, iso_en_ro_name="Malawi")
ClassifiedProductFactory(cpcv2="L02151", description="Chickens", aliases=["chicken", "hen", "hens"]),
ClassifiedProductFactory(cpc="L02151", description="Chickens", aliases=["chicken", "hen", "hens"]),
ClassifiedProductFactory(
cpcv2="L02111AP", description_en="Cattle, oxen, unspecified", common_name_en="Oxen", aliases=["ox"]
cpc="L02111AP", description_en="Cattle, oxen, unspecified", common_name_en="Oxen", aliases=["ox"]
)
UnitOfMeasureFactory(abbreviation="acre", unit_type=UnitOfMeasure.AREA, aliases=["acres"], conversion=None)
LivelihoodCategoryFactory(
Expand Down Expand Up @@ -241,7 +242,6 @@ def test_import_baseline(self):
# Capture logging and direct writes to stdout (because loaddata writes
# to stdout directly), so that unit test output is still clean.
with conditional_logging():

task = ImportBaseline(
bss_path="apps/baseline/tests/bss.xlsx", metadata_path="apps/baseline/tests/metadata.xlsx"
)
Expand Down
14 changes: 9 additions & 5 deletions apps/common/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@


class CurrencyAdmin(admin.ModelAdmin):

list_display = ("iso4217a3", "iso4217n3", "iso_en_name")
search_fields = ["iso4217a3", "iso4217n3", "iso_en_name"]

Expand All @@ -38,24 +37,27 @@ class CountryClassifiedProductAliasesInline(InlineModelAdmin):
class ClassifiedProductAdmin(TreeAdmin):
form = movenodeform_factory(ClassifiedProduct)
fields = (
"cpcv2",
"cpc",
*translation_fields("description"),
*translation_fields("common_name"),
"scientific_name",
"unit_of_measure",
"kcals_per_unit",
"aliases",
"cpcv2",
"hs2012",
"_position",
"_ref_node_id",
)
list_display = ("cpcv2", "description", "common_name", "scientific_name")
ordering = ["cpcv2"]
list_display = ("cpc", "description", "common_name", "scientific_name")
ordering = ["cpc"]
search_fields = (
"^cpcv2",
"^cpc",
*translation_fields("description"),
*translation_fields("common_name"),
"scientific_name",
"cpcv2",
"hs2012",
"per_country_aliases__country__iso_en_ro_name",
"per_country_aliases__country__iso_en_name",
"per_country_aliases__country__iso_en_ro_proper",
Expand All @@ -76,10 +78,12 @@ class UnitOfMeasureAdmin(admin.ModelAdmin):
list_display = (
"abbreviation",
"description",
"aliases",
)
search_fields = (
"abbreviation",
*translation_fields("description"),
"aliases",
)
list_filter = ("unit_type",)
inlines = [UnitOfMeasureConversionInline]
Expand Down
31 changes: 28 additions & 3 deletions apps/common/lookups.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def get_instances(self, df, column, related_models=None):

class CountryClassifiedProductAliasesLookup(Lookup):
model = CountryClassifiedProductAliases
id_fields = ["product__cpcv2"]
id_fields = ["product__cpc"]
parent_fields = ["country"]
lookup_fields = ["aliases"]
require_match = False
Expand Down Expand Up @@ -348,11 +348,12 @@ class ClassifiedProductLookup(Lookup):
# products to be leaf nodes in the tree. If that behavior is required in
# HEA, then pass `filters={"numchild": 0}` when instantiating the Lookup.
model = ClassifiedProduct
id_fields = ["cpcv2"]
id_fields = ["cpc"]
lookup_fields = (
*translation_fields("common_name"),
*translation_fields("description"),
"aliases",
"cpcv2",
"hs2012",
)

Expand All @@ -367,14 +368,38 @@ def get_lookup_df(self):
"""
df = super().get_lookup_df()

# Split hs2012 into columns
# Split hs2012 and cpcv2 into columns
# Start by replacing None/null alias values with an empty list
df["cpcv2"] = df["cpcv2"].apply(lambda x: x if x else [])
df["hs2012"] = df["hs2012"].apply(lambda x: x if x else [])
# Append the hs2012 codes as separate columns
df = pd.concat([df.drop("cpcv2", axis="columns"), pd.DataFrame(df["cpcv2"].tolist())], axis="columns")
df = pd.concat([df.drop("hs2012", axis="columns"), pd.DataFrame(df["hs2012"].tolist())], axis="columns")

return df

def prepare_lookup_df(self):
"""
Remove parent nodes with a single child to avoid spurious multiple match errors.
Some Classified Product parent nodes have a single child that is the identical to the parent. For example:
- R015 Edible roots and tubers with high starch or inulin content
- R0151 Potatoes
- R01510 Potatoes
In this case, a search for "potatoes" should return the leaf node, R01510, and ignore the parent R0151. To do
this we need to remove the R0151 row from the lookup_df, otherwise the search raises:
ValueError: ClassifiedProductLookup found multiple ClassifiedProduct matches for some rows
"""
df = super().prepare_lookup_df()
# Drop any rows where the lookup_key is duplicated but there is another row in the dataframe with the same
# lookup_value suffixed with "0", which is the convention CPC uses for a parent with a single child.
unwanted_parents = df[
(df["lookup_key"].duplicated(keep=False)) & (df["lookup_value"] + "0").isin(df["lookup_value"])
]
df = pd.concat([df, unwanted_parents]).drop_duplicates(keep=False)
return df


class UnitOfMeasureLookup(Lookup):
model = UnitOfMeasure
Expand Down
138 changes: 0 additions & 138 deletions apps/common/migrations/0005_load_classified_product.py

This file was deleted.

Loading

0 comments on commit a020b4c

Please sign in to comment.