Productionize aggregation endpoint, see #HEA-659

FEWS-NET · Jan 28, 2025 · 3eaf69a · 3eaf69a
1 parent da40467
commit 3eaf69a
Show file tree

Hide file tree

Showing 6 changed files with 522 additions and 248 deletions.
diff --git a/apps/baseline/models.py b/apps/baseline/models.py
@@ -1123,7 +1123,7 @@ class LivelihoodActivity(common_models.Model):
     quantity_sold = models.PositiveIntegerField(blank=True, null=True, verbose_name=_("Quantity Sold/Exchanged"))
     quantity_other_uses = models.PositiveIntegerField(blank=True, null=True, verbose_name=_("Quantity Other Uses"))
     # Can normally be calculated / validated as `quantity_produced + quantity_purchased - quantity_sold - quantity_other_uses`  # NOQA: E501
-    # but there are exceptions, such as MilkProduction, where there is also an amount used for ButterProduction, is this captured quantity_other_uses?  # NOQA: E501
+    # but there are exceptions, such as MilkProduction, which also stores MilkProduction.quantity_butter_production
     quantity_consumed = models.PositiveIntegerField(blank=True, null=True, verbose_name=_("Quantity Consumed"))
 
     price = models.FloatField(

diff --git a/apps/baseline/serializers.py b/apps/baseline/serializers.py
@@ -1,11 +1,10 @@
-from django.db.models import Sum
+from django.db.models import F, FloatField, Sum
 from django.utils import translation
-from rest_framework import fields as rest_framework_fields
 from rest_framework import serializers
 from rest_framework_gis.serializers import GeoFeatureModelSerializer
 
 from common.fields import translation_fields
-from metadata.models import LivelihoodStrategyType
+from common.serializers import AggregatingSerializer
 
 from .models import (
     BaselineLivelihoodActivity,
@@ -1472,28 +1471,78 @@ def get_wealth_group_label(self, obj):
         return str(obj.wealth_group)
 
 
-class DictQuerySetField(rest_framework_fields.SerializerMethodField):
-    def __init__(self, field_name=None, **kwargs):
-        self.field_name = field_name
-        super().__init__(**kwargs)
+class LivelihoodZoneBaselineReportSerializer(AggregatingSerializer):
+    """
+    This endpoint needs two ‘levels’ of filter. The standard filters, which are already on the LZB endpoint,
+    determine which LZBs are returned (eg, population range and wealth group). Let’s call them ‘global’ filters.
+    Everything needs filtering by wealth group or population, if those filters are active.
 
-    def to_representation(self, obj):
-        return self.parent.get_field(obj, self.field_name)
+    The data ‘slice’ strategy type and product filters do not remove LZBs from the results by themselves; they
+    only exclude values from the calculated slice statistics.
 
+    If a user selects Sorghum, the kcals income for the slice only includes Sorghum. The kcals income for the slice
+    is then divided by the kcals income of the global set to give the kcals income percent.
+
+    The global filters are identical to those already on the LZB endpoint (and will always be - it is sharing the
+    code). These are applied to the LZB, row and slice totals.
+
+    The slice filters are:
+
+      - slice_by_product (for multiple, repeat the parameter, eg, slice_by_product=R0&slice_by_product=B01). These
+        match any CPC code that starts with the value. (The client needs to convert the selected product to CPC.)
+
+      - slice_by_strategy_type - you can specify multiple, and you need to pass the code not the label (which could be
+        translated). (These are case-insensitive but otherwise must be an exact match.)
+
+    The slice is defined by matching any of the products, AND any of the strategy types (as opposed to OR).
+
+    Translated fields, eg, name, description, are rendered in the currently selected locale if possible. (Except
+    Country, which has different translations following ISO.) This can be selected in the UI or set using eg,
+    &language=pt which overrides the UI selection.
+
+    Select the fields you want using the &fields= parameter in the usual way. If you omit the fields parameter, all
+    fields are returned. These are currently the same field list as the normal LZB endpoint, plus the aggregations,
+    eg, slice_sum_kcals_consumed, sum_kcals_consumed, kcals_consumed_percent, plus the product CPC and the translated
+    product common name. If you omit a field, the statistics for that field will be aggregated together.
+
+    The ordering code is also shared with the normal LZB endpoint, which uses the standard
+    &ordering= parameter. If no ordering is specified, the results are sorted by the aggregations descending, ie,
+    biggest percentage first.
+
+    The strategy type codes are:
+        MilkProduction
+        ButterProduction
+        MeatProduction
+        LivestockSale
+        CropProduction
+        FoodPurchase
+        PaymentInKind
+        ReliefGiftOther
+        Hunting
+        Fishing
+        WildFoodGathering
+        OtherCashIncome
+        OtherPurchase
+
+    The product hierarchy can be retrieved from the classified product endpoint /api/classifiedproduct/.
+
+    You can then filter by any of the calculated fields. To do so, prefix the field name with min_ or max_.
+    """
 
-class LivelihoodZoneBaselineReportSerializer(serializers.ModelSerializer):
     class Meta:
         model = LivelihoodZoneBaseline
         fields = (
-            "id",
-            "name",
-            "description",
             "source_organization",
             "source_organization_name",
-            "livelihood_zone",
-            "livelihood_zone_name",
             "country_pk",
             "country_iso_en_name",
+            "livelihoodzone_pk",
+            "livelihood_zone",
+            "livelihood_zone_name",
+            "id",
+            "name",
+            "description",
+            "wealth_group_category_code",
             "main_livelihood_category",
             "bss",
             "currency",
@@ -1503,112 +1552,33 @@ class Meta:
             "valid_to_date",  # to display "is latest" / "is historic" in the UI for each ref yr
             "population_source",
             "population_estimate",
-            "livelihoodzone_pk",
-            "livelihood_strategy_pk",
             "strategy_type",
+            "livelihood_strategy_pk",
             "livelihood_activity_pk",
-            "wealth_group_category_code",
-            "population_estimate",
-            "slice_sum_kcals_consumed",
-            "sum_kcals_consumed",
-            "kcals_consumed_percent",
             "product_cpc",
             "product_common_name",
         )
 
-    # For each of these aggregates the following calculation columns are added:
-    #   (a) Total at the LZB level (filtered by population, wealth group, etc), eg, sum_kcals_consumed.
-    #   (b) Total for the selected product/strategy type slice, eg, slice_sum_kcals_consumed.
-    #   (c) The percentage the slice represents of the whole, eg, kcals_consumed_percent.
-    # Filters are automatically created, eg, min_kcals_consumed_percent and max_kcals_consumed_percent.
-    # If no ordering is specified by the FilterSet, the results are ordered by percent descending in the order here.
     aggregates = {
         "kcals_consumed": Sum,
+        "income": Sum,
+        "expenditure": Sum,
+        "percentage_kcals": Sum,
+        "kcal_income_sum": Sum(
+            (
+                F("livelihood_strategies__livelihoodactivity__quantity_purchased")
+                + F("livelihood_strategies__livelihoodactivity__quantity_produced")
+            )
+            * F("livelihood_strategies__product__kcals_per_unit"),
+            output_field=FloatField(),
+        ),
     }
 
-    # For each of these pairs, a URL parameter is created "slice_{field}", eg, ?slice_product=
-    # They can appear zero, one or multiple times in the URL, and define a sub-slice of the row-level data.
-    # A slice includes activities with ANY of the products, AND, ANY of the strategy types.
-    # For example: (product=R0 OR product=L0) AND (strategy_type=MilkProd OR strategy_type=CropProd)
     slice_fields = {
         "product": "livelihood_strategies__product__cpc__istartswith",
         "strategy_type": "livelihood_strategies__strategy_type__iexact",
     }
 
-    livelihood_zone_name = DictQuerySetField("livelihood_zone_name")
-    source_organization_name = DictQuerySetField("source_organization_pk")
-    country_pk = DictQuerySetField("country_pk")
-    country_iso_en_name = DictQuerySetField("country_iso_en_name")
-    livelihoodzone_pk = DictQuerySetField("livelihoodzone_pk")
-    livelihood_strategy_pk = DictQuerySetField("livelihood_strategy_pk")
-    livelihood_activity_pk = DictQuerySetField("livelihood_activity_pk")
-    wealth_group_category_code = DictQuerySetField("wealth_group_category_code")
-    id = DictQuerySetField("id")
-    name = DictQuerySetField("name")
-    description = DictQuerySetField("description")
-    source_organization = DictQuerySetField("source_organization")
-    livelihood_zone = DictQuerySetField("livelihood_zone")
-    main_livelihood_category = DictQuerySetField("main_livelihood_category")
-    bss = DictQuerySetField("bss")
-    currency = DictQuerySetField("currency")
-    reference_year_start_date = DictQuerySetField("reference_year_start_date")
-    reference_year_end_date = DictQuerySetField("reference_year_end_date")
-    valid_from_date = DictQuerySetField("valid_from_date")
-    valid_to_date = DictQuerySetField("valid_to_date")
-    population_source = DictQuerySetField("population_source")
-    population_estimate = DictQuerySetField("population_estimate")
-    product_cpc = DictQuerySetField("product_cpc")
-    product_common_name = DictQuerySetField("product_common_name")
-    strategy_type = DictQuerySetField("strategy_type")
-
-    slice_sum_kcals_consumed = DictQuerySetField("slice_sum_kcals_consumed")
-    sum_kcals_consumed = DictQuerySetField("sum_kcals_consumed")
-    kcals_consumed_percent = DictQuerySetField("kcals_consumed_percent")
-
-    def get_fields(self):
-        """
-        User can specify fields= parameter to specify a field list, comma-delimited.
-
-        If the fields parameter is not passed or does not match fields, defaults to self.Meta.fields.
-
-        The aggregated fields self.aggregates are added regardless of user field selection.
-        """
-        field_list = "request" in self.context and self.context["request"].query_params.get("fields", None)
-        if not field_list:
-            return super().get_fields()
-
-        # User-provided list of fields
-        field_names = set(field_list.split(","))
-
-        # Add the aggregates that are always returned
-        for field_name, aggregate in self.aggregates.items():
-            field_names |= {
-                field_name,
-                self.aggregate_field_name(field_name, aggregate),
-                self.slice_aggregate_field_name(field_name, aggregate),
-                self.slice_percent_field_name(field_name, aggregate),
-            }
-
-        # Add the ordering field if specified
-        ordering = self.context["request"].query_params.get("ordering")
-        if ordering:
-            field_names.add(ordering)
-
-        # Remove any that don't match a field as a dict
-        return {k: v for k, v in super().get_fields().items() if k in field_names}
-
-    def get_field(self, obj, field_name):
-        """
-        Aggregated querysets are a list of dicts.
-        This is called by AggregatedQuerysetField to get the value from the row dict.
-        """
-        db_field = self.field_to_database_path(field_name)
-        value = obj.get(db_field, "")
-        # Get the readable, translated string from the choice key.
-        if field_name == "strategy_type" and value:
-            return dict(LivelihoodStrategyType.choices).get(value, value)
-        return value
-
     @staticmethod
     def field_to_database_path(field_name):
         language_code = translation.get_language()
@@ -1621,24 +1591,15 @@ def field_to_database_path(field_name):
             "livelihood_activity_pk": "livelihood_strategies__livelihoodactivity__pk",
             "wealth_group_category_code": "livelihood_strategies__livelihoodactivity__wealth_group__wealth_group_category__code",  # NOQA: E501
             "kcals_consumed": "livelihood_strategies__livelihoodactivity__kcals_consumed",
+            "income": "livelihood_strategies__livelihoodactivity__income",
+            "expenditure": "livelihood_strategies__livelihoodactivity__expenditure",
+            "percentage_kcals": "livelihood_strategies__livelihoodactivity__percentage_kcals",
             "livelihood_zone_name": f"livelihood_zone__name_{language_code}",
             "source_organization_pk": "source_organization__pk",
             "source_organization_name": "source_organization__name",
             "country_pk": "livelihood_zone__country__pk",
             "country_iso_en_name": "livelihood_zone__country__iso_en_name",
-            "product_cpc": "livelihood_strategies__product",
+            "product_cpc": "livelihood_strategies__product__cpc",
             "strategy_type": "livelihood_strategies__strategy_type",
             "product_common_name": f"livelihood_strategies__product__common_name_{language_code}",
         }.get(field_name, field_name)
-
-    @staticmethod
-    def aggregate_field_name(field_name, aggregate):
-        return f"{aggregate.name.lower()}_{field_name}"  # eg, sum_kcals_consumed
-
-    @staticmethod
-    def slice_aggregate_field_name(field_name, aggregate):
-        return f"slice_{aggregate.name.lower()}_{field_name}"  # eg, slice_sum_kcals_consumed
-
-    @staticmethod
-    def slice_percent_field_name(field_name, aggregate):
-        return f"{field_name}_percent"  # eg, kcals_consumed_percent