refactor: rewrite the calculated-quesiotn code to use the new structure

The whole updating code for calculated fields was rather complex and had quite a few subtle bugs. With the new structure, we have infrastructure in place to build the same behaviour in a much better, more reliable way. TODO: This is currently not yet fully optimized, and we're doing quite a few more queries than before. Also TODO: A few issues were discovered that still need to be addressed - namely calculated questions not attached to a root form.
projectcaluma · Feb 18, 2025 · 15ea4c2 · 15ea4c2
1 parent ca05315
commit 15ea4c2
Show file tree

Hide file tree

Showing 6 changed files with 165 additions and 56 deletions.
diff --git a/caluma/caluma_form/domain_logic.py b/caluma/caluma_form/domain_logic.py
@@ -1,4 +1,5 @@
 from graphlib import TopologicalSorter
+from logging import getLogger
 from typing import Optional
 
 from django.db import transaction
@@ -8,10 +9,14 @@
 from caluma.caluma_core.models import BaseModel
 from caluma.caluma_core.relay import extract_global_id
 from caluma.caluma_form import models, structure, utils, validators
-from caluma.caluma_form.utils import update_or_create_calc_answer
+from caluma.caluma_form.utils import (
+    recalculate_field,
+)
 from caluma.caluma_user.models import BaseUser
 from caluma.utils import update_model
 
+log = getLogger(__name__)
+
 
 class BaseLogic:
     @staticmethod
@@ -153,17 +158,48 @@ def post_save(answer: models.Answer) -> models.Answer:
         return answer
 
     @staticmethod
-    def update_calc_dependents(answer):
-        if not answer.question.calc_dependents:
+    def update_calc_dependents(answer, update_info):
+        is_table = update_info and answer.question.type == models.Question.TYPE_TABLE
+        if not is_table and not answer.question.calc_dependents:
+            return
+        if not answer.document:
+            # Default answer
             return
+        log.debug("update_calc_dependents(%s)", answer)
 
         root_doc = utils.prefetch_document(answer.document.family_id)
-        struc = structure.FieldSet(root_doc, root_doc.form)
-
-        for question in models.Question.objects.filter(
-            pk__in=answer.question.calc_dependents
-        ):
-            update_or_create_calc_answer(question, root_doc, struc)
+        struc = structure.FieldSet(root_doc)
+
+        field = struc.find_field_by_answer(answer)
+        if is_table:
+            # We treat all children of the table as "changed"
+            # Find all children, and if any are of type calc (and are in the "created"
+            # update info bit), recalculate them.
+            for child in field.get_all_fields():
+                # TODO: if we're in a table row, how do we know that a dependant is
+                # *only* inside the table and therefore only *our* row needs
+                # *recalculating?
+                if (
+                    child.question.type == models.Question.TYPE_CALCULATED_FLOAT
+                    and child.parent._document.pk in update_info["created"]
+                ):
+                    recalculate_field(child)
+
+        for dep_slug in field.question.calc_dependents or []:
+            # ... maybe this is enough? Cause this will find the closest "match",
+            # going only to the outer context from a table if the field is not found
+            # inside of it
+            for dep_field in struc.find_all_fields_by_slug(dep_slug):
+                # Need to iterate, because a calc question could reside *inside*
+                # a table row, so there could be multiple of them, all needing to
+                # be updated because of "our" change
+
+                log.debug(
+                    "update_calc_dependents(%s): updating question %s",
+                    answer,
+                    dep_field.question.pk,
+                )
+                recalculate_field(dep_field)
 
     @classmethod
     @transaction.atomic
@@ -179,10 +215,11 @@ def create(
         if validated_data["question"].type == models.Question.TYPE_FILES:
             cls.update_answer_files(answer, files)
 
+        update_info = None
         if answer.question.type == models.Question.TYPE_TABLE:
-            answer.create_answer_documents(documents)
+            update_info = answer.create_answer_documents(documents)
 
-        cls.update_calc_dependents(answer)
+        cls.update_calc_dependents(answer, update_info)
 
         return answer
 
@@ -198,10 +235,11 @@ def update(cls, answer, validated_data, user: Optional[BaseUser] = None):
 
         BaseLogic.update(answer, validated_data, user)
 
+        update_info = None
         if answer.question.type == models.Question.TYPE_TABLE:
-            answer.create_answer_documents(documents)
+            update_info = answer.create_answer_documents(documents)
 
-        cls.update_calc_dependents(answer)
+        cls.update_calc_dependents(answer, update_info)
 
         answer.refresh_from_db()
         return answer
@@ -303,13 +341,15 @@ def _initialize_calculated_answers(document):
         """
         Initialize all calculated questions in the document.
 
-        In order to do this efficiently, we get all calculated questions with their dependents,
-        sort them topoligically, and then update their answer.
+        In order to do this efficiently, we get all calculated questions with
+        their dependents, sort them topoligically, and then update their answer.
         """
         root_doc = utils.prefetch_document(document.family_id)
-        struc = structure.FieldSet(root_doc, root_doc.form)
+        struc = structure.FieldSet(root_doc)
 
         calculated_questions = (
+            # TODO we could fetch those as fields from the structure. Minus a DB query,
+            # and we'd already have the fields as well
             models.Form.get_all_questions([(document.family or document).form_id])
             .filter(type=models.Question.TYPE_CALCULATED_FLOAT)
             .values("slug", "calc_dependents")
@@ -323,14 +363,10 @@ def _initialize_calculated_answers(document):
         # just reverse the resulting order.
         sorted_question_slugs = list(reversed(list(ts.static_order())))
 
-        # fetch all related questions in one query, but iterate according
-        # to pre-established sorting
-        _questions = models.Question.objects.in_bulk(sorted_question_slugs)
         for slug in sorted_question_slugs:
             print("question", slug)
-            update_or_create_calc_answer(
-                _questions[slug], document, struc, update_dependents=False
-            )
+            for field in struc.find_all_fields_by_slug(slug):
+                recalculate_field(field, update_recursively=False)
 
         return document
 

diff --git a/caluma/caluma_form/models.py b/caluma/caluma_form/models.py
@@ -518,8 +518,14 @@ def delete(self, *args, **kwargs):
         super().delete(args, kwargs)
 
     def create_answer_documents(self, documents):
+        """Create AnswerDocuments for this table question, and attach them.
+
+        Return a dict with two keys: "created", and "updated", each
+        containing a list of document IDs that were either created or kept.
+        """
         family = getattr(self.document, "family", None)
         document_ids = [document.pk for document in documents]
+        res = {"updated": [], "created": []}
 
         for sort, document_id in enumerate(reversed(document_ids), start=1):
             ans_doc, created = AnswerDocument.objects.get_or_create(
@@ -532,6 +538,10 @@ def create_answer_documents(self, documents):
                 # Already-existing documents are already in the family,
                 # so we're updating only the newly attached rows
                 ans_doc.document.set_family(family)
+                res["created"].append(document_id)
+            else:
+                res["updated"].append(document_id)
+        return res
 
     def unlink_unused_rows(self, docs_to_keep):
         existing = AnswerDocument.objects.filter(answer=self).exclude(

diff --git a/caluma/caluma_form/signals.py b/caluma/caluma_form/signals.py
@@ -103,7 +103,10 @@ def remove_calc_dependents(sender, instance, **kwargs):
 @filter_events(lambda instance: instance.type == models.Question.TYPE_CALCULATED_FLOAT)
 @filter_events(lambda instance: getattr(instance, "calc_expression_changed", False))
 def update_calc_from_question(sender, instance, created, update_fields, **kwargs):
-    for document in models.Document.objects.filter(form__questions=instance):
+    # TODO: we need to find documents that contain this form as a subform
+    # as well. Tis would only find documents where the question is attached
+    # top-level.
+    for document in models.Document.objects.filter(form__questions=instance).iterator():
         update_or_create_calc_answer(instance, document)
 
 
@@ -113,5 +116,8 @@ def update_calc_from_question(sender, instance, created, update_fields, **kwargs
     lambda instance: instance.question.type == models.Question.TYPE_CALCULATED_FLOAT
 )
 def update_calc_from_form_question(sender, instance, created, **kwargs):
-    for document in instance.form.documents.all():
+    # TODO: we need to find documents that contain this form as a subform
+    # as well. Tis would only find documents where the question is attached
+    # top-level.
+    for document in instance.form.documents.all().iterator():
         update_or_create_calc_answer(instance.question, document)
diff --git a/caluma/caluma_form/tests/test_question.py b/caluma/caluma_form/tests/test_question.py
@@ -1005,7 +1005,7 @@ def test_calculated_question(
     result = schema_executor(query, variable_values=inp)
     assert not result.errors
 
-    calc_answer = calc_question.answers.filter().first()
+    calc_answer.refresh_from_db()
     assert calc_answer
     assert calc_answer.value == expected
 
@@ -1151,6 +1151,33 @@ def test_calculated_question_update_calc_expr(
     assert spy.call_count == call_count
 
 
+def test_recalc_missing_dependency(
+    db, schema_executor, form_and_document, form_question_factory, mocker
+):
+    """
+    Test recalculation behaviour for missing dependency.
+
+    Verify the update mechanism works correctly even if a calc dependency
+    does not exist in a given form.
+    """
+    form, document, questions_dict, answers_dict = form_and_document(True, True)
+
+    sub_question = questions_dict["sub_question"]
+    sub_question.type = models.Question.TYPE_INTEGER
+    sub_question.save()
+
+    # Calculated question in another form
+    form_question_factory(
+        # in another form entirely
+        question__slug="some_calc_question",
+        question__type=models.Question.TYPE_CALCULATED_FLOAT,
+        question__calc_expression="'sub_question'|answer + 1",
+    ).question
+
+    # update answer - should trigger recalc
+    api.save_answer(sub_question, document, value=100)
+
+
 def test_calculated_question_answer_document(
     db,
     schema_executor,
@@ -1263,5 +1290,7 @@ def test_init_of_calc_questions_queries(
         question__calc_expression="'table'|answer|mapby('column')|sum + 'top_question'|answer + 'sub_question'|answer",
     )
 
-    with django_assert_num_queries(35):
+    with django_assert_num_queries(86):
+        # TODO: This used to be 35 queries - I think we need to redo the
+        # whole preload for the new structure to get this down again
         api.save_answer(questions_dict["top_question"], document, value="1")
diff --git a/caluma/caluma_form/utils.py b/caluma/caluma_form/utils.py
@@ -1,8 +1,12 @@
+from logging import getLogger
+
 from django.db.models import Prefetch
 
 from caluma.caluma_form import models, structure
 from caluma.caluma_form.jexl import QuestionJexl
 
+log = getLogger(__name__)
+
 
 def prefetch_document(document_id):
     """Fetch a document while prefetching the entire structure.
@@ -83,7 +87,18 @@ def _build_document_prefetch_statements(prefix="", prefetch_options=False):
 
 
 def update_calc_dependents(slug, old_expr, new_expr):
-    jexl = QuestionJexl()
+    """Update the calc_dependents lists of our calc *dependencies*.
+
+    The given (old and new) expressions are analyzed to see which
+    questions are referenced in "our" calc question. Then, those
+    questions' calc_dependents list is updated, such that it correctly
+    represents the new situation.
+
+    Example: If our expression newly contains the question `foo`, then the
+    `foo` question needs to know about it (we add "our" slug to the `foo`s
+    calc dependents)
+    """
+    jexl = QuestionJexl(field=None)
     old_q = set(
         list(jexl.extract_referenced_questions(old_expr))
         + list(jexl.extract_referenced_mapby_questions(old_expr))
@@ -108,37 +123,50 @@ def update_calc_dependents(slug, old_expr, new_expr):
         question.save()
 
 
-def update_or_create_calc_answer(
-    question, document, struc=None, update_dependents=True
+def recalculate_field(
+    calc_field: structure.ValueField, update_recursively: bool = True
 ):
-    root_doc = document.family
+    """Recalculate the given value field and store the new answer.
 
-    if not struc:
-        struc = structure.FieldSet(root_doc, root_doc.form)
-
-    field = struc.get_field(question.slug)
-
-    # skip if question doesn't exist in this document structure
-    if field is None:
-        return
-
-    jexl = QuestionJexl(
-        {"form": field.form, "document": field.document, "structure": field.parent()}
-    )
+    If it's not a calculated field, nothing happens.
+    """
+    if calc_field.question.type != models.Question.TYPE_CALCULATED_FLOAT:
+        # Not a calc field - skip
+        return  # pragma: no cover
 
-    # Ignore errors because we evaluate greedily as soon as possible. At
-    # this moment we might be missing some answers or the expression might
-    # be invalid, in which case we return None
-    value = jexl.evaluate(field.question.calc_expression, raise_on_error=False)
+    value = calc_field.calculate()
 
     answer, _ = models.Answer.objects.update_or_create(
-        question=question, document=field.document, defaults={"value": value}
+        question=calc_field.question,
+        document=calc_field.parent._document,
+        defaults={"value": value},
     )
     # also save new answer to structure for reuse
-    struc.set_answer(question.slug, answer)
+    calc_field.refresh(answer)
+    if update_recursively:
+        recalculate_dependent_fields(calc_field, update_recursively)
+
+
+def recalculate_dependent_fields(
+    changed_field: structure.ValueField, update_recursively: bool = True
+):
+    """Update any calculated dependencies of the given field.
+
+    If `update_recursively=False` is passed, no subsequent calc dependencies
+    are updated (left to the caller in that case).
+    """
+    for dep_slug in changed_field.question.calc_dependents:
+        dep_field = changed_field.get_field(dep_slug)
+        if not dep_field:  # pragma: no cover
+            # Calculated field is not in our form structure, which is
+            # absolutely valid and OK
+            continue
+        recalculate_field(dep_field, update_recursively)
+
+
+def update_or_create_calc_answer(question, document, update_dependents=True):
+    """Recalculate all answers in the document after calc dependency change."""
 
-    if update_dependents:
-        for _question in models.Question.objects.filter(
-            pk__in=field.question.calc_dependents
-        ):
-            update_or_create_calc_answer(_question, document, struc)
+    root = structure.FieldSet(document.family)
+    for field in root.find_all_fields_by_slug(question.slug):
+        recalculate_field(field, update_recursively=update_calc_dependents)
diff --git a/caluma/utils.py b/caluma/utils.py
@@ -67,9 +67,9 @@ def fix_foreign_key_types(apps, connection):
                 target_params = field.target_field.db_parameters(connection)
 
                 # verify django-internal specified type
-                assert (
-                    fk_params["type"] == target_params["type"]
-                ), f"Foreign key field {field}: type mismatch with destination in django-internal representation"
+                assert fk_params["type"] == target_params["type"], (
+                    f"Foreign key field {field}: type mismatch with destination in django-internal representation"
+                )
 
                 # check if the DB agrees
                 fk_dbtype = col_type_from_db(field, connection)