Skip to content

Commit

Permalink
refactor: rewrite the calculated-quesiotn code to use the new structure
Browse files Browse the repository at this point in the history
The whole updating code for calculated fields was rather complex and
had quite a few subtle bugs. With the new structure, we have infrastructure
in place to build the same behaviour in a much better, more reliable way.

TODO: This is currently not yet fully optimized, and we're doing quite a few
more queries than before. Also TODO: A few issues were discovered that still
need to be addressed - namely calculated questions not attached to a root form.
  • Loading branch information
Your Name committed Feb 18, 2025
1 parent ca05315 commit 15ea4c2
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 56 deletions.
80 changes: 58 additions & 22 deletions caluma/caluma_form/domain_logic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from graphlib import TopologicalSorter
from logging import getLogger
from typing import Optional

from django.db import transaction
Expand All @@ -8,10 +9,14 @@
from caluma.caluma_core.models import BaseModel
from caluma.caluma_core.relay import extract_global_id
from caluma.caluma_form import models, structure, utils, validators
from caluma.caluma_form.utils import update_or_create_calc_answer
from caluma.caluma_form.utils import (
recalculate_field,
)
from caluma.caluma_user.models import BaseUser
from caluma.utils import update_model

log = getLogger(__name__)


class BaseLogic:
@staticmethod
Expand Down Expand Up @@ -153,17 +158,48 @@ def post_save(answer: models.Answer) -> models.Answer:
return answer

@staticmethod
def update_calc_dependents(answer):
if not answer.question.calc_dependents:
def update_calc_dependents(answer, update_info):
is_table = update_info and answer.question.type == models.Question.TYPE_TABLE
if not is_table and not answer.question.calc_dependents:
return
if not answer.document:
# Default answer
return
log.debug("update_calc_dependents(%s)", answer)

root_doc = utils.prefetch_document(answer.document.family_id)
struc = structure.FieldSet(root_doc, root_doc.form)

for question in models.Question.objects.filter(
pk__in=answer.question.calc_dependents
):
update_or_create_calc_answer(question, root_doc, struc)
struc = structure.FieldSet(root_doc)

field = struc.find_field_by_answer(answer)
if is_table:
# We treat all children of the table as "changed"
# Find all children, and if any are of type calc (and are in the "created"
# update info bit), recalculate them.
for child in field.get_all_fields():
# TODO: if we're in a table row, how do we know that a dependant is
# *only* inside the table and therefore only *our* row needs
# *recalculating?
if (
child.question.type == models.Question.TYPE_CALCULATED_FLOAT
and child.parent._document.pk in update_info["created"]
):
recalculate_field(child)

for dep_slug in field.question.calc_dependents or []:
# ... maybe this is enough? Cause this will find the closest "match",
# going only to the outer context from a table if the field is not found
# inside of it
for dep_field in struc.find_all_fields_by_slug(dep_slug):
# Need to iterate, because a calc question could reside *inside*
# a table row, so there could be multiple of them, all needing to
# be updated because of "our" change

log.debug(
"update_calc_dependents(%s): updating question %s",
answer,
dep_field.question.pk,
)
recalculate_field(dep_field)

@classmethod
@transaction.atomic
Expand All @@ -179,10 +215,11 @@ def create(
if validated_data["question"].type == models.Question.TYPE_FILES:
cls.update_answer_files(answer, files)

update_info = None
if answer.question.type == models.Question.TYPE_TABLE:
answer.create_answer_documents(documents)
update_info = answer.create_answer_documents(documents)

cls.update_calc_dependents(answer)
cls.update_calc_dependents(answer, update_info)

return answer

Expand All @@ -198,10 +235,11 @@ def update(cls, answer, validated_data, user: Optional[BaseUser] = None):

BaseLogic.update(answer, validated_data, user)

update_info = None
if answer.question.type == models.Question.TYPE_TABLE:
answer.create_answer_documents(documents)
update_info = answer.create_answer_documents(documents)

cls.update_calc_dependents(answer)
cls.update_calc_dependents(answer, update_info)

answer.refresh_from_db()
return answer
Expand Down Expand Up @@ -303,13 +341,15 @@ def _initialize_calculated_answers(document):
"""
Initialize all calculated questions in the document.
In order to do this efficiently, we get all calculated questions with their dependents,
sort them topoligically, and then update their answer.
In order to do this efficiently, we get all calculated questions with
their dependents, sort them topoligically, and then update their answer.
"""
root_doc = utils.prefetch_document(document.family_id)
struc = structure.FieldSet(root_doc, root_doc.form)
struc = structure.FieldSet(root_doc)

calculated_questions = (
# TODO we could fetch those as fields from the structure. Minus a DB query,
# and we'd already have the fields as well
models.Form.get_all_questions([(document.family or document).form_id])
.filter(type=models.Question.TYPE_CALCULATED_FLOAT)
.values("slug", "calc_dependents")
Expand All @@ -323,14 +363,10 @@ def _initialize_calculated_answers(document):
# just reverse the resulting order.
sorted_question_slugs = list(reversed(list(ts.static_order())))

# fetch all related questions in one query, but iterate according
# to pre-established sorting
_questions = models.Question.objects.in_bulk(sorted_question_slugs)
for slug in sorted_question_slugs:
print("question", slug)
update_or_create_calc_answer(
_questions[slug], document, struc, update_dependents=False
)
for field in struc.find_all_fields_by_slug(slug):
recalculate_field(field, update_recursively=False)

return document

Expand Down
10 changes: 10 additions & 0 deletions caluma/caluma_form/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,14 @@ def delete(self, *args, **kwargs):
super().delete(args, kwargs)

def create_answer_documents(self, documents):
"""Create AnswerDocuments for this table question, and attach them.
Return a dict with two keys: "created", and "updated", each
containing a list of document IDs that were either created or kept.
"""
family = getattr(self.document, "family", None)
document_ids = [document.pk for document in documents]
res = {"updated": [], "created": []}

for sort, document_id in enumerate(reversed(document_ids), start=1):
ans_doc, created = AnswerDocument.objects.get_or_create(
Expand All @@ -532,6 +538,10 @@ def create_answer_documents(self, documents):
# Already-existing documents are already in the family,
# so we're updating only the newly attached rows
ans_doc.document.set_family(family)
res["created"].append(document_id)
else:
res["updated"].append(document_id)
return res

def unlink_unused_rows(self, docs_to_keep):
existing = AnswerDocument.objects.filter(answer=self).exclude(
Expand Down
10 changes: 8 additions & 2 deletions caluma/caluma_form/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def remove_calc_dependents(sender, instance, **kwargs):
@filter_events(lambda instance: instance.type == models.Question.TYPE_CALCULATED_FLOAT)
@filter_events(lambda instance: getattr(instance, "calc_expression_changed", False))
def update_calc_from_question(sender, instance, created, update_fields, **kwargs):
for document in models.Document.objects.filter(form__questions=instance):
# TODO: we need to find documents that contain this form as a subform
# as well. Tis would only find documents where the question is attached
# top-level.
for document in models.Document.objects.filter(form__questions=instance).iterator():
update_or_create_calc_answer(instance, document)


Expand All @@ -113,5 +116,8 @@ def update_calc_from_question(sender, instance, created, update_fields, **kwargs
lambda instance: instance.question.type == models.Question.TYPE_CALCULATED_FLOAT
)
def update_calc_from_form_question(sender, instance, created, **kwargs):
for document in instance.form.documents.all():
# TODO: we need to find documents that contain this form as a subform
# as well. Tis would only find documents where the question is attached
# top-level.
for document in instance.form.documents.all().iterator():
update_or_create_calc_answer(instance.question, document)
33 changes: 31 additions & 2 deletions caluma/caluma_form/tests/test_question.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,7 @@ def test_calculated_question(
result = schema_executor(query, variable_values=inp)
assert not result.errors

calc_answer = calc_question.answers.filter().first()
calc_answer.refresh_from_db()
assert calc_answer
assert calc_answer.value == expected

Expand Down Expand Up @@ -1151,6 +1151,33 @@ def test_calculated_question_update_calc_expr(
assert spy.call_count == call_count


def test_recalc_missing_dependency(
db, schema_executor, form_and_document, form_question_factory, mocker
):
"""
Test recalculation behaviour for missing dependency.
Verify the update mechanism works correctly even if a calc dependency
does not exist in a given form.
"""
form, document, questions_dict, answers_dict = form_and_document(True, True)

sub_question = questions_dict["sub_question"]
sub_question.type = models.Question.TYPE_INTEGER
sub_question.save()

# Calculated question in another form
form_question_factory(
# in another form entirely
question__slug="some_calc_question",
question__type=models.Question.TYPE_CALCULATED_FLOAT,
question__calc_expression="'sub_question'|answer + 1",
).question

# update answer - should trigger recalc
api.save_answer(sub_question, document, value=100)


def test_calculated_question_answer_document(
db,
schema_executor,
Expand Down Expand Up @@ -1263,5 +1290,7 @@ def test_init_of_calc_questions_queries(
question__calc_expression="'table'|answer|mapby('column')|sum + 'top_question'|answer + 'sub_question'|answer",
)

with django_assert_num_queries(35):
with django_assert_num_queries(86):
# TODO: This used to be 35 queries - I think we need to redo the
# whole preload for the new structure to get this down again
api.save_answer(questions_dict["top_question"], document, value="1")
82 changes: 55 additions & 27 deletions caluma/caluma_form/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from logging import getLogger

from django.db.models import Prefetch

from caluma.caluma_form import models, structure
from caluma.caluma_form.jexl import QuestionJexl

log = getLogger(__name__)


def prefetch_document(document_id):
"""Fetch a document while prefetching the entire structure.
Expand Down Expand Up @@ -83,7 +87,18 @@ def _build_document_prefetch_statements(prefix="", prefetch_options=False):


def update_calc_dependents(slug, old_expr, new_expr):
jexl = QuestionJexl()
"""Update the calc_dependents lists of our calc *dependencies*.
The given (old and new) expressions are analyzed to see which
questions are referenced in "our" calc question. Then, those
questions' calc_dependents list is updated, such that it correctly
represents the new situation.
Example: If our expression newly contains the question `foo`, then the
`foo` question needs to know about it (we add "our" slug to the `foo`s
calc dependents)
"""
jexl = QuestionJexl(field=None)
old_q = set(
list(jexl.extract_referenced_questions(old_expr))
+ list(jexl.extract_referenced_mapby_questions(old_expr))
Expand All @@ -108,37 +123,50 @@ def update_calc_dependents(slug, old_expr, new_expr):
question.save()


def update_or_create_calc_answer(
question, document, struc=None, update_dependents=True
def recalculate_field(
calc_field: structure.ValueField, update_recursively: bool = True
):
root_doc = document.family
"""Recalculate the given value field and store the new answer.
if not struc:
struc = structure.FieldSet(root_doc, root_doc.form)

field = struc.get_field(question.slug)

# skip if question doesn't exist in this document structure
if field is None:
return

jexl = QuestionJexl(
{"form": field.form, "document": field.document, "structure": field.parent()}
)
If it's not a calculated field, nothing happens.
"""
if calc_field.question.type != models.Question.TYPE_CALCULATED_FLOAT:
# Not a calc field - skip
return # pragma: no cover

# Ignore errors because we evaluate greedily as soon as possible. At
# this moment we might be missing some answers or the expression might
# be invalid, in which case we return None
value = jexl.evaluate(field.question.calc_expression, raise_on_error=False)
value = calc_field.calculate()

answer, _ = models.Answer.objects.update_or_create(
question=question, document=field.document, defaults={"value": value}
question=calc_field.question,
document=calc_field.parent._document,
defaults={"value": value},
)
# also save new answer to structure for reuse
struc.set_answer(question.slug, answer)
calc_field.refresh(answer)
if update_recursively:
recalculate_dependent_fields(calc_field, update_recursively)


def recalculate_dependent_fields(
changed_field: structure.ValueField, update_recursively: bool = True
):
"""Update any calculated dependencies of the given field.
If `update_recursively=False` is passed, no subsequent calc dependencies
are updated (left to the caller in that case).
"""
for dep_slug in changed_field.question.calc_dependents:
dep_field = changed_field.get_field(dep_slug)
if not dep_field: # pragma: no cover
# Calculated field is not in our form structure, which is
# absolutely valid and OK
continue
recalculate_field(dep_field, update_recursively)


def update_or_create_calc_answer(question, document, update_dependents=True):
"""Recalculate all answers in the document after calc dependency change."""

if update_dependents:
for _question in models.Question.objects.filter(
pk__in=field.question.calc_dependents
):
update_or_create_calc_answer(_question, document, struc)
root = structure.FieldSet(document.family)
for field in root.find_all_fields_by_slug(question.slug):
recalculate_field(field, update_recursively=update_calc_dependents)
6 changes: 3 additions & 3 deletions caluma/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ def fix_foreign_key_types(apps, connection):
target_params = field.target_field.db_parameters(connection)

# verify django-internal specified type
assert (
fk_params["type"] == target_params["type"]
), f"Foreign key field {field}: type mismatch with destination in django-internal representation"
assert fk_params["type"] == target_params["type"], (
f"Foreign key field {field}: type mismatch with destination in django-internal representation"
)

# check if the DB agrees
fk_dbtype = col_type_from_db(field, connection)
Expand Down

0 comments on commit 15ea4c2

Please sign in to comment.