Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Large Queries into Materialized Views #404

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions commcare_connect/opportunity/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
from tablib import Dataset

from commcare_connect.opportunity.forms import DateRanges
from commcare_connect.opportunity.helpers import (
get_annotated_opportunity_access,
get_annotated_opportunity_access_deliver_status,
)
from commcare_connect.opportunity.models import (
CatchmentArea,
CompletedWork,
Opportunity,
OpportunityAccess,
OpportunityDeliverySummary,
UserInviteSummary,
UserVisit,
VisitValidationStatus,
)
Expand Down Expand Up @@ -108,13 +106,13 @@ def export_empty_payment_table(opportunity: Opportunity) -> Dataset:


def export_user_status_table(opportunity: Opportunity) -> Dataset:
access_objects = get_annotated_opportunity_access(opportunity)
access_objects = UserInviteSummary.objects.filter(opportunity=opportunity)
table = UserStatusTable(access_objects, exclude=("date_popup", "view_profile"))
return get_dataset(table, export_title="User status export")


def export_deliver_status_table(opportunity: Opportunity) -> Dataset:
access_objects = get_annotated_opportunity_access_deliver_status(opportunity)
access_objects = OpportunityDeliverySummary.objects.filter(opportunity=opportunity)
table = DeliverStatusTable(access_objects, exclude=("details", "date_popup"))
return get_dataset(table, export_title="Deliver Status export")

Expand Down
129 changes: 1 addition & 128 deletions commcare_connect/opportunity/helpers.py
Original file line number Diff line number Diff line change
@@ -1,133 +1,6 @@
from collections import namedtuple

from django.db.models import Case, Count, F, Max, Min, Q, Sum, Value, When

from commcare_connect.opportunity.models import (
CompletedWork,
CompletedWorkStatus,
Opportunity,
OpportunityAccess,
PaymentUnit,
UserInvite,
VisitValidationStatus,
)


def get_annotated_opportunity_access(opportunity: Opportunity):
learn_modules_count = opportunity.learn_app.learn_modules.count()
access_objects = (
UserInvite.objects.filter(opportunity=opportunity)
.select_related("opportunity_access", "opportunity_access__opportunityclaim", "opportunity_access__user")
.annotate(
last_visit_date_d=Max(
"opportunity_access__user__uservisit__visit_date",
filter=Q(opportunity_access__user__uservisit__opportunity=opportunity)
& ~Q(opportunity_access__user__uservisit__status=VisitValidationStatus.trial),
),
date_deliver_started=Min(
"opportunity_access__user__uservisit__visit_date",
filter=Q(opportunity_access__user__uservisit__opportunity=opportunity),
),
passed_assessment=Sum(
Case(
When(
Q(
opportunity_access__user__assessments__opportunity=opportunity,
opportunity_access__user__assessments__passed=True,
),
then=1,
),
default=0,
)
),
completed_modules_count=Count(
"opportunity_access__user__completed_modules",
filter=Q(opportunity_access__user__completed_modules__opportunity=opportunity),
distinct=True,
),
job_claimed=Case(
When(
Q(opportunity_access__opportunityclaim__isnull=False),
then="opportunity_access__opportunityclaim__date_claimed",
)
),
)
.annotate(
date_learn_completed=Case(
When(
Q(completed_modules_count=learn_modules_count),
then=Max(
"opportunity_access__user__completed_modules__date",
filter=Q(opportunity_access__user__completed_modules__opportunity=opportunity),
),
)
)
)
.order_by("opportunity_access__user__name")
)

return access_objects


def get_annotated_opportunity_access_deliver_status(opportunity: Opportunity):
access_objects = []
for payment_unit in opportunity.paymentunit_set.all():
access_objects += (
OpportunityAccess.objects.filter(opportunity=opportunity)
.select_related("user")
.annotate(
payment_unit=Value(payment_unit.name),
pending=Count(
"completedwork",
filter=Q(
completedwork__opportunity_access_id=F("pk"),
completedwork__payment_unit=payment_unit,
completedwork__status=CompletedWorkStatus.pending,
),
distinct=True,
),
approved=Count(
"completedwork",
filter=Q(
completedwork__opportunity_access_id=F("pk"),
completedwork__payment_unit=payment_unit,
completedwork__status=CompletedWorkStatus.approved,
),
distinct=True,
),
rejected=Count(
"completedwork",
filter=Q(
completedwork__opportunity_access_id=F("pk"),
completedwork__payment_unit=payment_unit,
completedwork__status=CompletedWorkStatus.rejected,
),
distinct=True,
),
over_limit=Count(
"completedwork",
filter=Q(
completedwork__opportunity_access_id=F("pk"),
completedwork__payment_unit=payment_unit,
completedwork__status=CompletedWorkStatus.over_limit,
),
distinct=True,
),
incomplete=Count(
"completedwork",
filter=Q(
completedwork__opportunity_access_id=F("pk"),
completedwork__payment_unit=payment_unit,
completedwork__status=CompletedWorkStatus.incomplete,
),
distinct=True,
),
completed=F("approved") + F("rejected") + F("pending") + F("over_limit"),
)
.order_by("user__name")
)
access_objects.sort(key=lambda a: a.user.name)
return access_objects
from commcare_connect.opportunity.models import CompletedWork, CompletedWorkStatus, Opportunity, PaymentUnit


def get_payment_report_data(opportunity: Opportunity):
Expand Down
215 changes: 215 additions & 0 deletions commcare_connect/opportunity/migrations/0059_userinvitesummary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Generated by Django 4.2.5 on 2024-09-30 14:21
import django
from django.db import migrations, models
from django_celery_beat.models import CrontabSchedule, PeriodicTask


def create_refresh_materialized_view_task(apps, schema_editor):
schedule, _ = CrontabSchedule.objects.get_or_create(
minute="0",
hour="0", # At midnight
day_of_week="*",
day_of_month="*",
month_of_year="*",
)
PeriodicTask.objects.update_or_create(
crontab=schedule,
name="refresh_materialized_view",
task="commcare_connect.opportunity.tasks.refresh_materialized_view",
)


def delete_refresh_materialized_view_task(apps, schema_editor):
PeriodicTask.objects.filter(
name="refresh_materialized_view",
task="commcare_connect.opportunity.tasks.refresh_materialized_view",
).delete()


class Migration(migrations.Migration):
dependencies = [
("opportunity", "0058_paymentinvoice_payment_invoice"),
]

"""This migration exists because we created a materialized view for the `UserInviteSummary` model.
Django automatically generates migrations for all models present in the application.
Since `managed = False` and `db_table is provided Django will not create or modify this table in the database.
However, we still need this migration in the migration script to prevent Django from generating it again when `makemigrations` is run in the future.
This ensures that the model is recognized without altering the actual database structure."""

operations = [
migrations.CreateModel(
name="UserInviteSummary",
fields=[
("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
(
"status",
models.CharField(
choices=[
("sms_delivered", "SMS Delivered"),
("sms_not_delivered", "SMS Not Delivered"),
("accepted", "Accepted"),
("invited", "Invited"),
("not_found", "ConnectID Not Found"),
],
default="invited",
max_length=50,
),
),
("last_visit_date", models.DateTimeField(blank=True, null=True)),
("date_deliver_started", models.DateTimeField(blank=True, null=True)),
("passed_assessment", models.IntegerField(default=0)),
("completed_modules_count", models.IntegerField(default=0)),
("job_claimed", models.DateTimeField(null=True)),
("date_learn_completed", models.DateTimeField(null=True)),
(
"opportunity",
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="opportunity.opportunity"),
),
(
"opportunity_access",
models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
to="opportunity.opportunityaccess",
),
),
],
options={
"db_table": "opportunity_userinvite_summary",
"managed": False,
},
),
migrations.RunSQL(
sql="""
CREATE MATERIALIZED VIEW opportunity_userinvite_summary AS
WITH total_learning_modules AS (
SELECT
app_id,
COUNT(*) AS total_modules_count
FROM
opportunity_learnmodule
GROUP BY
app_id
)
SELECT
userinvite.id AS id,
userinvite.status AS status,
userinvite.opportunity_access_id AS opportunity_access_id,
userinvite.opportunity_id AS opportunity_id,
MAX(uservisit.visit_date) FILTER (WHERE uservisit.opportunity_id = opp.id AND uservisit.status != 'trial') AS last_visit_date,
MIN(uservisit.visit_date) FILTER (WHERE uservisit.opportunity_id = opp.id) AS date_deliver_started,
SUM(CASE WHEN assessment.opportunity_id = opp.id AND assessment.passed = TRUE THEN 1 ELSE 0 END) AS passed_assessment,
COUNT(DISTINCT completedmodule.id) FILTER (WHERE completedmodule.opportunity_id = opp.id) AS completed_modules_count,
CASE WHEN claim.id IS NOT NULL THEN claim.date_claimed END AS job_claimed,
CASE
WHEN COUNT(completedmodule.id) = learning_module.total_modules_count THEN
MAX(completedmodule.date) FILTER (WHERE completedmodule.opportunity_id = opp.id)
END AS date_learn_completed
FROM
opportunity_userinvite AS userinvite
JOIN
opportunity_opportunityaccess AS access ON userinvite.opportunity_access_id = access.id
JOIN
users_user AS _user ON access.user_id = _user.id
LEFT JOIN
opportunity_uservisit AS uservisit ON _user.id = uservisit.user_id
LEFT JOIN
opportunity_assessment AS assessment ON _user.id = assessment.user_id
LEFT JOIN
opportunity_completedmodule AS completedmodule ON completedmodule.user_id = _user.id
LEFT JOIN
opportunity_opportunityclaim AS claim ON access.id = claim.opportunity_access_id
JOIN
opportunity_opportunity AS opp ON userinvite.opportunity_id = opp.id
JOIN
total_learning_modules AS learning_module ON learning_module.app_id = opp.learn_app_id
GROUP BY
userinvite.id, _user.id, claim.id, learning_module.total_modules_count, userinvite.opportunity_access_id;
""",
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS opportunity_userinvite_summary;",
),
migrations.CreateModel(
name="OpportunityDeliverySummary",
fields=[
("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
("approved", models.IntegerField(default=0)),
("pending", models.IntegerField(default=0)),
("rejected", models.IntegerField(default=0)),
("over_limit", models.IntegerField(default=0)),
("incomplete", models.IntegerField(default=0)),
("completed", models.IntegerField(default=0)),
("payment_unit", models.CharField(max_length=255)),
(
"opportunity",
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="opportunity.opportunity"),
),
(
"user",
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="users.user"),
),
(
"opportunity_access",
models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
to="opportunity.opportunityaccess",
),
),
],
options={
"db_table": "opportunity_delivery_summary",
"managed": False,
},
),
migrations.RunSQL(
sql="""
CREATE MATERIALIZED VIEW opportunity_delivery_summary AS
SELECT
access.id AS id,
access.id AS opportunity_access_id,
access.opportunity_id AS opportunity_id,
access.user_id AS user_id,
payment_unit.name AS payment_unit,

COUNT(DISTINCT CASE WHEN completed_work.status = 'pending'
AND completed_work.opportunity_access_id = access.id
AND completed_work.payment_unit_id = payment_unit.id THEN completed_work.id END) AS pending,

COUNT(DISTINCT CASE WHEN completed_work.status = 'approved'
AND completed_work.opportunity_access_id = access.id
AND completed_work.payment_unit_id = payment_unit.id THEN completed_work.id END) AS approved,

COUNT(DISTINCT CASE WHEN completed_work.status = 'rejected'
AND completed_work.opportunity_access_id = access.id
AND completed_work.payment_unit_id = payment_unit.id THEN completed_work.id END) AS rejected,

COUNT(DISTINCT CASE WHEN completed_work.status = 'over_limit'
AND completed_work.opportunity_access_id = access.id
AND completed_work.payment_unit_id = payment_unit.id THEN completed_work.id END) AS over_limit,

COUNT(DISTINCT CASE WHEN completed_work.status = 'incomplete'
AND completed_work.opportunity_access_id = access.id
AND completed_work.payment_unit_id = payment_unit.id THEN completed_work.id END) AS incomplete,

COALESCE(
COUNT(DISTINCT CASE WHEN completed_work.status IN ('approved', 'rejected', 'pending', 'over_limit')
AND completed_work.opportunity_access_id = access.id
AND completed_work.payment_unit_id = payment_unit.id THEN completed_work.id END),
0
) AS completed
FROM
opportunity_opportunityaccess AS access
LEFT JOIN
opportunity_completedwork AS completed_work ON access.id = completed_work.opportunity_access_id
LEFT JOIN
opportunity_paymentunit AS payment_unit ON completed_work.payment_unit_id = payment_unit.id
INNER JOIN
users_user AS _user ON access.user_id = _user.id
GROUP BY
access.id,
_user.id,
payment_unit.name;
""",
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS opportunity_delivery_summary;",
),
migrations.RunPython(create_refresh_materialized_view_task, delete_refresh_materialized_view_task),
]
Loading
Loading