Skip to content

Commit

Permalink
Merge pull request #871 from mapswipe/dev
Browse files Browse the repository at this point in the history
Prod Deployment - Community Dashboard data calculation update
  • Loading branch information
thenav56 authored Jun 26, 2023
2 parents 1959557 + 9380369 commit 4675071
Show file tree
Hide file tree
Showing 16 changed files with 617 additions and 489 deletions.
2 changes: 1 addition & 1 deletion django/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update -y \
# For postgis
gdal-bin \
# Upgrade pip and install python packages for code
&& pip install --upgrade --no-cache-dir pip poetry==1.2.1 \
&& pip install --upgrade --no-cache-dir pip poetry==1.5.1 \
&& poetry --version \
# Configure to use system instead of virtualenvs
&& poetry config virtualenvs.create false \
Expand Down
162 changes: 138 additions & 24 deletions django/apps/aggregated/management/commands/update_aggregated_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,131 @@
# |1|00:00:00.208768|00:00:01.398161|00:00:28.951521|
# |2|00:00:01.330297|00:00:06.076814|00:00:03.481192|
# |3|00:00:02.092967|00:00:11.271081|00:00:06.045881|

# SQL that fills in `groups.total_area` and `groups.time_spent_max_allowed`
# for the groups of a single project.
#
# Bound parameter: `project_id` (passed at execute time as %(project_id)s).
# The `Project.Type.*.value` placeholders are interpolated by the f-string
# when this module is loaded, so they are constants inside the final SQL.
#
# Steps performed by the statement:
#   1. `to_calculate_groups`: groups that appear in `mapping_sessions` for the
#      given project AND still have `total_area` or `time_spent_max_allowed`
#      set to NULL. Groups with both values already set are left alone.
#   2. `groups_data`: for each such group, aggregates its rows in `tasks`:
#        - total_task_group_area: sum of ST_Area over the task geometry cast
#          to geography, divided by 1,000,000 (sq km per the inline comment).
#        - time_spent_max_allowed: a per-project-type multiplier times the
#          number of task rows in the group. The multipliers are described
#          inline as the 95th-percentile value of existing data; their unit
#          is not stated here -- TODO confirm.
#   3. UPDATE: writes both computed values back to the matching `groups` rows.
UPDATE_PROJECT_GROUP_DATA_USING_PROJECT_ID = f"""
WITH to_calculate_groups AS (
SELECT
project_id,
group_id
FROM groups
WHERE
(project_id, group_id) in (
SELECT
MS.project_id,
MS.group_id
FROM mapping_sessions MS
WHERE
project_id = %(project_id)s
GROUP BY MS.project_id, MS.group_id
) AND
(
total_area is NULL OR time_spent_max_allowed is NULL
)
),
groups_data AS (
SELECT
T.project_id,
T.group_id,
SUM( -- sqkm
ST_Area(T.geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area,
(
CASE
-- Using 95_percent value of existing data for each project_type
WHEN P.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
WHEN P.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
WHEN P.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
-- FOOTPRINT: Not calculated right now
WHEN P.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
ELSE 1
END
) * COUNT(*) as time_spent_max_allowed
FROM tasks T
INNER JOIN to_calculate_groups G USING (project_id, group_id)
INNER JOIN projects P USING (project_id)
GROUP BY project_id, P.project_type, group_id
)
UPDATE groups G
SET
total_area = GD.total_task_group_area,
time_spent_max_allowed = GD.time_spent_max_allowed
FROM groups_data GD
WHERE
G.project_id = GD.project_id AND
G.group_id = GD.group_id;
"""


# SQL that fills in `groups.total_area` and `groups.time_spent_max_allowed`
# for groups that were mapped within a time window.
#
# Bound parameters: `from_date` (inclusive) and `until_date` (exclusive),
# compared against `mapping_sessions.start_time`.
# The `Project.Type.*.value` placeholders are interpolated by the f-string
# when this module is loaded, so they are constants inside the final SQL.
#
# Steps performed by the statement:
#   1. `to_calculate_groups`: groups that have at least one mapping session
#      with from_date <= start_time < until_date AND still have `total_area`
#      or `time_spent_max_allowed` set to NULL.
#   2. `groups_data`: for each such group, aggregates its rows in `tasks`:
#        - total_task_group_area: sum of ST_Area over the task geometry cast
#          to geography, divided by 1,000,000 (sq km per the inline comment).
#        - time_spent_max_allowed: a per-project-type multiplier times the
#          number of task rows in the group. The multipliers are described
#          inline as the 95th-percentile value of existing data; their unit
#          is not stated here -- TODO confirm.
#   3. UPDATE: writes both computed values back to the matching `groups` rows.
UPDATE_PROJECT_GROUP_DATA_USING_TIME_RANGE = f"""
WITH to_calculate_groups AS (
SELECT
project_id,
group_id
FROM groups
WHERE
(project_id, group_id) in (
SELECT
MS.project_id,
MS.group_id
FROM mapping_sessions MS
WHERE
MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
GROUP BY MS.project_id, MS.group_id
) AND
(
total_area is NULL OR time_spent_max_allowed is NULL
)
),
groups_data AS (
SELECT
T.project_id,
T.group_id,
SUM( -- sqkm
ST_Area(T.geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area,
(
CASE
-- Using 95_percent value of existing data for each project_type
WHEN P.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
WHEN P.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
WHEN P.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
-- FOOTPRINT: Not calculated right now
WHEN P.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
ELSE 1
END
) * COUNT(*) as time_spent_max_allowed
FROM tasks T
INNER JOIN to_calculate_groups G USING (project_id, group_id)
INNER JOIN projects P USING (project_id)
GROUP BY project_id, P.project_type, group_id
)
UPDATE groups G
SET
total_area = GD.total_task_group_area,
time_spent_max_allowed = GD.time_spent_max_allowed
FROM groups_data GD
WHERE
G.project_id = GD.project_id AND
G.group_id = GD.group_id;
"""

TASK_GROUP_METADATA_QUERY = f"""
SELECT
project_id,
group_id,
SUM(
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area, -- sqkm
(
SELECT
G.project_id,
G.group_id,
(
CASE
-- Using 95_percent value of existing data for each project_type
WHEN UG.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
WHEN UG.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
WHEN UG.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
-- FOOTPRINT: Not calculated right now
WHEN UG.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
ELSE 1
-- Hide area for Footprint
WHEN P.project_type = {Project.Type.FOOTPRINT.value} THEN 0
ELSE G.total_area
END
) * COUNT(*) as time_spent_max_allowed
FROM tasks T
INNER JOIN used_task_groups UG USING (project_id, group_id)
GROUP BY project_id, project_type, group_id
) as total_task_group_area,
G.time_spent_max_allowed
FROM groups G
INNER JOIN used_task_groups UG USING (project_id, group_id)
INNER JOIN projects P USING (project_id)
GROUP BY G.project_id, P.project_type, G.group_id
"""


Expand All @@ -63,9 +167,7 @@
FROM mapping_sessions MS
INNER JOIN projects P USING (project_id)
WHERE
-- Skip for footprint type missions
P.project_type != {Project.Type.FOOTPRINT.value}
AND MS.start_time >= %(from_date)s
MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
GROUP BY project_id, project_type, group_id -- To get unique
),
Expand Down Expand Up @@ -142,9 +244,7 @@
INNER JOIN mapping_sessions MS USING (mapping_session_id)
INNER JOIN projects P USING (project_id)
WHERE
-- Skip for footprint type missions
P.project_type != {Project.Type.FOOTPRINT.value}
AND MS.start_time >= %(from_date)s
MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
GROUP BY project_id, project_type, group_id -- To get unique
),
Expand Down Expand Up @@ -239,6 +339,20 @@ def _track(self, tracker_type, label, sql):
until_date=until_date.strftime("%Y-%m-%d"),
)
start_time = time.time()

self.stdout.write(
f"Updating Project Group Data for {label.title()} for date: {params}"
)
with transaction.atomic():
with connection.cursor() as cursor:
cursor.execute(UPDATE_PROJECT_GROUP_DATA_USING_TIME_RANGE, params)
self.stdout.write(
self.style.SUCCESS(
f"Successfull. Runtime: {time.time() - start_time} seconds"
)
)

start_time = time.time()
self.stdout.write(f"Updating {label.title()} Data for date: {params}")
with transaction.atomic():
with connection.cursor() as cursor:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import time

from apps.existing_database.models import Project
from django.core.management.base import BaseCommand
from django.db import connection, transaction

from .update_aggregated_data import UPDATE_PROJECT_GROUP_DATA_USING_PROJECT_ID


class Command(BaseCommand):
    """Fill in per-group aggregate columns for every project, one at a time.

    Runs ``UPDATE_PROJECT_GROUP_DATA_USING_PROJECT_ID`` once per project, each
    execution in its own transaction, and reports progress and runtime on the
    command's stdout.
    """

    help = (
        "Calculate total_area and time_spent_max_allowed for groups, "
        "processing one project per transaction."
    )

    def handle(self, **_):
        """Execute the group-data update query for each project ID.

        Any command options passed by Django are accepted and ignored.
        """
        project_qs = Project.objects.all()
        total_projects = project_qs.count()
        self.stdout.write(f"Total projects: {total_projects}")

        project_ids = project_qs.values_list("project_id", flat=True)
        for index, project_id in enumerate(project_ids, start=1):
            self.stdout.write(
                "Running calculation for project ID "
                f"({index}/{total_projects}): {project_id}"
            )
            # perf_counter() is monotonic, so the reported runtime cannot be
            # skewed (or go negative) if the system clock is adjusted mid-run.
            start_time = time.perf_counter()
            # One transaction per project keeps a failure in one project from
            # rolling back the work already done for the previous ones.
            with transaction.atomic():
                with connection.cursor() as cursor:
                    cursor.execute(
                        UPDATE_PROJECT_GROUP_DATA_USING_PROJECT_ID,
                        {"project_id": project_id},
                    )
            # Reported after the atomic block exits, i.e. only once the
            # transaction has been committed; the runtime includes the commit.
            elapsed = time.perf_counter() - start_time
            self.stdout.write(
                self.style.SUCCESS(f"- Successfull. Runtime: {elapsed} seconds")
            )
5 changes: 5 additions & 0 deletions django/apps/existing_database/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ class Migration(migrations.Migration):
("required_count", models.IntegerField(blank=True, null=True)),
("progress", models.IntegerField(blank=True, null=True)),
("project_type_specifics", models.TextField(blank=True, null=True)),
("total_area", models.FloatField(blank=True, null=True, default=None)),
(
"time_spent_max_allowed",
models.FloatField(blank=True, null=True, default=None),
),
],
options={
"db_table": "groups",
Expand Down
5 changes: 4 additions & 1 deletion django/apps/existing_database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ class Group(Model):
required_count = models.IntegerField(blank=True, null=True)
progress = models.IntegerField(blank=True, null=True)
# Database uses JSON instead of JSONB (not supported by django)
project_type_specifics = models.TextField(blank=True, null=True)
project_type_specifics = models.TextField(blank=True, null=True, default=None)
# Used by aggregated module
total_area = models.FloatField(blank=True, null=True, default=None)
time_spent_max_allowed = models.FloatField(blank=True, null=True, default=None)

# Django derived fields from ForeignKey
project_id: str
Expand Down
Loading

0 comments on commit 4675071

Please sign in to comment.