From 08e7f6fecb54446626fee1681cc7551d52b84c3a Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Thu, 1 Aug 2024 16:23:37 -0400 Subject: [PATCH 1/4] [batch] make batches query go brrrrrrr (take 2) --- batch/batch/front_end/query/query_v2.py | 60 +++++++++++++------------ 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/batch/batch/front_end/query/query_v2.py b/batch/batch/front_end/query/query_v2.py index 6e82a00c50f..8034402fff3 100644 --- a/batch/batch/front_end/query/query_v2.py +++ b/batch/batch/front_end/query/query_v2.py @@ -125,40 +125,42 @@ def parse_list_batches_query_v2(user: str, q: str, last_batch_id: Optional[int]) where_conditions.append(f'({cond})') where_args += args - sql = f""" -SELECT batches.*, - cancelled_t.cancelled IS NOT NULL AS cancelled, - job_groups_n_jobs_in_complete_states.n_completed, - job_groups_n_jobs_in_complete_states.n_succeeded, - job_groups_n_jobs_in_complete_states.n_failed, - job_groups_n_jobs_in_complete_states.n_cancelled, - cost_t.cost, cost_t.cost_breakdown -FROM job_groups -LEFT JOIN batches ON batches.id = job_groups.batch_id -LEFT JOIN billing_projects ON batches.billing_project = billing_projects.name -LEFT JOIN job_groups_n_jobs_in_complete_states ON job_groups.batch_id = job_groups_n_jobs_in_complete_states.id AND job_groups.job_group_id = job_groups_n_jobs_in_complete_states.job_group_id -LEFT JOIN LATERAL ( - SELECT 1 AS cancelled - FROM job_group_self_and_ancestors - INNER JOIN job_groups_cancelled - ON job_group_self_and_ancestors.batch_id = job_groups_cancelled.id AND - job_group_self_and_ancestors.ancestor_id = job_groups_cancelled.job_group_id - WHERE job_groups.batch_id = job_group_self_and_ancestors.batch_id AND - job_groups.job_group_id = job_group_self_and_ancestors.job_group_id -) AS cancelled_t ON TRUE -STRAIGHT_JOIN billing_project_users ON batches.billing_project = billing_project_users.billing_project -LEFT JOIN LATERAL ( - SELECT COALESCE(SUM(`usage` * rate), 0) AS cost, 
JSON_OBJECTAGG(resources.resource, COALESCE(`usage` * rate, 0)) AS cost_breakdown - FROM ( + sql = f"""\ +SELECT batches.* + , cancelled_t.cancelled IS NOT NULL AS cancelled + , job_groups_n_jobs_in_complete_states.n_completed + , job_groups_n_jobs_in_complete_states.n_succeeded + , job_groups_n_jobs_in_complete_states.n_failed + , job_groups_n_jobs_in_complete_states.n_cancelled + , cost_t.cost + , cost_t.cost_breakdown +FROM batches IGNORE INDEX (batches_deleted) +STRAIGHT_JOIN billing_projects + ON batches.billing_project = billing_projects.name +STRAIGHT_JOIN billing_project_users + ON batches.billing_project = billing_project_users.billing_project +STRAIGHT_JOIN job_groups + ON job_groups.batch_id = batches.id +STRAIGHT_JOIN job_groups_n_jobs_in_complete_states + ON job_groups.batch_id = job_groups_n_jobs_in_complete_states.id + AND job_groups.job_group_id = job_groups_n_jobs_in_complete_states.job_group_id +LEFT JOIN (SELECT *, 1 AS cancelled FROM job_groups_cancelled) AS cancelled_t + ON job_groups.batch_id = cancelled_t.id + AND job_groups.job_group_id = cancelled_t.job_group_id +STRAIGHT_JOIN LATERAL ( + WITH resource_costs AS ( SELECT resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` FROM aggregated_job_group_resources_v3 - WHERE job_groups.batch_id = aggregated_job_group_resources_v3.batch_id AND job_groups.job_group_id = aggregated_job_group_resources_v3.job_group_id + WHERE batch_id = batches.id GROUP BY resource_id - ) AS usage_t - LEFT JOIN resources ON usage_t.resource_id = resources.resource_id + ) + SELECT COALESCE(SUM(`usage` * rate), 0) AS cost + , JSON_OBJECTAGG(resource, COALESCE(`usage` * rate, 0)) AS cost_breakdown + FROM resource_costs + INNER JOIN resources USING (resource_id) ) AS cost_t ON TRUE WHERE {' AND '.join(where_conditions)} -ORDER BY job_groups.batch_id DESC +ORDER BY batches.id DESC LIMIT 51; """ From be26ba3f1c42e6a31f312b68328137f2a9299a09 Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Tue, 6 Aug 2024 
15:08:07 -0400 Subject: [PATCH 2/4] don't aggregate across job groups --- batch/batch/front_end/query/query_v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/batch/batch/front_end/query/query_v2.py b/batch/batch/front_end/query/query_v2.py index 8034402fff3..82d60af24d0 100644 --- a/batch/batch/front_end/query/query_v2.py +++ b/batch/batch/front_end/query/query_v2.py @@ -151,7 +151,8 @@ def parse_list_batches_query_v2(user: str, q: str, last_batch_id: Optional[int]) WITH resource_costs AS ( SELECT resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` FROM aggregated_job_group_resources_v3 - WHERE batch_id = batches.id + WHERE batch_id = batches.id + AND job_group_id = job_groups.job_group_id GROUP BY resource_id ) SELECT COALESCE(SUM(`usage` * rate), 0) AS cost From 86d213662d34b13db3fe16324e3b1f089691f54c Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Wed, 7 Aug 2024 14:16:55 -0400 Subject: [PATCH 3/4] `STRAIGHT_JOIN` only required on job group tables --- batch/batch/front_end/query/query_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batch/batch/front_end/query/query_v2.py b/batch/batch/front_end/query/query_v2.py index 82d60af24d0..8975514ef5c 100644 --- a/batch/batch/front_end/query/query_v2.py +++ b/batch/batch/front_end/query/query_v2.py @@ -135,9 +135,9 @@ def parse_list_batches_query_v2(user: str, q: str, last_batch_id: Optional[int]) , cost_t.cost , cost_t.cost_breakdown FROM batches IGNORE INDEX (batches_deleted) -STRAIGHT_JOIN billing_projects +INNER JOIN billing_projects ON batches.billing_project = billing_projects.name -STRAIGHT_JOIN billing_project_users +INNER JOIN billing_project_users ON batches.billing_project = billing_project_users.billing_project STRAIGHT_JOIN job_groups ON job_groups.batch_id = batches.id From 6f259313440a50b9bbfa0093e3d1f063ccf2ef90 Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Wed, 7 Aug 2024 14:20:24 -0400 Subject: [PATCH 4/4] `STRAIGHT_JOIN` not 
required on derived tables --- batch/batch/front_end/query/query_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/batch/front_end/query/query_v2.py b/batch/batch/front_end/query/query_v2.py index 8975514ef5c..2eba30a95fa 100644 --- a/batch/batch/front_end/query/query_v2.py +++ b/batch/batch/front_end/query/query_v2.py @@ -147,7 +147,7 @@ def parse_list_batches_query_v2(user: str, q: str, last_batch_id: Optional[int]) LEFT JOIN (SELECT *, 1 AS cancelled FROM job_groups_cancelled) AS cancelled_t ON job_groups.batch_id = cancelled_t.id AND job_groups.job_group_id = cancelled_t.job_group_id -STRAIGHT_JOIN LATERAL ( +INNER JOIN LATERAL ( WITH resource_costs AS ( SELECT resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` FROM aggregated_job_group_resources_v3