From 79be77e3ee2e57d7e006721dc4aa07a4bff196fa Mon Sep 17 00:00:00 2001 From: CollinBeczak Date: Thu, 27 Feb 2025 13:53:12 -0600 Subject: [PATCH] consolidate code --- .../repository/TaskClusterRepository.scala | 240 ++++++++---------- 1 file changed, 108 insertions(+), 132 deletions(-) diff --git a/app/org/maproulette/framework/repository/TaskClusterRepository.scala b/app/org/maproulette/framework/repository/TaskClusterRepository.scala index 3132573b..29c7f508 100644 --- a/app/org/maproulette/framework/repository/TaskClusterRepository.scala +++ b/app/org/maproulette/framework/repository/TaskClusterRepository.scala @@ -27,40 +27,90 @@ class TaskClusterRepository @Inject() (override val db: Database, challengeDAL: val pointParser = this.challengeDAL.pointParser - private val joinClause = - new StringBuilder( - """ - INNER JOIN challenges c ON c.id = tasks.parent_id - INNER JOIN projects p ON p.id = c.parent_id - LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id - """ + // Constants for SQL queries + private val TASK_COLUMNS = + """ + tasks.id, + tasks.name, + tasks.parent_id, + c.name, + tasks.instruction, + tasks.status, + tasks.mapped_on, + tasks.completed_time_spent, + tasks.completed_by, + tasks.bundle_id, + tasks.is_bundle_primary, + tasks.cooperative_work_json::TEXT as cooperative_work, + task_review.review_status, + task_review.review_requested_by, + task_review.reviewed_by, + task_review.reviewed_at, + task_review.review_started_at, + task_review.meta_review_status, + task_review.meta_reviewed_by, + task_review.meta_reviewed_at, + task_review.additional_reviewers, + ST_AsGeoJSON(tasks.location) AS location, + priority, + CASE + WHEN task_review.review_started_at IS NULL THEN 0 + ELSE EXTRACT(epoch FROM (task_review.reviewed_at - task_review.review_started_at)) + END AS reviewDuration + """ + + private val JOIN_CLAUSE = """ + INNER JOIN challenges c ON c.id = tasks.parent_id + INNER JOIN projects p ON p.id = c.parent_id + LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id + """ + + private val filteredTasksCTE = """ + WITH filtered_tasks AS ( + SELECT tasks.id + FROM tasks + INNER JOIN challenges c ON c.id = tasks.parent_id + INNER JOIN projects p ON p.id = c.parent_id + LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id + WHERE %s ) + """ /** - * Queries task clusters with the give query filters and number of points + * Queries task clusters within the given parameters * * @param query - Query with the built in filters - * @param numberOfPoints - Number of points to return + * @param numberOfPoints - Number of points to return (capped by actual result count) * @param params - SearchParameters to save with the search + * @return List of TaskCluster objects representing grouped task points + * @note The clustering uses K-means algorithm to group nearby tasks. + * When count(*) = 1, additional task details are included in the result. + * The convex hull (bounding) is calculated for each cluster group. */ def queryTaskClusters( query: Query, numberOfPoints: Int, params: SearchParameters ): List[TaskCluster] = { + // Validate input parameters + require(numberOfPoints > 0, "Number of points must be positive") + require(query != null, "Query cannot be null") + require(params != null, "Search parameters cannot be null") + this.withMRTransaction { implicit c => val (sql, parameters) = this.getTaskClusterQuery(query, numberOfPoints) sql.insert( 0, - """SELECT kmeans, count(*) as numberOfPoints, - CASE WHEN count(*)=1 THEN (array_agg(taskId))[1] END as taskId, - CASE WHEN count(*)=1 THEN (array_agg(taskGeojson))[1] END as geojson, - CASE WHEN count(*)=1 THEN (array_agg(taskStatus))[1] END as taskStatus, - CASE WHEN count(*)=1 THEN (array_agg(taskPriority))[1] END as taskPriority, - ST_AsGeoJSON(ST_Centroid(ST_Collect(taskLocation))) AS geom, - ST_AsGeoJSON(ST_ConvexHull(ST_Collect(taskLocation))) AS bounding, - array_agg(distinct challengeIds) as challengeIds """ + SELECT kmeans, count(*) as numberOfPoints, + CASE WHEN count(*)=1 THEN (array_agg(taskId))[1] END as taskId, + CASE WHEN count(*)=1 THEN (array_agg(taskGeojson))[1] END as geojson, + CASE WHEN count(*)=1 THEN (array_agg(taskStatus))[1] END as taskStatus, + CASE WHEN count(*)=1 THEN (array_agg(taskPriority))[1] END as taskPriority, + ST_AsGeoJSON(ST_Centroid(ST_Collect(taskLocation))) AS geom, + ST_AsGeoJSON(ST_ConvexHull(ST_Collect(taskLocation))) AS bounding, + array_agg(distinct challengeIds) as challengeIds + """ ) sql.append("GROUP BY kmeans ORDER BY kmeans") @@ -83,12 +133,12 @@ class TaskClusterRepository @Inject() (override val db: Database, challengeDAL: (SELECT CASE WHEN COUNT(*) < $numberOfPoints THEN COUNT(*) ELSE $numberOfPoints END FROM tasks - $joinClause + $JOIN_CLAUSE WHERE ${query.filter.sql()} )::Integer ) OVER () AS kmeans, tasks.location as taskLocation, tasks.parent_id as challengeIds FROM tasks - $joinClause + $JOIN_CLAUSE WHERE ${query.filter.sql()} ) AS ksub WHERE location IS NOT NULL @@ -121,12 +171,16 @@ class TaskClusterRepository @Inject() (override val db: Database, challengeDAL: } /** - * Querys tasks in a bounding box + * Queries tasks within a specific bounding box with pagination support * - * @param query Query to execute - * @param paging - * @param c An available connection - * @return The list of Tasks found within the bounding box and the total count of tasks if not bounding + * @param query The base query containing filters + * @param order Sorting criteria for the results + * @param paging Pagination parameters + * @param location Optional bounding box coordinates + * @return Tuple of (total count, list of points) + * @note Uses a CTE (Common Table Expression) for better performance by filtering + * tasks first before joining with other tables. The spatial index is used + * when location filter is provided. */ def queryTasksInBoundingBox( query: Query, @@ -134,77 +188,41 @@ class TaskClusterRepository @Inject() (override val db: Database, challengeDAL: paging: Paging, location: Option[SearchLocation] ): (Int, List[ClusteredPoint]) = { + // Validate paging parameters + require(paging.limit > 0, "Page limit must be positive") + require(paging.page >= 0, "Page number must be non-negative") + + // Validate location coordinates if provided + location.foreach { loc => + require(loc.left < loc.right, "Invalid longitude range") + require(loc.bottom < loc.top, "Invalid latitude range") + } + this.withMRTransaction { implicit c => - // Extract the location filter if provided - val locationClause = location match { - case Some(loc) => + val locationClause = location + .map(loc => s"tasks.location && ST_MakeEnvelope(${loc.left}, ${loc.bottom}, ${loc.right}, ${loc.top}, 4326)" - case None => "TRUE" - } - - // Create a modified query that only selects task IDs - val filteredTasksCTE = s""" - WITH filtered_tasks AS ( - SELECT tasks.id - FROM tasks - INNER JOIN challenges c ON c.id = tasks.parent_id - INNER JOIN projects p ON p.id = c.parent_id - LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id - WHERE ${query.filter.sql()} ) - """ - - // Count query using the CTE - val countQuery = s""" - ${filteredTasksCTE} - SELECT count(*) FROM filtered_tasks - """ + .getOrElse("TRUE") - val count = SQL(countQuery).as(SqlParser.int("count").single) + val cte = filteredTasksCTE.format(query.filter.sql()) - // Main query using the CTE with ordering and paging - val orderClause = order.sql() + val count = SQL(s""" + ${cte} + SELECT count(*) FROM filtered_tasks + """).as(SqlParser.int("count").single) - // Explicitly construct the full query with LIMIT and OFFSET - val mainQuery = s""" - ${filteredTasksCTE} - SELECT tasks.id, - tasks.name, - tasks.parent_id, - c.name, - tasks.instruction, - tasks.status, - tasks.mapped_on, - tasks.completed_time_spent, - tasks.completed_by, - tasks.bundle_id, - tasks.is_bundle_primary, - tasks.cooperative_work_json::TEXT as cooperative_work, - task_review.review_status, - task_review.review_requested_by, - task_review.reviewed_by, - task_review.reviewed_at, - task_review.review_started_at, - task_review.meta_review_status, - task_review.meta_reviewed_by, - task_review.meta_reviewed_at, - task_review.additional_reviewers, - ST_AsGeoJSON(tasks.location) AS location, - priority, - CASE - WHEN task_review.review_started_at IS NULL THEN 0 - ELSE EXTRACT(epoch FROM (task_review.reviewed_at - task_review.review_started_at)) - END AS reviewDuration + val results = SQL(s""" + ${cte} + SELECT ${TASK_COLUMNS} FROM filtered_tasks INNER JOIN tasks ON tasks.id = filtered_tasks.id INNER JOIN challenges c ON c.id = tasks.parent_id INNER JOIN projects p ON p.id = c.parent_id LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id WHERE ${locationClause} - ${orderClause} LIMIT ${paging.limit} OFFSET ${paging.page} - """ - - val results = SQL(mainQuery).as(this.pointParser.*) + ${order.sql()} LIMIT ${paging.limit} OFFSET ${paging.page} + """).as(pointParser.*) (count, results) } @@ -225,55 +243,15 @@ class TaskClusterRepository @Inject() (override val db: Database, challengeDAL: limit: Int ): List[ClusteredPoint] = { this.withMRTransaction { implicit c => - // Extract the location filter if provided - val locationClause = location match { - case Some(loc) => + val locationClause = location + .map(loc => s"tasks.location && ST_MakeEnvelope(${loc.left}, ${loc.bottom}, ${loc.right}, ${loc.top}, 4326)" - case None => "TRUE" - } - - // Create a modified query that only selects task IDs - val filteredTasksCTE = s""" - WITH filtered_tasks AS ( - SELECT tasks.id - FROM tasks - INNER JOIN challenges c ON c.id = tasks.parent_id - INNER JOIN projects p ON p.id = c.parent_id - LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id - WHERE ${query.filter.sql()} ) - """ + .getOrElse("TRUE") - // Main query using the CTE - val mainQuery = s""" - ${filteredTasksCTE} - SELECT tasks.id, - tasks.name, - tasks.parent_id, - c.name, - tasks.instruction, - tasks.status, - tasks.mapped_on, - tasks.completed_time_spent, - tasks.completed_by, - tasks.bundle_id, - tasks.is_bundle_primary, - tasks.cooperative_work_json::TEXT as cooperative_work, - task_review.review_status, - task_review.review_requested_by, - task_review.reviewed_by, - task_review.reviewed_at, - task_review.review_started_at, - task_review.meta_review_status, - task_review.meta_reviewed_by, - task_review.meta_reviewed_at, - task_review.additional_reviewers, - ST_AsGeoJSON(tasks.location) AS location, - priority, - CASE - WHEN task_review.review_started_at IS NULL THEN 0 - ELSE EXTRACT(epoch FROM (task_review.reviewed_at - task_review.review_started_at)) - END AS reviewDuration + SQL(s""" + ${filteredTasksCTE.format(query.filter.sql())} + SELECT ${TASK_COLUMNS} FROM filtered_tasks INNER JOIN tasks ON tasks.id = filtered_tasks.id INNER JOIN challenges c ON c.id = tasks.parent_id @@ -281,9 +259,7 @@ class TaskClusterRepository @Inject() (override val db: Database, challengeDAL: LEFT OUTER JOIN task_review ON task_review.task_id = tasks.id WHERE ${locationClause} LIMIT ${limit} - """ - - SQL(mainQuery).on(query.parameters(): _*).as(this.pointParser.*) + """).on(query.parameters(): _*).as(pointParser.*) } }