Skip to content

Commit

Permalink
Merge branch 'deploy/hammer' into hammer/use-labels
Browse files Browse the repository at this point in the history
Signed-off-by: Aaron Chong <[email protected]>
  • Loading branch information
aaronchongth committed May 8, 2024
2 parents 6c8dcf2 + 35a9b0d commit 38e70a4
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions packages/api-server/api_server/repositories/tasks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import sys
from datetime import datetime
from typing import Dict, List, Optional, Sequence, Tuple, cast
Expand Down Expand Up @@ -38,7 +37,6 @@ def __init__(
):
self.user = user
self.logger = logger
self.save_task_state_mutex = asyncio.Lock()

async def save_task_request(
self, task_state: TaskState, task_request: TaskRequest
Expand Down Expand Up @@ -103,19 +101,24 @@ async def save_task_state(self, task_state: TaskState) -> None:
task_state.unix_millis_warn_time / 1000
)

# FIXME: If the task dispatcher is also provided websocket access to
# the API server, when a new task is dispatched via the API server,
# there may be a race condition where both the ROS 2 task response and
# task dispatcher websocket update may attempt to create a new task
# state model with the same task ID. This have unfortunately not been
# reproducible locally, only in the production environment, which uses
# Postgres instead of sqlite. This may be fixed upstream in DB or ORM,
# this mutex can be removed once these libraries have been updated and
# tested to be fixed.
async with self.save_task_state_mutex:
try:
await ttm.TaskState.update_or_create(
task_state_dict, id_=task_state.booking.id
)
except Exception as e: # pylint: disable=W0703
# This is to catch a combination of exceptions from Tortoise ORM,
# especially in the case where a data race occurs when two instances
# of update_or_create attempts to create entries with the same task
# ID at the same time. The exceptions expected are DoesNotExist,
# IntegrityError and TransactionManagementError.
# This data race happens when the server attempts to record the RMF
# service call response and Task dispatcher's websocket push at
# almost the same time.
# FIXME: this has not been observed outside of production
# environment, and may be fixed upstream in updated libraries.
self.logger.error(
f"Failed to save task state of id [{task_state.booking.id}] [{e}]"
)

async def query_task_states(
self, query: QuerySet[DbTaskState], pagination: Optional[Pagination] = None
Expand Down

0 comments on commit 38e70a4

Please sign in to comment.