From f2c6c3ce759e759f8d002eb2eb39326cf6dbb63a Mon Sep 17 00:00:00 2001 From: Eugene Batalov Date: Tue, 17 Dec 2024 18:39:08 +0000 Subject: [PATCH] Store task stdout and stderr in blobs with task_id in names We were naming task stdout and stderr blobs with invocation ids but without task_ids. If a function runs multiple times per invocation, each task overwrites the stdout and stderr blobs written by the others. Also, depending on timing, the Server DB could store wrong blob sizes (because blobs could get overwritten after the DB record was saved). This is all fixed just by adding task_id to the stdout and stderr blob names. The maximum S3 object name length is 1024 bytes. Storing two UUIDs (invocation id and task id) uses < 100 bytes of this space, so this should be fine. No other places need to be updated because the DB is keyed by task_ids, and all Server APIs also include task ids in their URLs. Testing: make build cargo test --- server/src/routes/internal_ingest.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/routes/internal_ingest.rs b/server/src/routes/internal_ingest.rs index 32834398c..6f3666f28 100644 --- a/server/src/routes/internal_ingest.rs +++ b/server/src/routes/internal_ingest.rs @@ -140,11 +140,12 @@ pub async fn ingest_files_from_executor( IndexifyAPIError::bad_request("task_result is required before diagnostics") })?; let file_name = format!( - "{}.{}.{}.{}.{}", + "{}.{}.{}.{}.{}.{}", task_result.namespace, task_result.compute_graph, task_result.compute_fn, task_result.invocation_id, + task_result.task_id, name, ); let res = write_to_disk(state.clone().blob_storage, &mut field, &file_name).await?;