From 45db9413e9d5842d7729f6f90664b3493a940c9c Mon Sep 17 00:00:00 2001 From: octodog Date: Thu, 11 Jul 2024 21:35:03 +0900 Subject: [PATCH] feat: Enable to configure sync_container_lifecycles task (#2338) (#2433) Co-authored-by: Sanghun Lee Backported-from: main (24.09) Backported-to: 24.03 Backport-of: 2338 --- changes/2338.fix.md | 1 + src/ai/backend/agent/agent.py | 8 +++++++- src/ai/backend/agent/config.py | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 changes/2338.fix.md diff --git a/changes/2338.fix.md b/changes/2338.fix.md new file mode 100644 index 0000000000..bda3e39fd7 --- /dev/null +++ b/changes/2338.fix.md @@ -0,0 +1 @@ +Add support for configuring the `sync_container_lifecycles()` task. diff --git a/src/ai/backend/agent/agent.py b/src/ai/backend/agent/agent.py index 4a373eaea0..4863462528 100644 --- a/src/ai/backend/agent/agent.py +++ b/src/ai/backend/agent/agent.py @@ -704,7 +704,13 @@ async def _pipeline(r: Redis): self.timer_tasks.append(aiotools.create_timer(self.heartbeat, heartbeat_interval)) # Prepare auto-cleaning of idle kernels. 
- self.timer_tasks.append(aiotools.create_timer(self.sync_container_lifecycles, 10.0)) + sync_container_lifecycles_config = self.local_config["agent"]["sync-container-lifecycles"] + if sync_container_lifecycles_config["enabled"]: + self.timer_tasks.append( + aiotools.create_timer( + self.sync_container_lifecycles, sync_container_lifecycles_config["interval"] + ) + ) if abuse_report_path := self.local_config["agent"].get("abuse-report-path"): log.info( diff --git a/src/ai/backend/agent/config.py b/src/ai/backend/agent/config.py index 6f2526b143..f7a7cdcf75 100644 --- a/src/ai/backend/agent/config.py +++ b/src/ai/backend/agent/config.py @@ -16,6 +16,11 @@ "size-limit": "64M", } +default_sync_container_lifecycles_config = { + "enabled": True, + "interval": 10.0, +} + agent_local_config_iv = ( t.Dict({ t.Key("agent"): t.Dict({ @@ -59,6 +64,16 @@ t.Key("force-terminate-abusing-containers", default=False): t.ToBool, t.Key("kernel-creation-concurrency", default=4): t.ToInt[1:32], t.Key("use-experimental-redis-event-dispatcher", default=False): t.ToBool, + t.Key( + "sync-container-lifecycles", default=default_sync_container_lifecycles_config + ): t.Dict({ + t.Key( + "enabled", default=default_sync_container_lifecycles_config["enabled"] + ): t.ToBool, + t.Key( + "interval", default=default_sync_container_lifecycles_config["interval"] + ): t.ToFloat[0:], + }).allow_extra("*"), }).allow_extra("*"), t.Key("container"): t.Dict({ t.Key("kernel-uid", default=-1): tx.UserID,