From 8725fdefb8e769b13e3d5bdabf64b3a23f7063f1 Mon Sep 17 00:00:00 2001 From: Tyler Ouyang Date: Mon, 5 Feb 2024 16:38:50 -0800 Subject: [PATCH] Handle Agent without host (#1443) Usually the host being checked is not terminated. However, there might be some synchronization latency. Mark it as not stale and we will handle it in the next run. This will reduce error budget noise. --- .../com/pinterest/teletraan/worker/AgentJanitor.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java index 602523dd33..3159b7a089 100644 --- a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java +++ b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java @@ -121,11 +121,17 @@ private boolean isHostStale(HostAgentBean hostAgentBean) { HostBean hostBean; try { - hostBean = hostDAO.getHostsByHostId(hostAgentBean.getHost_id()).get(0); + List<HostBean> hostBeans = hostDAO.getHostsByHostId(hostAgentBean.getHost_id()); + if (hostBeans.isEmpty()) { + // Usually the host being checked is not terminated. However there might be some + // synchronization latency. + // Mark it as not stale and we will handle it in the next run. + return false; + } + hostBean = hostBeans.get(0); } catch (Exception ex) { LOG.error("failed to get host bean for ({}), {}", hostAgentBean, ex); errorBudgetFailure.increment(); - return false; }