diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java index de81762490..e61447dec7 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java index 0e656a288b..fc1714ec99 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java @@ -56,7 +56,7 @@ public interface HostDAO { List getTerminatingHosts() throws Exception; - List getAgentlessHostIds(int limit) throws SQLException; + List getStaleAgentlessHostIds(long noUpdateSince, int limit) throws SQLException; Collection getHostsByEnvId(String envId) throws Exception; diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java index 048e0d580e..3b95e4f167 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java @@ -52,7 +52,7 @@ public class DBHostDAOImpl implements HostDAO { private static final String GET_HOST_BY_HOSTID = "SELECT * FROM hosts WHERE host_id=?"; private static final String GET_HOSTS_BY_STATES = "SELECT * FROM hosts WHERE state in (?, ?, ?) GROUP BY host_id ORDER BY last_update"; private static final String GET_GROUP_NAMES_BY_HOST = "SELECT group_name FROM hosts WHERE host_name=?"; - private static final String GET_AGENTLESS_HOST_IDS = "SELECT DISTINCT hosts.host_id FROM hosts LEFT JOIN hosts_and_agents ON hosts.host_id = hosts_and_agents.host_id WHERE hosts_and_agents.host_id IS NULL ORDER BY hosts.last_update LIMIT ?"; + private static final String GET_STALE_AGENTLESS_HOST_IDS = "SELECT DISTINCT hosts.host_id FROM hosts LEFT JOIN hosts_and_agents ON hosts.host_id = hosts_and_agents.host_id WHERE last_update < ? AND hosts_and_agents.host_id IS NULL ORDER BY hosts.last_update DESC LIMIT ?"; private static final String GET_HOST_NAMES_BY_GROUP = "SELECT host_name FROM hosts WHERE group_name=?"; private static final String GET_HOST_IDS_BY_GROUP = "SELECT DISTINCT host_id FROM hosts WHERE group_name=?"; private static final String GET_HOSTS_BY_ENVID = "SELECT h.* FROM hosts h INNER JOIN groups_and_envs ge ON ge.group_name = h.group_name WHERE ge.env_id=? UNION DISTINCT SELECT hs.* FROM hosts hs INNER JOIN hosts_and_envs he ON he.host_name = hs.host_name WHERE he.env_id=?"; @@ -195,9 +195,9 @@ public List getTerminatingHosts() throws Exception { } @Override - public List getAgentlessHostIds(int limit) throws SQLException { + public List getStaleAgentlessHostIds(long noUpdateSince, int limit) throws SQLException { ResultSetHandler> h = new BeanListHandler<>(String.class); - return new QueryRunner(dataSource).query(GET_AGENTLESS_HOST_IDS, h, limit); + return new QueryRunner(dataSource).query(GET_STALE_AGENTLESS_HOST_IDS, h, noUpdateSince, limit); } @Override diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/HostHandler.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/HostHandler.java index a0d6d70d6e..0d240f4cb5 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/HostHandler.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/HostHandler.java @@ -1,17 +1,14 @@ package com.pinterest.deployservice.handler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import com.pinterest.deployservice.ServiceContext; -import com.pinterest.deployservice.bean.HostBean; -import com.pinterest.deployservice.common.CommonUtils; import com.pinterest.deployservice.dao.AgentDAO; -import com.pinterest.deployservice.dao.HostDAO; import com.pinterest.deployservice.dao.HostAgentDAO; +import com.pinterest.deployservice.dao.HostDAO; import com.pinterest.deployservice.dao.HostTagDAO; -import com.pinterest.deployservice.handler.HostHandler; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class HostHandler { private static final Logger LOG = LoggerFactory.getLogger(HostHandler.class); @@ -27,7 +24,7 @@ public HostHandler(ServiceContext serviceContext) { hostTagDAO = serviceContext.getHostTagDAO(); } - public void removeHost(String hostId) throws Exception { + public void removeHost(String hostId) { try { hostDAO.deleteAllById(hostId); agentDAO.deleteAllById(hostId); diff --git a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java index a6ec76332e..a26d178d76 100644 --- a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java +++ b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java @@ -43,7 +43,7 @@ public class AgentJanitor extends SimpleAgentJanitor { private static final Logger LOG = LoggerFactory.getLogger(AgentJanitor.class); private final RodimusManager rodimusManager; private long maxLaunchLatencyThreshold; - private long absoluteThreshold = 2 * 7 * 24 * 3600 * 1000; // 2 weeks + private long absoluteThreshold = 24 * 3600 * 1000; // 1 day private int agentlessHostBatchSize = 300; public AgentJanitor(ServiceContext serviceContext, int minStaleHostThreshold, @@ -184,9 +184,11 @@ private void processStaleHosts() { * Hosts may stuck in this state so we should clean up here. */ private void cleanUpAgentlessHosts() { + long current_time = System.currentTimeMillis(); + long noUpdateSince = current_time - absoluteThreshold; List agentlessHosts; try { - agentlessHosts = hostDAO.getAgentlessHostIds(agentlessHostBatchSize); + agentlessHosts = hostDAO.getStaleAgentlessHostIds(noUpdateSince, agentlessHostBatchSize); } catch (SQLException ex) { LOG.error("failed to get agentless hosts", ex); return; diff --git a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/SimpleAgentJanitor.java b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/SimpleAgentJanitor.java index ec95488832..5ece6c726d 100644 --- a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/SimpleAgentJanitor.java +++ b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/SimpleAgentJanitor.java @@ -61,11 +61,7 @@ public SimpleAgentJanitor(ServiceContext serviceContext, int minStaleHostThresho // remove the stale host from db void removeStaleHost(String id) { LOG.info(String.format("Delete records of stale host {}", id)); - try { - hostHandler.removeHost(id); - } catch (Exception e) { - LOG.error("Failed to delete all records for host {}. exception {}", id, e); - } + hostHandler.removeHost(id); } void markUnreachableHost(String id) {