diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java index b79c9809aa..1cc4072578 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java @@ -51,7 +51,7 @@ public class DBHostDAOImpl implements HostDAO { private static final String GET_GROUP_SIZE = "SELECT COUNT(host_id) FROM hosts WHERE group_name=?"; private static final String GET_ALL_HOSTS_BY_GROUP = "SELECT * FROM hosts WHERE group_name=? AND state!='TERMINATING'"; private static final String GET_HOST_BY_NAME = "SELECT * FROM hosts WHERE host_name=?"; - private static final String GET_HOST_BY_HOSTID = "SELECT * FROM hosts WHERE host_id=?"; + private static final String GET_HOST_BY_HOSTID = "SELECT * FROM hosts WHERE host_id=? ORDER BY create_date"; private static final String GET_HOSTS_BY_STATES = "SELECT * FROM hosts WHERE state in (?, ?, ?) GROUP BY host_id ORDER BY last_update"; private static final String GET_GROUP_NAMES_BY_HOST = "SELECT group_name FROM hosts WHERE host_name=?"; private static final String GET_STALE_AGENTLESS_HOST_IDS = "SELECT DISTINCT hosts.host_id FROM hosts LEFT JOIN hosts_and_agents ON hosts.host_id = hosts_and_agents.host_id WHERE hosts.last_update < ? 
AND hosts_and_agents.host_id IS NULL ORDER BY hosts.last_update DESC LIMIT ?"; diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/GoalAnalyst.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/GoalAnalyst.java index 08d106ac7f..65716e2797 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/GoalAnalyst.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/GoalAnalyst.java @@ -15,6 +15,9 @@ */ package com.pinterest.deployservice.handler; +import static com.pinterest.teletraan.universal.metrics.micrometer.PinStatsNamingConvention.CUSTOM_NAME_PREFIX; + +import java.time.Duration; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -43,6 +46,9 @@ import com.pinterest.deployservice.dao.DeployDAO; import com.pinterest.deployservice.dao.EnvironDAO; import com.pinterest.deployservice.dao.HostTagDAO; + +import io.micrometer.core.instrument.Metrics; + import com.pinterest.deployservice.dao.DeployConstraintDAO; import com.fasterxml.jackson.databind.ObjectMapper; @@ -53,6 +59,8 @@ public class GoalAnalyst { // Notice hotfix and rollback priority should still lower than system service priority private static final int HOT_FIX_PRIORITY = DeployPriority.HIGHER.getValue() - 20; private static final int ROLL_BACK_PRIORITY = DeployPriority.HIGHER.getValue() - 10; + private static final String DEPLOY_LATENCY_TIMER_NAME = CUSTOM_NAME_PREFIX + "teletraan.%s.%s.deploy_latency"; + private static final String FIRST_DEPLOY_COUNTER_NAME = CUSTOM_NAME_PREFIX + "teletraan.%s.%s.first_deploy"; private String host; private String host_id; @@ -346,18 +354,19 @@ boolean shouldUpdateAgentRecord(AgentBean origBean, AgentBean updateBean) { // We populate all the fields, since this could be used for insertOrUpdate as well AgentBean genUpdateBeanByReport(PingReportBean report, AgentBean agent) { // We generate complete bean in case we need to 
insertOrUpdate it into agents table + long currentTime = System.currentTimeMillis(); AgentBean updateBean = new AgentBean(); updateBean.setHost_name(host); updateBean.setHost_id(host_id); updateBean.setDeploy_id(report.getDeployId()); updateBean.setEnv_id(report.getEnvId()); - updateBean.setLast_update(System.currentTimeMillis()); + updateBean.setLast_update(currentTime); updateBean.setLast_operator(Constants.SYSTEM_OPERATOR); updateBean.setFail_count(report.getFailCount()); updateBean.setStatus(report.getAgentStatus()); updateBean.setLast_err_no(report.getErrorCode()); updateBean.setState(proposeNewAgentState(report, agent)); - updateBean.setStage_start_date(System.currentTimeMillis()); + updateBean.setStage_start_date(currentTime); updateBean.setDeploy_stage(report.getDeployStage()); if (report.getContainerHealthStatus() == null) { updateBean.setContainer_health_status(""); @@ -368,22 +377,36 @@ AgentBean genUpdateBeanByReport(PingReportBean report, AgentBean agent) { if (agent == null) { // if agent is missing in agent table, treat it as not first_deploy. 
updateBean.setFirst_deploy(false); - updateBean.setStart_date(System.currentTimeMillis()); + updateBean.setStart_date(currentTime); } else { updateBean.setFirst_deploy(agent.getFirst_deploy()); updateBean.setStart_date(agent.getStart_date()); } - if (report.getDeployStage() == DeployStage.SERVING_BUILD) { + if (report.getDeployStage() == DeployStage.SERVING_BUILD && updateBean.getFirst_deploy()) { // turn off first deploy flag updateBean.setFirst_deploy(false); - updateBean.setFirst_deploy_time(System.currentTimeMillis()); + updateBean.setFirst_deploy_time(currentTime); + emitMetrics(updateBean); } // TODO record error message as well if errorCode != 0 return updateBean; } + /** Best-effort metrics for a completed first deploy: records (first_deploy_time - start_date) on a timer and increments a counter tagged with "success", both named after the env and stage; any failure is logged and never propagated. */ private void emitMetrics(AgentBean updateBean) { + try { + EnvironBean env = envs.get(updateBean.getEnv_id()); + Metrics.timer(String.format(DEPLOY_LATENCY_TIMER_NAME, env.getEnv_name(), env.getStage_name())) + .record(Duration.ofMillis(updateBean.getFirst_deploy_time() - updateBean.getStart_date())); + Metrics.counter(String.format(FIRST_DEPLOY_COUNTER_NAME, env.getEnv_name(), env.getStage_name()), "success", + String.valueOf(AgentStatus.SUCCEEDED.equals(updateBean.getStatus()))) /* constant-first comparison: a null status is tagged success=false instead of throwing after the timer was already recorded */ + .increment(); + } catch (Exception ex) { + LOG.warn("Failed to emit metrics of {}", updateBean, ex); /* pass the bean itself; the logger formats it, so no explicit toString() call that could throw here */ + } + } + // Generate new agent bean based on the report & current agent record, // This is intended to be used for deploy goal to install next stage AgentBean genNextStageUpdateBean(EnvironBean env, PingReportBean report, AgentBean agent) { diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/PingHandler.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/PingHandler.java index 5d5e954a83..428318ea4f 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/PingHandler.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/handler/PingHandler.java @@ -15,7 +15,10 @@ */ package 
com.pinterest.deployservice.handler; +import static com.pinterest.teletraan.universal.metrics.micrometer.PinStatsNamingConvention.CUSTOM_NAME_PREFIX; + import java.sql.Connection; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -52,6 +55,7 @@ import com.pinterest.deployservice.bean.EnvType; import com.pinterest.deployservice.bean.EnvironBean; import com.pinterest.deployservice.bean.HostAgentBean; +import com.pinterest.deployservice.bean.HostBean; import com.pinterest.deployservice.bean.HostState; import com.pinterest.deployservice.bean.HostTagBean; import com.pinterest.deployservice.bean.OpCode; @@ -80,6 +84,8 @@ import com.pinterest.deployservice.dao.UtilDAO; import com.pinterest.deployservice.pingrequests.PingRequestValidator; +import io.micrometer.core.instrument.Metrics; + /** * This is where we handle agent ping and return deploy goal! */ @@ -88,7 +94,7 @@ public class PingHandler { private static final PingResponseBean NOOP; private static final Set EMPTY_GROUPS; private static final String PINTEREST_MAIN_AWS_ACCOUNT = "998131032990"; - //private static final long AGENT_COUNT_CACHE_TTL = 5 * 1000; + private static final String PROVISION_LATENCY_TIMER_NAME = CUSTOM_NAME_PREFIX + "teletraan.%s.provision_latency"; static { NOOP = new PingResponseBean(); @@ -198,27 +204,45 @@ void updateHosts(String hostName, String hostIp, String hostId, Set grou void updateHostStatus(String hostId, String hostName, String hostIp, String agentVersion, String asg) throws Exception { HostAgentBean hostAgentBean = hostAgentDAO.getHostById(hostId); - long current_time = System.currentTimeMillis(); + long currentTime = System.currentTimeMillis(); boolean isExisting = true; if (hostAgentBean == null) { hostAgentBean = new HostAgentBean(); hostAgentBean.setHost_id(hostId); - hostAgentBean.setCreate_date(current_time); + hostAgentBean.setCreate_date(currentTime); isExisting = false; } hostAgentBean.setHost_name(hostName); 
hostAgentBean.setIp(hostIp); - hostAgentBean.setLast_update(current_time); + hostAgentBean.setLast_update(currentTime); hostAgentBean.setAgent_Version(agentVersion); hostAgentBean.setAuto_scaling_group(asg); if (!isExisting) { + // First ping hostAgentDAO.insert(hostAgentBean); + emitProvisionLatency(currentTime, hostId, asg); } else { hostAgentDAO.update(hostId, hostAgentBean); } } + /** Best-effort: records (currentTime - create_date of the first host row returned for hostId) on a timer whose name embeds the ASG; any failure is logged and never propagated. */ void emitProvisionLatency(long currentTime, String hostId, String asg) { + try { + List<HostBean> hosts = hostDAO.getHostsByHostId(hostId); /* typed list: a raw List would not allow the HostBean assignment below */ + if (hosts.isEmpty()) { + LOG.warn("No host found for hostId {}, skip", hostId); + return; + } + String timerName = String.format(PROVISION_LATENCY_TIMER_NAME, asg); /* NOTE(review): a null asg is formatted as the literal "null" in the metric name - confirm callers always supply one */ + HostBean initialHost = hosts.get(0); /* presumably the oldest row, given the ORDER BY create_date on GET_HOST_BY_HOSTID - confirm that query backs getHostsByHostId */ + long provisionLatency = currentTime - initialHost.getCreate_date(); + Metrics.timer(timerName).record(Duration.ofMillis(provisionLatency)); + } catch (Exception e) { + LOG.warn("Failed to emit infra latency for " + hostId, e); + } + } + void deleteAgentSafely(String hostId, String envId) { try { LOG.debug("Delete agent {}/{} record.", hostId, envId); diff --git a/deploy-service/common/src/test/java/com/pinterest/deployservice/handler/GoalAnalystTest.java b/deploy-service/common/src/test/java/com/pinterest/deployservice/handler/GoalAnalystTest.java index b5af5d37bf..78eb838f21 100644 --- a/deploy-service/common/src/test/java/com/pinterest/deployservice/handler/GoalAnalystTest.java +++ b/deploy-service/common/src/test/java/com/pinterest/deployservice/handler/GoalAnalystTest.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -315,6 +315,7 @@ public void testFirstTimeDeployPostRestart() throws Exception { AgentBean agent = genDefaultAgent(); agent.setFirst_deploy(true); + agent.setStart_date(0L); agents.put(agent.getEnv_id(), agent); GoalAnalyst analyst = new GoalAnalyst(null, null, null, null, "foo", "id-1", envs, reports, agents, null); analyst.analysis(); @@ -348,6 +349,7 @@ public void testFirstTimeDeployEnd() throws Exception { AgentBean agent = genDefaultAgent(); agent.setFirst_deploy(true); + agent.setStart_date(0L); agents.put(agent.getEnv_id(), agent); GoalAnalyst analyst = new GoalAnalyst(null, null, null, null, "foo", "id-1", envs, reports, agents, null); analyst.analysis();