Skip to content

Commit

Permalink
Fix ConcurrentModificationException (#1456)
Browse files Browse the repository at this point in the history
Update worker to better log exceptions
Also fixed ConcurrentModificationException
  • Loading branch information
tylerwowen authored Feb 21, 2024
1 parent f61bfea commit 7faf7f2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.time.Duration;
import java.time.Instant;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Collection;
Expand Down Expand Up @@ -101,25 +102,30 @@ public MetricsEmitter(ServiceContext serviceContext, Clock clock) {

/**
 * Runs one metrics pass by delegating to {@link #emitLaunchingMetrics()}.
 *
 * NOTE(review): this listing is a rendered diff in which removed and added lines are
 * interleaved without +/- markers. The bare call directly inside the method is presumably
 * the pre-change line and the try/catch its replacement — confirm against the repository.
 */
@Override
public void run() {
emitLaunchingMetrics();
try {
emitLaunchingMetrics();
} catch (Exception e) {
// An exception from the guarded call is logged with its stack trace and not rethrown.
LOG.error("Failed to emit launching metrics", e);
}
}

/**
 * Runs one pass of the launching-metrics steps by calling, in the order shown,
 * updateHostClassification, processRemovedHosts, processNewHosts and cleanUpTimers.
 *
 * NOTE(review): this listing is a rendered diff in which removed and added lines are
 * interleaved without +/- markers, which is presumably why several helper calls appear both
 * inside and outside the try block — confirm the post-change ordering against the repository.
 */
void emitLaunchingMetrics() {
// Cutoff = clock.wallTime() (interpreted as epoch milliseconds) minus LAUNCH_TIMEOUT_MINUTE minutes.
Instant timeoutCutoff = Instant.ofEpochMilli(clock.wallTime()).minus(Duration.ofMinutes(LAUNCH_TIMEOUT_MINUTE));
updateHostClassification(timeoutCutoff);
try {
processRemovedHosts();
processNewHosts();
updateHostClassification(timeoutCutoff);
} catch (Exception e) {
// An exception from the guarded calls is logged and not rethrown, so the statements below still run.
LOG.error("Failed to update host classification", e);
}
processRemovedHosts();
processNewHosts();
cleanUpTimers();
}

private void updateHostClassification(Instant timeoutCutoff) {
try {
List<HostBean> agentlessHosts = hostDAO
.getAgentlessHosts(Instant.ofEpochMilli(clock.wallTime()).minus(Duration.ofMinutes(MAX_TRACK_DURATION_MINUTE)).toEpochMilli(), 10000);
.getAgentlessHosts(Instant.ofEpochMilli(clock.wallTime()).minus(Duration.ofMinutes(MAX_TRACK_DURATION_MINUTE))
.toEpochMilli(), 10000);
hostClassifier.updateClassification(agentlessHosts, timeoutCutoff);
} catch (SQLException e) {
LOG.error("Failed to get agentless hosts", e);
Expand Down Expand Up @@ -166,11 +172,16 @@ private void processNewHosts() {
* Clean up timers for hosts that have been initializing for too long
*/
private void cleanUpTimers() {
for (String hostId : hostTimers.keySet()) {
LongTaskTimer.Sample sample = hostTimers.get(hostId);
Iterator<Map.Entry<String, LongTaskTimer.Sample>> iterator = hostTimers.entrySet().iterator();

while (iterator.hasNext()) {
Map.Entry<String, LongTaskTimer.Sample> entry = iterator.next();
String hostId = entry.getKey();
LongTaskTimer.Sample sample = entry.getValue();

if (sample.duration(TimeUnit.MINUTES) > (double) MAX_TRACK_DURATION_MINUTE) {
sample.stop();
hostTimers.remove(hostId);
iterator.remove();
errorBudgetFailure.increment();
LOG.info("Removed timer for host {} after max tracking duration", hostId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public void testEmitLaunchingMetrics() throws SQLException {
HostBean normalHost = createHostBean(Instant.ofEpochMilli(t2));
HostBean carryOverHost = createHostBean(Instant.ofEpochMilli(t2));
HostBean cleanedUpHost = createHostBean(Instant.ofEpochMilli(t2));
HostBean cleanedUpHost2 = createHostBean(Instant.ofEpochMilli(t2));

// T2
when(hostDAO.getAgentlessHosts(anyLong(), anyInt()))
Expand Down Expand Up @@ -177,12 +178,12 @@ public void testEmitLaunchingMetrics() throws SQLException {
// appears in the list
clock.add(Duration.ofMinutes(MetricsEmitter.LAUNCH_TIMEOUT_MINUTE));
when(hostDAO.getAgentlessHosts(anyLong(), anyInt()))
.thenReturn(Arrays.asList(cleanedUpHost));
.thenReturn(Arrays.asList(cleanedUpHost, cleanedUpHost2));
sut.emitLaunchingMetrics();

assertEquals(0, timer.activeTasks());
assertEquals(1, successCounter.count(), 0.01);
assertEquals(3, failureCounter.count(), 0.01);
assertEquals(4, failureCounter.count(), 0.01);

// When cleanedUpHost is removed from the list, the metrics won't change again
when(hostDAO.getAgentlessHosts(anyLong(), anyInt()))
Expand All @@ -191,6 +192,6 @@ public void testEmitLaunchingMetrics() throws SQLException {

assertEquals(0, timer.activeTasks());
assertEquals(1, successCounter.count(), 0.01);
assertEquals(3, failureCounter.count(), 0.01);
assertEquals(4, failureCounter.count(), 0.01);
}
}

0 comments on commit 7faf7f2

Please sign in to comment.