Skip to content

Commit

Permalink
Merge pull request #26 from swisspost/develop
Browse files Browse the repository at this point in the history
PR for release
  • Loading branch information
mcweba authored Dec 12, 2024
2 parents eaf94f4 + 95e77e5 commit b1543e9
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ vertx-cluster-watchdog
[![Build Status](https://travis-ci.com/swisspush/vertx-cluster-watchdog.svg?branch=master)](https://travis-ci.com/swisspush/vertx-cluster-watchdog)
[![codecov](https://codecov.io/gh/swisspost/vertx-cluster-watchdog/branch/master/graph/badge.svg?token=nbrYxHDMmJ)](https://codecov.io/gh/swisspost/vertx-cluster-watchdog)

Checks if all your hazelcast cluster members are receiveing published messages over the bus.
Checks if all your hazelcast cluster members are receiving published messages over the bus.

How to run the watchdog
-----------------------
Expand Down Expand Up @@ -63,3 +63,22 @@ Tests
-----

The tests try to simulate the cluster with multiple instances of the verticle. The amount of cluster members is injected over the config.

Micrometer metrics
------------------
When enabled, `vertx-cluster-watchdog` is monitored with micrometer. The following metrics are available:
* cluster_watchdog_members
* cluster_watchdog_members_responded

Example metrics:

```
# HELP cluster_watchdog_members Amount of members visible to the cluster
# TYPE cluster_watchdog_members gauge
cluster_watchdog_members 2.0
# HELP cluster_watchdog_members_responded Amount of cluster members responded when accessed
# TYPE cluster_watchdog_members_responded gauge
cluster_watchdog_members_responded 2.0
```

To enable the metrics, set a `MeterRegistry` instance by calling `setMeterRegistry(MeterRegistry meterRegistry)` method in `ClusterWatchdog` class.
8 changes: 7 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.swisspush</groupId>
<artifactId>cluster-watchdog</artifactId>
<version>3.1.1-SNAPSHOT</version>
<version>3.1.2-SNAPSHOT</version>
<name>cluster-watchdog</name>
<packaging>jar</packaging>
<description>
Expand Down Expand Up @@ -79,6 +79,11 @@
<artifactId>slf4j-simple</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-core</artifactId>
<version>${micrometer.version}</version>
</dependency>
<!-- TEST dependencies -->
<dependency>
<groupId>junit</groupId>
Expand Down Expand Up @@ -399,6 +404,7 @@
<properties>
<vertx.version>4.5.2</vertx.version>
<slf4j.version>2.0.10</slf4j.version>
<micrometer.version>1.12.13</micrometer.version>
<commons-lang.version>2.6</commons-lang.version>
<commons-io.version>2.15.1</commons-io.version>
<commons-collections4.version>4.4</commons-collections4.version>
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/org/swisspush/vertx/cluster/ClusterWatchdog.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.swisspush.vertx.cluster;

import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.MeterRegistry;
import io.vertx.core.AbstractVerticle;
import io.vertx.core.Handler;
import io.vertx.core.Promise;
Expand All @@ -11,6 +13,7 @@

import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;

public class ClusterWatchdog extends AbstractVerticle {

Expand All @@ -32,6 +35,9 @@ public class ClusterWatchdog extends AbstractVerticle {
private Map<String,List<JsonObject>> healthCheckResponses;
private ClusterWatchdogHttpHandler clusterWatchdogHttpHandler;

private final AtomicLong atomicClusterMemberCountRequired = new AtomicLong(0);
private final AtomicLong atomicClusterMemberRespondersCount = new AtomicLong(0);

@Override
public void start(Promise<Void> startPromise) {

Expand All @@ -50,6 +56,7 @@ public void start(Promise<Void> startPromise) {
} else {
clusterMemberCount = clusterMemberCountFromConfig;
}
atomicClusterMemberRespondersCount.set(0);

int resultQueueLength = config.getInteger("resultQueueLength", 100);
log.info("ClusterWatchdog used resultQueueLength: " + resultQueueLength);
Expand Down Expand Up @@ -111,6 +118,15 @@ public void start(Promise<Void> startPromise) {
});
}

public void setMeterRegistry(MeterRegistry meterRegistry) {
if(meterRegistry != null) {
Gauge.builder("cluster.watchdog.members", atomicClusterMemberCountRequired, AtomicLong::get)
.description("Amount of members visible to the cluster").register(meterRegistry);
Gauge.builder("cluster.watchdog.members.responded", atomicClusterMemberRespondersCount, AtomicLong::get)
.description("Amount of cluster members responded when accessed").register(meterRegistry);
}
}

class ClusterCheckHandler implements Handler<Long> {

public void handle(Long event) {
Expand All @@ -135,6 +151,8 @@ public void handle(Long event) {
return;
}

atomicClusterMemberCountRequired.set(clusterMemberCount);

// publish the broadcast event which will us get the response of all the registered handlers
eb.publish(BROADCAST, testpayload);

Expand All @@ -148,6 +166,9 @@ public void handle(Long event) {
watchdogResult.time = time;
watchdogResult.verticleId = uniqueId;
watchdogResult.clusterMemberCount = clusterMemberCount;

atomicClusterMemberRespondersCount.set(responses != null ? responses.size() : 0);

if(responses == null) {
log.error("ClusterWatchdog found no responses for timestamp: " + timestamp);
watchdogResult.status = ClusterHealthStatus.INCONSISTENT;
Expand Down

0 comments on commit b1543e9

Please sign in to comment.