From 3834f50599254bc3d6c943b4171b145e6b8148ce Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 11 Apr 2024 13:06:06 -0700 Subject: [PATCH 01/44] Add Replicator stats draft --- .../stats/OpenTelemetryReplicatorMetrics.java | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java new file mode 100644 index 0000000000000..cc094798b20ff --- /dev/null +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.broker.stats; + +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.BatchCallback; +import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; +import io.opentelemetry.api.metrics.ObservableLongMeasurement; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import org.apache.pulsar.broker.PulsarService; +import org.apache.pulsar.broker.service.AbstractTopic; +import org.apache.pulsar.broker.service.Dispatcher; +import org.apache.pulsar.broker.service.Replicator; +import org.apache.pulsar.broker.service.Subscription; +import org.apache.pulsar.broker.service.Topic; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.common.policies.data.BacklogQuota; +import org.apache.pulsar.common.stats.MetricsUtil; +import org.apache.pulsar.compaction.CompactedTopicContext; +import org.apache.pulsar.compaction.Compactor; +import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; + +public class OpenTelemetryReplicatorStats implements AutoCloseable { + + // Replaces pulsar_replication_rate_in + public static final String MESSAGE_IN_COUNTER = "pulsar.broker.replication.message.incoming.count"; + private final ObservableLongMeasurement messageInCounter; + + // Replaces pulsar_replication_rate_out + public static final String MESSAGE_OUT_COUNTER = "pulsar.broker.replication.message.outgoing.count"; + private final ObservableLongMeasurement messageOutCounter; + + // Replaces pulsar_replication_throughput_in + public static final String BYTES_IN_COUNTER = "pulsar.broker.replication.message.incoming.size"; + private final ObservableLongMeasurement bytesInCounter; + + // Replaces pulsar_replication_throughput_out + public static final String BYTES_OUT_COUNTER = "pulsar.broker.replication.message.outgoing.size"; + private final ObservableLongMeasurement bytesOutCounter; + + // Replaces pulsar_replication_backlog + public static final String BACKLOG_COUNTER = "pulsar.broker.replication.message.backlog"; + private final ObservableLongMeasurement backlogCounter; + + // Replaces pulsar_replication_rate_expired + public static final String EXPIRED_COUNTER = "pulsar.broker.replication.message.expired"; + private final ObservableLongMeasurement expiredCounter; + + // Replaces pulsar_replication_connected_count + public static final String CONNECTED_COUNTER = "pulsar.broker.replication.connected.count"; + private final ObservableLongMeasurement connectedCounter; + + // Replaces pulsar_replication_delay_in_seconds + public static final String DELAY_GAUGE = "pulsar.broker.replication.message.age"; + private final ObservableLongMeasurement delayGauge; + + private final BatchCallback batchCallback; + private final PulsarService pulsar; + + public OpenTelemetryReplicatorStats(PulsarService pulsar) { + this.pulsar = pulsar; + var meter = pulsar.getOpenTelemetry().getMeter(); + + messageInCounter = meter + .upDownCounterBuilder(MESSAGE_IN_COUNTER) + .setUnit("{message}") + .setDescription("The total number of messages received from the remote cluster through this replicator.") + .buildObserver(); + + messageOutCounter = meter + .upDownCounterBuilder(MESSAGE_OUT_COUNTER) + .setUnit("{message}") + .setDescription("The total number of messages sent to the remote cluster through this replicator.") + .buildObserver(); + + bytesInCounter = meter + .upDownCounterBuilder(BYTES_IN_COUNTER) + .setUnit("{byte}") + .setDescription("The total number of messages bytes received from the remote cluster through this replicator.") + .buildObserver(); + + bytesOutCounter = meter + .upDownCounterBuilder(BYTES_OUT_COUNTER) + .setUnit("{byte}") + .setDescription("The total number of messages bytes sent to the remote cluster through this replicator.") + .buildObserver(); + + backlogCounter = meter + .upDownCounterBuilder(BACKLOG_COUNTER) + .setUnit("{message}") + .setDescription("The total number of messages in the backlog for this replicator.") + .buildObserver(); + + expiredCounter = meter + .upDownCounterBuilder(EXPIRED_COUNTER) + .setUnit("{message}") + .setDescription("The total number of messages that expired for this replicator.") + .buildObserver(); + + connectedCounter = meter + .upDownCounterBuilder(CONNECTED_COUNTER) + .setUnit("{subscriber}") + .setDescription("The total number of replication subscribers that are running.") + .buildObserver(); + + delayGauge = meter + .upDownCounterBuilder(DELAY_GAUGE) + .setUnit("{second}") + .setDescription("The total number of messages that expired for this replicator.") + .buildObserver(); + + batchCallback = meter.batchCallback(() -> pulsar.getBrokerService() + .getTopics() + .values() + .stream() + .map(topicFuture -> topicFuture.getNow(Optional.empty())) + .forEach(topic -> topic.ifPresent(this::recordMetricsForTopic)), + messageInCounter, + messageOutCounter, + bytesInCounter, + bytesOutCounter, + backlogCounter, + expiredCounter, + connectedCounter, + delayGauge); + } + + @Override + public void close() { + batchCallback.close(); + } + + private void recordMetricsForTopic(Topic topic) { + var attributes = Attributes.builder() + .put(OpenTelemetryAttributes.PULSAR_TOPIC, topic.getName()) + .build(); + var replicators = topic.getReplicators(); + + topic.getReplicators().values().stream().mapMulti(new BiConsumer>() { + @Override + public void accept(Replicator replicator, Consumer consumer) { + + } + }); + + messageInCounter.record(dummyValue, attributes); + messageOutCounter.record(dummyValue, attributes); + bytesInCounter.record(dummyValue, attributes); + bytesOutCounter.record(dummyValue, attributes); + backlogCounter.record(dummyValue, attributes); + expiredCounter.record(dummyValue, attributes); + connectedCounter.record(dummyValue, attributes); + delayGauge.record(dummyValue, attributes); + } +} From 5297854c4e4053e3039b1170261ac26fbbf2d712 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 15 Apr 2024 10:43:00 -0700 Subject: [PATCH 02/44] Dummy commit --- .../broker/stats/OpenTelemetryReplicatorMetrics.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java index cc094798b20ff..f73fab5baa88a 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java @@ -20,23 +20,13 @@ import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.metrics.BatchCallback; -import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; import io.opentelemetry.api.metrics.ObservableLongMeasurement; import java.util.Optional; -import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.function.Consumer; import org.apache.pulsar.broker.PulsarService; -import org.apache.pulsar.broker.service.AbstractTopic; -import org.apache.pulsar.broker.service.Dispatcher; import org.apache.pulsar.broker.service.Replicator; -import org.apache.pulsar.broker.service.Subscription; import org.apache.pulsar.broker.service.Topic; -import org.apache.pulsar.broker.service.persistent.PersistentTopic; -import org.apache.pulsar.common.policies.data.BacklogQuota; -import org.apache.pulsar.common.stats.MetricsUtil; -import org.apache.pulsar.compaction.CompactedTopicContext; -import org.apache.pulsar.compaction.Compactor; import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; public class OpenTelemetryReplicatorStats implements AutoCloseable { From 7642b8b755acedc3bafc8e495ca85b7fbda31e03 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Wed, 22 May 2024 14:15:07 -0700 Subject: [PATCH 03/44] Fix NPE bug in [Non]PersistentReplicator.getStats --- .../NonPersistentReplicator.java | 2 +- .../persistent/PersistentReplicator.java | 2 +- ...java => OpenTelemetryReplicatorStats.java} | 25 ++++++------------- 3 files changed, 10 insertions(+), 19 deletions(-) rename pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/{OpenTelemetryReplicatorMetrics.java => OpenTelemetryReplicatorStats.java} (88%) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java index 51509f3818a28..c001cd5bd33d0 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java @@ -144,10 +144,10 @@ public void updateRates() { @Override public NonPersistentReplicatorStatsImpl getStats() { + ProducerImpl producer = this.producer; stats.connected = producer != null && producer.isConnected(); stats.replicationDelayInSeconds = getReplicationDelayInSeconds(); - ProducerImpl producer = this.producer; if (producer != null) { stats.outboundConnection = producer.getConnectionId(); stats.outboundConnectedSince = producer.getConnectedSince(); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index c3a27a15e9d90..9668431ca7791 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -597,11 +597,11 @@ public void updateRates() { } public ReplicatorStatsImpl getStats() { + ProducerImpl producer = this.producer; stats.replicationBacklog = cursor != null ? cursor.getNumberOfEntriesInBacklog(false) : 0; stats.connected = producer != null && producer.isConnected(); stats.replicationDelayInSeconds = getReplicationDelayInSeconds(); - ProducerImpl producer = this.producer; if (producer != null) { stats.outboundConnection = producer.getConnectionId(); stats.outboundConnectedSince = producer.getConnectedSince(); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java similarity index 88% rename from pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java rename to pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index f73fab5baa88a..9266345e44ff8 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorMetrics.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -22,12 +22,8 @@ import io.opentelemetry.api.metrics.BatchCallback; import io.opentelemetry.api.metrics.ObservableLongMeasurement; import java.util.Optional; -import java.util.function.BiConsumer; -import java.util.function.Consumer; import org.apache.pulsar.broker.PulsarService; -import org.apache.pulsar.broker.service.Replicator; -import org.apache.pulsar.broker.service.Topic; -import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; +import org.apache.pulsar.broker.service.AbstractReplicator; public class OpenTelemetryReplicatorStats implements AutoCloseable { @@ -64,10 +60,8 @@ public class OpenTelemetryReplicatorStats implements AutoCloseable { private final ObservableLongMeasurement delayGauge; private final BatchCallback batchCallback; - private final PulsarService pulsar; public OpenTelemetryReplicatorStats(PulsarService pulsar) { - this.pulsar = pulsar; var meter = pulsar.getOpenTelemetry().getMeter(); messageInCounter = meter @@ -123,7 +117,11 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { .values() .stream() .map(topicFuture -> topicFuture.getNow(Optional.empty())) - .forEach(topic -> topic.ifPresent(this::recordMetricsForTopic)), + .filter(Optional::isPresent) + .map(Optional::get) + .flatMap(topic -> topic.getReplicators().values().stream()) + .map(AbstractReplicator.class::cast) + .forEach(this::recordMetricsForReplicator), messageInCounter, messageOutCounter, bytesInCounter, @@ -139,18 +137,11 @@ public void close() { batchCallback.close(); } - private void recordMetricsForTopic(Topic topic) { + private void recordMetricsForReplicator(AbstractReplicator replicator) { var attributes = Attributes.builder() - .put(OpenTelemetryAttributes.PULSAR_TOPIC, topic.getName()) .build(); - var replicators = topic.getReplicators(); - topic.getReplicators().values().stream().mapMulti(new BiConsumer>() { - @Override - public void accept(Replicator replicator, Consumer consumer) { - - } - }); + var dummyValue = 0L; messageInCounter.record(dummyValue, attributes); messageOutCounter.record(dummyValue, attributes); From 37759a9d425e8a73747bad24042c2599ebf030e7 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Wed, 22 May 2024 14:15:28 -0700 Subject: [PATCH 04/44] Add Replicator.getTopic method --- .../org/apache/pulsar/broker/service/AbstractReplicator.java | 4 ++++ .../java/org/apache/pulsar/broker/service/Replicator.java | 2 ++ 2 files changed, 6 insertions(+) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java index 869a4bc81d310..e07e7a92e1067 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java @@ -136,6 +136,10 @@ public AbstractReplicator(String localCluster, Topic localTopic, String remoteCl protected abstract void disableReplicatorRead(); + public Topic getTopic() { + return localTopic; + } + public String getRemoteCluster() { return remoteCluster; } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java index 5c314397da80e..81bf7559d041e 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java @@ -27,6 +27,8 @@ public interface Replicator { void startProducer(); + Topic getTopic(); + ReplicatorStatsImpl getStats(); CompletableFuture terminate(); From 8dc58e8bc976e8c8d091000a52fa1da20f9dae72 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Wed, 22 May 2024 14:15:35 -0700 Subject: [PATCH 05/44] Checkstyle fix --- .../broker/stats/OpenTelemetryReplicatorStats.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index 9266345e44ff8..dca4b479dd876 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -67,7 +67,8 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { messageInCounter = meter .upDownCounterBuilder(MESSAGE_IN_COUNTER) .setUnit("{message}") - .setDescription("The total number of messages received from the remote cluster through this replicator.") + .setDescription( + "The total number of messages received from the remote cluster through this replicator.") .buildObserver(); messageOutCounter = meter @@ -79,13 +80,15 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { bytesInCounter = meter .upDownCounterBuilder(BYTES_IN_COUNTER) .setUnit("{byte}") - .setDescription("The total number of messages bytes received from the remote cluster through this replicator.") + .setDescription( + "The total number of messages bytes received from the remote cluster through this replicator.") .buildObserver(); bytesOutCounter = meter .upDownCounterBuilder(BYTES_OUT_COUNTER) .setUnit("{byte}") - .setDescription("The total number of messages bytes sent to the remote cluster through this replicator.") + .setDescription( + "The total number of messages bytes sent to the remote cluster through this replicator.") .buildObserver(); backlogCounter = meter @@ -138,9 +141,12 @@ public void close() { } private void recordMetricsForReplicator(AbstractReplicator replicator) { + var topic = replicator.getTopic(); + var attributes = Attributes.builder() .build(); + var stats = replicator.getStats(); var dummyValue = 0L; messageInCounter.record(dummyValue, attributes); From a063034318b549c7539bff5541c1bf72e40e0c4b Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Wed, 22 May 2024 20:30:45 -0700 Subject: [PATCH 06/44] Factor out AbstractReplicator.getReplicationDelayMs --- .../apache/pulsar/broker/service/AbstractReplicator.java | 5 +++++ .../service/nonpersistent/NonPersistentReplicator.java | 9 +-------- .../broker/service/persistent/PersistentReplicator.java | 9 +-------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java index e07e7a92e1067..ecd19c588cb27 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java @@ -136,6 +136,11 @@ public AbstractReplicator(String localCluster, Topic localTopic, String remoteCl protected abstract void disableReplicatorRead(); + public long getReplicationDelayMs() { + var producer = this.producer; + return producer == null ? 0 : producer.getDelayInMillis(); + } + public Topic getTopic() { return localTopic; } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java index c001cd5bd33d0..efd85c913c360 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java @@ -146,7 +146,7 @@ public void updateRates() { public NonPersistentReplicatorStatsImpl getStats() { ProducerImpl producer = this.producer; stats.connected = producer != null && producer.isConnected(); - stats.replicationDelayInSeconds = getReplicationDelayInSeconds(); + stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); if (producer != null) { stats.outboundConnection = producer.getConnectionId(); @@ -159,13 +159,6 @@ public NonPersistentReplicatorStatsImpl getStats() { return stats; } - private long getReplicationDelayInSeconds() { - if (producer != null) { - return TimeUnit.MILLISECONDS.toSeconds(producer.getDelayInMillis()); - } - return 0L; - } - private static final class ProducerSendCallback implements SendCallback { private NonPersistentReplicator replicator; private Entry entry; diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index 9668431ca7791..5627589ca542e 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -600,7 +600,7 @@ public ReplicatorStatsImpl getStats() { ProducerImpl producer = this.producer; stats.replicationBacklog = cursor != null ? cursor.getNumberOfEntriesInBacklog(false) : 0; stats.connected = producer != null && producer.isConnected(); - stats.replicationDelayInSeconds = getReplicationDelayInSeconds(); + stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); if (producer != null) { stats.outboundConnection = producer.getConnectionId(); @@ -617,13 +617,6 @@ public void updateMessageTTL(int messageTTLInSeconds) { this.messageTTLInSeconds = messageTTLInSeconds; } - private long getReplicationDelayInSeconds() { - if (producer != null) { - return TimeUnit.MILLISECONDS.toSeconds(producer.getDelayInMillis()); - } - return 0L; - } - @Override public boolean expireMessages(int messageTTLInSeconds) { if ((cursor.getNumberOfEntriesInBacklog(false) == 0) From 012cb1fc505e55b778bc6e6f3e57f16c8fb72da6 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Wed, 22 May 2024 20:35:25 -0700 Subject: [PATCH 07/44] Factor out AbstractReplicator.isConnected --- .../pulsar/broker/service/AbstractReplicator.java | 6 ++++++ .../service/nonpersistent/NonPersistentReplicator.java | 8 +------- .../service/persistent/PersistentReplicator.java | 10 ++-------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java index ecd19c588cb27..a2129e904071b 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java @@ -136,6 +136,12 @@ public AbstractReplicator(String localCluster, Topic localTopic, String remoteCl protected abstract void disableReplicatorRead(); + @Override + public boolean isConnected() { + var producer = this.producer; + return producer != null && producer.isConnected(); + } + public long getReplicationDelayMs() { var producer = this.producer; return producer == null ? 0 : producer.getDelayInMillis(); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java index efd85c913c360..6d370b87e8bb7 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java @@ -145,7 +145,7 @@ public void updateRates() { @Override public NonPersistentReplicatorStatsImpl getStats() { ProducerImpl producer = this.producer; - stats.connected = producer != null && producer.isConnected(); + stats.connected = isConnected(); stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); if (producer != null) { @@ -249,10 +249,4 @@ public long getNumberOfEntriesInBacklog() { protected void disableReplicatorRead() { // No-op } - - @Override - public boolean isConnected() { - ProducerImpl producer = this.producer; - return producer != null && producer.isConnected(); - } } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index 5627589ca542e..76c11baa1fc9a 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -597,11 +597,11 @@ public void updateRates() { } public ReplicatorStatsImpl getStats() { - ProducerImpl producer = this.producer; stats.replicationBacklog = cursor != null ? cursor.getNumberOfEntriesInBacklog(false) : 0; - stats.connected = producer != null && producer.isConnected(); + stats.connected = isConnected(); stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); + ProducerImpl producer = this.producer; if (producer != null) { stats.outboundConnection = producer.getConnectionId(); stats.outboundConnectedSince = producer.getConnectedSince(); @@ -685,12 +685,6 @@ protected void checkReplicatedSubscriptionMarker(Position position, MessageImpl< } } - @Override - public boolean isConnected() { - ProducerImpl producer = this.producer; - return producer != null && producer.isConnected(); - } - @Override protected void doReleaseResources() { dispatchRateLimiter.ifPresent(DispatchRateLimiter::close); From 5f056fa450ed9f876a94d792a3746261940079e8 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 11:40:50 -0700 Subject: [PATCH 08/44] Refactor ReplicatorStats --- .../data/NonPersistentReplicatorStats.java | 2 + .../common/policies/data/ReplicatorStats.java | 10 ++++ .../NonPersistentReplicatorStatsImpl.java | 13 +++-- .../data/stats/ReplicatorStatsImpl.java | 54 +++++++++++++++++-- 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java index 6c77de9195786..8e94a18405852 100644 --- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java +++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java @@ -27,4 +27,6 @@ public interface NonPersistentReplicatorStats extends ReplicatorStats { * for non-persistent topic: broker drops msg for replicator if replicator connection is not writable. **/ double getMsgDropRate(); + + long getMsgDropCount(); } diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java index 24be2f9380bb7..e143215c32897 100644 --- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java +++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java @@ -24,19 +24,29 @@ public interface ReplicatorStats { /** Total rate of messages received from the remote cluster (msg/s). */ + @Deprecated double getMsgRateIn(); + long getMsgInCount(); /** Total throughput received from the remote cluster (bytes/s). */ + @Deprecated double getMsgThroughputIn(); + long getBytesInCount(); /** Total rate of messages delivered to the replication-subscriber (msg/s). */ + @Deprecated double getMsgRateOut(); + long getMsgOutCount(); /** Total throughput delivered to the replication-subscriber (bytes/s). */ + @Deprecated double getMsgThroughputOut(); + long getBytesOutCount(); /** Total rate of messages expired (msg/s). */ + @Deprecated double getMsgRateExpired(); + long getMsgExpiredCount(); /** Number of messages pending to be replicated to remote cluster. */ long getReplicationBacklog(); diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java index 98f838a94493c..dde5430ff0225 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java @@ -18,21 +18,21 @@ */ package org.apache.pulsar.common.policies.data.stats; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.Objects; -import lombok.Getter; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; /** * Statistics for a non-persistent replicator. */ -@SuppressFBWarnings("EQ_DOESNT_OVERRIDE_EQUALS") +@Data +@EqualsAndHashCode(callSuper = true) public class NonPersistentReplicatorStatsImpl extends ReplicatorStatsImpl implements NonPersistentReplicatorStats { /** * for non-persistent topic: broker drops msg for replicator if replicator connection is not writable. **/ - @Getter public double msgDropRate; public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl stats) { @@ -41,4 +41,9 @@ public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl sta this.msgDropRate += stats.msgDropRate; return this; } + + @Override + public long getMsgDropCount() { + return 0; + } } diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java index 6933f5cc7ed76..dd268e8e6f001 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java @@ -19,6 +19,7 @@ package org.apache.pulsar.common.policies.data.stats; import java.util.Objects; +import java.util.concurrent.atomic.LongAdder; import lombok.Data; import org.apache.pulsar.common.policies.data.ReplicatorStats; @@ -31,15 +32,23 @@ public class ReplicatorStatsImpl implements ReplicatorStats { /** Total rate of messages received from the remote cluster (msg/s). */ public double msgRateIn; + private final LongAdder msgInCount = new LongAdder(); + /** Total throughput received from the remote cluster (bytes/s). */ public double msgThroughputIn; + private final LongAdder bytesInCount = new LongAdder(); + /** Total rate of messages delivered to the replication-subscriber (msg/s). */ public double msgRateOut; + private final LongAdder msgOutCount = new LongAdder(); + /** Total throughput delivered to the replication-subscriber (bytes/s). */ public double msgThroughputOut; + private final LongAdder bytesOutCount = new LongAdder(); + /** Total rate of messages expired (msg/s). */ public double msgRateExpired; @@ -72,10 +81,49 @@ public ReplicatorStatsImpl add(ReplicatorStatsImpl stats) { this.msgThroughputOut += stats.msgThroughputOut; this.msgRateExpired += stats.msgRateExpired; this.replicationBacklog += stats.replicationBacklog; - if (this.connected) { - this.connected &= stats.connected; - } + this.connected &= stats.connected; this.replicationDelayInSeconds = Math.max(this.replicationDelayInSeconds, stats.replicationDelayInSeconds); return this; } + + @Override + public long getMsgInCount() { + return msgInCount.sum(); + } + + public void incrementMsgInCounter() { + msgInCount.increment(); + } + + @Override + public long getBytesInCount() { + return bytesInCount.sum(); + } + + public void incrementBytesInCounter(long bytes) { + bytesInCount.add(bytes); + } + + @Override + public long getMsgOutCount() { + return msgOutCount.sum(); + } + + public void incrementMsgOutCounter() { + msgOutCount.increment(); + } + + @Override + public long getBytesOutCount() { + return bytesOutCount.sum(); + } + + public void incrementBytesOutCounter(long bytes) { + bytesOutCount.add(bytes); + } + + @Override + public long getMsgExpiredCount() { + return 0; + } } From 11325183650e4ad69b3f9fae3c1f401c181f9b46 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 12:21:26 -0700 Subject: [PATCH 09/44] Fix possible NPEs in PersistentReplicator.updateCursorState --- .../broker/service/persistent/PersistentReplicator.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index 76c11baa1fc9a..cfef2454c474a 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -331,11 +331,12 @@ protected CompletableFuture getSchemaInfo(MessageImpl msg) throws Ex } public void updateCursorState() { - if (this.cursor != null) { - if (producer != null && producer.isConnected()) { - this.cursor.setActive(); + var cursor = this.cursor; + if (cursor != null) { + if (isConnected()) { + cursor.setActive(); } else { - this.cursor.setInactive(); + cursor.setInactive(); } } } From 1fea1acf19fa883c4d3755ab0d7ed0bb1136f5a8 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 12:22:58 -0700 Subject: [PATCH 10/44] Rename Replicator.getTopic to Replicator.getLocalTopic --- .../org/apache/pulsar/broker/service/AbstractReplicator.java | 5 +---- .../java/org/apache/pulsar/broker/service/Replicator.java | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java index a2129e904071b..c225f3a830c4b 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java @@ -55,6 +55,7 @@ public abstract class AbstractReplicator implements Replicator { protected final PulsarClientImpl replicationClient; protected final PulsarClientImpl client; protected String replicatorId; + @Getter protected final Topic localTopic; protected volatile ProducerImpl producer; @@ -147,10 +148,6 @@ public long getReplicationDelayMs() { return producer == null ? 0 : producer.getDelayInMillis(); } - public Topic getTopic() { - return localTopic; - } - public String getRemoteCluster() { return remoteCluster; } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java index 81bf7559d041e..20e9df33f014e 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java @@ -27,7 +27,7 @@ public interface Replicator { void startProducer(); - Topic getTopic(); + Topic getLocalTopic(); ReplicatorStatsImpl getStats(); From 4d8595265e02ffead6fad7493e7ba88065a6270e Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 12:30:23 -0700 Subject: [PATCH 11/44] Instantiate OpenTelemetryReplicatorStats field in PulsarService --- .../main/java/org/apache/pulsar/broker/PulsarService.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java index 6482ead1f5a2d..d75215d2cc10b 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java @@ -110,6 +110,7 @@ import org.apache.pulsar.broker.service.schema.SchemaStorageFactory; import org.apache.pulsar.broker.stats.MetricsGenerator; import org.apache.pulsar.broker.stats.OpenTelemetryConsumerStats; +import org.apache.pulsar.broker.stats.OpenTelemetryReplicatorStats; import org.apache.pulsar.broker.stats.OpenTelemetryTopicStats; import org.apache.pulsar.broker.stats.PulsarBrokerOpenTelemetry; import org.apache.pulsar.broker.stats.prometheus.PrometheusMetricsServlet; @@ -256,6 +257,7 @@ public class PulsarService implements AutoCloseable, ShutdownService { private final PulsarBrokerOpenTelemetry openTelemetry; private OpenTelemetryTopicStats openTelemetryTopicStats; private OpenTelemetryConsumerStats openTelemetryConsumerStats; + private OpenTelemetryReplicatorStats openTelemetryReplicatorStats; private TransactionMetadataStoreService transactionMetadataStoreService; private TransactionBufferProvider transactionBufferProvider; @@ -634,6 +636,10 @@ public CompletableFuture closeAsync() { brokerClientSharedTimer.stop(); monotonicSnapshotClock.close(); + if (openTelemetryReplicatorStats != null) { + openTelemetryReplicatorStats.close(); + openTelemetryReplicatorStats = null; + } if (openTelemetryConsumerStats != null) { openTelemetryConsumerStats.close(); openTelemetryConsumerStats = null; @@ -785,6 +791,7 @@ public void start() throws PulsarServerException { openTelemetryTopicStats = new OpenTelemetryTopicStats(this); openTelemetryConsumerStats = new OpenTelemetryConsumerStats(this); + openTelemetryReplicatorStats = new OpenTelemetryReplicatorStats(this); localMetadataSynchronizer = StringUtils.isNotBlank(config.getMetadataSyncEventTopic()) ? new PulsarMetadataEventSynchronizer(this, config.getMetadataSyncEventTopic()) From 7540e9e661e54c014a53bd9d0ef2f08f359b7cd0 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 12:56:40 -0700 Subject: [PATCH 12/44] Enable OpenTelemetry in ReplicatorTest --- .../broker/service/ReplicatorTestBase.java | 58 +++++++++++++++---- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java index 838632febd889..766ad6d6da08e 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java @@ -21,12 +21,10 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertNull; - -import com.google.common.io.Resources; import com.google.common.collect.Sets; - +import com.google.common.io.Resources; import io.netty.util.concurrent.DefaultThreadFactory; - +import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader; import java.net.URL; import java.util.Optional; import java.util.Set; @@ -35,12 +33,9 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; - import org.apache.pulsar.broker.PulsarService; import org.apache.pulsar.broker.ServiceConfiguration; -import org.apache.pulsar.common.policies.data.ClusterData; -import org.apache.pulsar.common.policies.data.TopicType; -import org.apache.pulsar.tests.TestRetrySupport; +import org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil; import org.apache.pulsar.client.admin.PulsarAdmin; import org.apache.pulsar.client.api.Consumer; import org.apache.pulsar.client.api.Message; @@ -51,7 +46,11 @@ import org.apache.pulsar.client.api.PulsarClientException; import org.apache.pulsar.client.api.TypedMessageBuilder; import org.apache.pulsar.common.naming.TopicName; +import org.apache.pulsar.common.policies.data.ClusterData; import org.apache.pulsar.common.policies.data.TenantInfoImpl; +import org.apache.pulsar.common.policies.data.TopicType; +import org.apache.pulsar.functions.worker.WorkerConfig; +import org.apache.pulsar.tests.TestRetrySupport; import org.apache.pulsar.zookeeper.LocalBookkeeperEnsemble; import org.apache.pulsar.zookeeper.ZookeeperServerTest; import org.slf4j.Logger; @@ -63,6 +62,7 @@ public abstract class ReplicatorTestBase extends TestRetrySupport { ServiceConfiguration config1 = new ServiceConfiguration(); PulsarService pulsar1; BrokerService ns1; + protected InMemoryMetricReader metricReader1; PulsarAdmin admin1; LocalBookkeeperEnsemble bkEnsemble1; @@ -74,6 +74,7 @@ public abstract class ReplicatorTestBase extends TestRetrySupport { BrokerService ns2; PulsarAdmin admin2; LocalBookkeeperEnsemble bkEnsemble2; + protected InMemoryMetricReader metricReader2; URL url3; URL urlTls3; @@ -82,6 +83,7 @@ public abstract class ReplicatorTestBase extends TestRetrySupport { BrokerService ns3; PulsarAdmin admin3; LocalBookkeeperEnsemble bkEnsemble3; + protected InMemoryMetricReader metricReader3; URL url4; URL urlTls4; @@ -89,6 +91,7 @@ public abstract class ReplicatorTestBase extends TestRetrySupport { PulsarService pulsar4; PulsarAdmin admin4; LocalBookkeeperEnsemble bkEnsemble4; + protected InMemoryMetricReader metricReader4; ZookeeperServerTest globalZkS; @@ -154,7 +157,8 @@ protected void setup() throws Exception { // completely // independent config objects instead of referring to the same properties object setConfig1DefaultValue(); - pulsar1 = new PulsarService(config1); + metricReader1 = InMemoryMetricReader.create(); + pulsar1 = buildPulsarService(config1, metricReader1); pulsar1.start(); ns1 = pulsar1.getBrokerService(); @@ -169,7 +173,8 @@ protected void setup() throws Exception { bkEnsemble2.start(); setConfig2DefaultValue(); - pulsar2 = new PulsarService(config2); + metricReader2 = InMemoryMetricReader.create(); + pulsar2 = buildPulsarService(config2, metricReader2); pulsar2.start(); ns2 = pulsar2.getBrokerService(); @@ -184,7 +189,8 @@ protected void setup() throws Exception { bkEnsemble3.start(); setConfig3DefaultValue(); - pulsar3 = new PulsarService(config3); + metricReader3 = InMemoryMetricReader.create(); + pulsar3 = buildPulsarService(config3, metricReader3); pulsar3.start(); ns3 = pulsar3.getBrokerService(); @@ -199,7 +205,8 @@ protected void setup() throws Exception { bkEnsemble4.start(); setConfig4DefaultValue(); - pulsar4 = new PulsarService(config4); + metricReader4 = InMemoryMetricReader.create(); + pulsar4 = buildPulsarService(config4, metricReader4); pulsar4.start(); url4 = new URL(pulsar4.getWebServiceAddress()); @@ -312,6 +319,16 @@ protected void setup() throws Exception { } + private PulsarService buildPulsarService(ServiceConfiguration config, InMemoryMetricReader metricReader) { + var otelSdkCustomizer = metricReader != null ? + BrokerOpenTelemetryTestUtil.getOpenTelemetrySdkBuilderConsumer(metricReader) : null; + return new PulsarService(config, + new WorkerConfig(), + Optional.empty(), + exitCode -> log.info("Pulsar service finished with exit code {}", exitCode), + otelSdkCustomizer); + } + public void setConfig3DefaultValue() { setConfigDefaults(config3, cluster3, bkEnsemble3); config3.setTlsEnabled(true); @@ -409,6 +426,23 @@ protected void cleanup() throws Exception { admin4 = null; } + if (metricReader4 != null) { + metricReader4.close(); + metricReader4 = null; + } + if (metricReader3 != null) { + metricReader3.close(); + metricReader3 = null; + } + if (metricReader2 != null) { + metricReader2.close(); + metricReader2 = null; + } + if (metricReader1 != null) { + metricReader1.close(); + metricReader1 = null; + } + if (pulsar4 != null) { pulsar4.close(); pulsar4 = null; From b6a30fd77730cdb26e0eccdc360f8e95c380bcab Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 13:07:13 -0700 Subject: [PATCH 13/44] Update ReplicatorStats implementation --- .../stats/OpenTelemetryReplicatorStats.java | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index dca4b479dd876..e1b278504e5ba 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -20,10 +20,16 @@ import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.metrics.BatchCallback; +import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; import io.opentelemetry.api.metrics.ObservableLongMeasurement; import java.util.Optional; +import java.util.concurrent.TimeUnit; import org.apache.pulsar.broker.PulsarService; import org.apache.pulsar.broker.service.AbstractReplicator; +import org.apache.pulsar.broker.service.persistent.PersistentReplicator; +import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; +import org.apache.pulsar.common.policies.data.stats.ReplicatorStatsImpl; +import org.apache.pulsar.common.stats.MetricsUtil; public class OpenTelemetryReplicatorStats implements AutoCloseable { @@ -57,7 +63,7 @@ public class OpenTelemetryReplicatorStats implements AutoCloseable { // Replaces pulsar_replication_delay_in_seconds public static final String DELAY_GAUGE = "pulsar.broker.replication.message.age"; - private final ObservableLongMeasurement delayGauge; + private final ObservableDoubleMeasurement delayGauge; private final BatchCallback batchCallback; @@ -110,7 +116,7 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { .buildObserver(); delayGauge = meter - .upDownCounterBuilder(DELAY_GAUGE) + .gaugeBuilder(DELAY_GAUGE) .setUnit("{second}") .setDescription("The total number of messages that expired for this replicator.") .buildObserver(); @@ -142,6 +148,7 @@ public void close() { private void recordMetricsForReplicator(AbstractReplicator replicator) { var topic = replicator.getTopic(); + var topic = replicator.getLocalTopic(); var attributes = Attributes.builder() .build(); @@ -149,13 +156,21 @@ private void recordMetricsForReplicator(AbstractReplicator replicator) { var stats = replicator.getStats(); var dummyValue = 0L; - messageInCounter.record(dummyValue, attributes); - messageOutCounter.record(dummyValue, attributes); - bytesInCounter.record(dummyValue, attributes); - bytesOutCounter.record(dummyValue, attributes); - backlogCounter.record(dummyValue, attributes); - expiredCounter.record(dummyValue, attributes); - connectedCounter.record(dummyValue, attributes); - delayGauge.record(dummyValue, attributes); + messageInCounter.record(stats.getMsgInCount(), attributes); + messageOutCounter.record(stats.getMsgOutCount(), attributes); + bytesInCounter.record(stats.getBytesInCount(), attributes); + bytesOutCounter.record(stats.getBytesOutCount(), attributes); + connectedCounter.record(replicator.isConnected() ? 0 : 1, attributes); + var delaySeconds = MetricsUtil.convertToSeconds(replicator.getReplicationDelayMs(), TimeUnit.MILLISECONDS); + delayGauge.record(delaySeconds, attributes); + + if (replicator instanceof PersistentReplicator persistentReplicator) { + expiredCounter.record(persistentReplicator.getMessageExpiredCount(), attributes); + backlogCounter.record(persistentReplicator.getNumberOfEntriesInBacklog(), attributes); + } + + if (stats instanceof NonPersistentReplicatorStats nonPersistentStats) { + var dropCount = nonPersistentStats.getMsgDropCount(); + } } } From 4ef7ee7583bc4496706e15ca2ff652d05d9f8e77 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 13:07:29 -0700 Subject: [PATCH 14/44] Update stat values --- .../broker/service/persistent/GeoPersistentReplicator.java | 2 ++ .../broker/service/persistent/PersistentReplicator.java | 4 ++++ .../pulsar/broker/service/persistent/ShadowReplicator.java | 2 ++ 3 files changed, 8 insertions(+) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java index 4ef2710fea61a..dea07728f2fac 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java @@ -167,6 +167,8 @@ protected boolean replicateEntries(List entries) { msg.getMessageBuilder().clearTxnidMostBits(); msg.getMessageBuilder().clearTxnidLeastBits(); msgOut.recordEvent(headersAndPayload.readableBytes()); + getStats().incrementMsgOutCounter(); + getStats().incrementBytesOutCounter(headersAndPayload.readableBytes()); // Increment pending messages for messages produced locally PENDING_MESSAGES_UPDATER.incrementAndGet(this); producer.sendAsync(msg, ProducerSendCallback.create(this, entry, msg)); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index cfef2454c474a..09a358079de6a 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -187,6 +187,10 @@ public long getNumberOfEntriesInBacklog() { return cursor.getNumberOfEntriesInBacklog(true); } + public long getMessageExpiredCount() { + return expiryMonitor.getTotalMessageExpired(); + } + @Override protected void disableReplicatorRead() { if (this.cursor != null) { diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java index 85e837ff1879a..2dea43a79694e 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java @@ -92,6 +92,8 @@ protected boolean replicateEntries(List entries) { dispatchRateLimiter.ifPresent(rateLimiter -> rateLimiter.consumeDispatchQuota(1, entry.getLength())); msgOut.recordEvent(headersAndPayload.readableBytes()); + getStats().incrementMsgOutCounter(); + getStats().incrementBytesOutCounter(headersAndPayload.readableBytes()); msg.setReplicatedFrom(localCluster); From ebbe49edf9f882a5a510151a45b19b4ee7618d63 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 13:07:41 -0700 Subject: [PATCH 15/44] Add draft OpenTelemetryReplicatorStatsTest --- .../OpenTelemetryReplicatorStatsTest.java | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java new file mode 100644 index 0000000000000..7cc95831ee2e8 --- /dev/null +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.broker.stats; + +import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongSumValue; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.doAnswer; +import io.opentelemetry.api.common.Attributes; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import lombok.Cleanup; +import org.apache.pulsar.broker.BrokerTestUtil; +import org.apache.pulsar.broker.intercept.BrokerInterceptor; +import org.apache.pulsar.broker.service.BrokerTestBase; +import org.apache.pulsar.broker.service.Consumer; +import org.apache.pulsar.broker.testcontext.PulsarTestContext; +import org.apache.pulsar.client.api.SubscriptionInitialPosition; +import org.apache.pulsar.client.api.SubscriptionType; +import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; +import org.awaitility.Awaitility; +import org.mockito.Mockito; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class OpenTelemetryReplicatorStatsTest extends BrokerTestBase { + + private BrokerInterceptor brokerInterceptor; + + @BeforeMethod(alwaysRun = true) + @Override + protected void setup() throws Exception { + brokerInterceptor = + Mockito.mock(BrokerInterceptor.class, Mockito.withSettings().defaultAnswer(Mockito.CALLS_REAL_METHODS)); + super.baseSetup(); + } + + @AfterMethod(alwaysRun = true) + @Override + protected void cleanup() throws Exception { + super.internalCleanup(); + } + + @Override + protected void customizeMainPulsarTestContextBuilder(PulsarTestContext.Builder builder) { + super.customizeMainPulsarTestContextBuilder(builder); + builder.enableOpenTelemetry(true); + builder.brokerInterceptor(brokerInterceptor); + } + + @Test(timeOut = 30_000) + public void testMessagingMetrics() throws Exception { + var topicName = BrokerTestUtil.newUniqueName("persistent://prop/ns-abc/testConsumerMessagingMetrics"); + admin.topics().createNonPartitionedTopic(topicName); + + var messageCount = 5; + var ackCount = 3; + + var subscriptionName = BrokerTestUtil.newUniqueName("test"); + var receiverQueueSize = 100; + + // Intercept calls to create consumer, in order to fetch client information. + var consumerRef = new AtomicReference(); + doAnswer(invocation -> { + consumerRef.compareAndSet(null, invocation.getArgument(1)); + return null; + }).when(brokerInterceptor) + .consumerCreated(any(), argThat(arg -> arg.getSubscription().getName().equals(subscriptionName)), any()); + + @Cleanup + var consumer = pulsarClient.newConsumer() + .topic(topicName) + .subscriptionName(subscriptionName) + .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest) + .subscriptionType(SubscriptionType.Shared) + .ackTimeout(1, TimeUnit.SECONDS) + .receiverQueueSize(receiverQueueSize) + .property("prop1", "value1") + .subscribe(); + + Awaitility.await().until(() -> consumerRef.get() != null); + var serverConsumer = consumerRef.get(); + + @Cleanup + var producer = pulsarClient.newProducer() + .topic(topicName) + .create(); + for (int i = 0; i < messageCount; i++) { + producer.send(String.format("msg-%d", i).getBytes()); + var message = consumer.receive(); + if (i < ackCount) { + consumer.acknowledge(message); + } + } + + var attributes = Attributes.builder() + .put(OpenTelemetryAttributes.PULSAR_DOMAIN, "persistent") + .put(OpenTelemetryAttributes.PULSAR_TENANT, "prop") + .put(OpenTelemetryAttributes.PULSAR_NAMESPACE, "prop/ns-abc") + .put(OpenTelemetryAttributes.PULSAR_TOPIC, topicName) + .put(OpenTelemetryAttributes.PULSAR_SUBSCRIPTION_NAME, subscriptionName) + .put(OpenTelemetryAttributes.PULSAR_SUBSCRIPTION_TYPE, SubscriptionType.Shared.toString()) + .put(OpenTelemetryAttributes.PULSAR_CONSUMER_NAME, consumer.getConsumerName()) + .put(OpenTelemetryAttributes.PULSAR_CONSUMER_ID, 0) + .put(OpenTelemetryAttributes.PULSAR_CONSUMER_CONNECTED_SINCE, + serverConsumer.getConnectedSince().getEpochSecond()) + .put(OpenTelemetryAttributes.PULSAR_CLIENT_ADDRESS, serverConsumer.getClientAddressAndPort()) + .put(OpenTelemetryAttributes.PULSAR_CLIENT_VERSION, serverConsumer.getClientVersion()) + .put(OpenTelemetryAttributes.PULSAR_CONSUMER_METADATA, List.of("prop1:value1")) + .build(); + + Awaitility.await().untilAsserted(() -> { + var metrics = pulsarTestContext.getOpenTelemetryMetricReader().collectAllMetrics(); + + assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_OUT_COUNTER, attributes, + actual -> assertThat(actual).isPositive()); + assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.BYTES_OUT_COUNTER, attributes, + actual -> assertThat(actual).isPositive()); + + assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_ACK_COUNTER, attributes, ackCount); + assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_PERMITS_COUNTER, attributes, + actual -> assertThat(actual).isGreaterThanOrEqualTo(receiverQueueSize - messageCount - ackCount)); + + var unAckCount = messageCount - ackCount; + assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_UNACKNOWLEDGED_COUNTER, + attributes.toBuilder().put(OpenTelemetryAttributes.PULSAR_CONSUMER_BLOCKED, false).build(), + unAckCount); + assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_REDELIVER_COUNTER, attributes, + actual -> assertThat(actual).isGreaterThanOrEqualTo(unAckCount)); + }); + } +} From 154f0bc4377c5b70a229590550331b2d7d40cc4f Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 14:19:39 -0700 Subject: [PATCH 16/44] Remove redundant cursor null pointer checks in PersistentReplicator --- .../persistent/PersistentReplicator.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index 09a358079de6a..fb02dbd728078 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -28,6 +28,7 @@ import io.netty.util.Recycler; import io.netty.util.Recycler.Handle; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -119,7 +120,7 @@ public PersistentReplicator(String localCluster, PersistentTopic localTopic, Man super(localCluster, localTopic, remoteCluster, remoteTopic, localTopic.getReplicatorPrefix(), brokerService, replicationClient); this.topic = localTopic; - this.cursor = cursor; + this.cursor = Objects.requireNonNull(cursor); this.expiryMonitor = new PersistentMessageExpiryMonitor(localTopic, Codec.decode(cursor.getName()), cursor, null); HAVE_PENDING_READ_UPDATER.set(this, FALSE); @@ -193,10 +194,8 @@ public long getMessageExpiredCount() { @Override protected void disableReplicatorRead() { - if (this.cursor != null) { - // deactivate cursor after successfully close the producer - this.cursor.setInactive(); - } + // deactivate cursor after successfully close the producer + this.cursor.setInactive(); } /** @@ -335,13 +334,10 @@ protected CompletableFuture getSchemaInfo(MessageImpl msg) throws Ex } public void updateCursorState() { - var cursor = this.cursor; - if (cursor != null) { - if (isConnected()) { - cursor.setActive(); - } else { - cursor.setInactive(); - } + if (isConnected()) { + cursor.setActive(); + } else { + cursor.setInactive(); } } From 36d5d2cd92d597317743b868450e5d9204d0221f Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 14:20:14 -0700 Subject: [PATCH 17/44] Checkstyle fix --- .../pulsar/broker/stats/OpenTelemetryReplicatorStats.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index e1b278504e5ba..b6aa48f3dcdf1 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -28,7 +28,6 @@ import org.apache.pulsar.broker.service.AbstractReplicator; import org.apache.pulsar.broker.service.persistent.PersistentReplicator; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; -import org.apache.pulsar.common.policies.data.stats.ReplicatorStatsImpl; import org.apache.pulsar.common.stats.MetricsUtil; public class OpenTelemetryReplicatorStats implements AutoCloseable { @@ -147,14 +146,12 @@ public void close() { } private void recordMetricsForReplicator(AbstractReplicator replicator) { - var topic = replicator.getTopic(); var topic = replicator.getLocalTopic(); var attributes = Attributes.builder() .build(); var stats = replicator.getStats(); - var dummyValue = 0L; messageInCounter.record(stats.getMsgInCount(), attributes); messageOutCounter.record(stats.getMsgOutCount(), attributes); From 23e24a25b873f23a32d2e1bf3f4c0aa9ac700c64 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 16:17:18 -0700 Subject: [PATCH 18/44] Update stats access --- .../broker/service/persistent/GeoPersistentReplicator.java | 4 ++-- .../broker/service/persistent/PersistentReplicator.java | 2 +- .../pulsar/broker/service/persistent/ShadowReplicator.java | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java index dea07728f2fac..7ab038eb12ec6 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/GeoPersistentReplicator.java @@ -167,8 +167,8 @@ protected boolean replicateEntries(List entries) { msg.getMessageBuilder().clearTxnidMostBits(); msg.getMessageBuilder().clearTxnidLeastBits(); msgOut.recordEvent(headersAndPayload.readableBytes()); - getStats().incrementMsgOutCounter(); - getStats().incrementBytesOutCounter(headersAndPayload.readableBytes()); + stats.incrementMsgOutCounter(); + stats.incrementBytesOutCounter(headersAndPayload.readableBytes()); // Increment pending messages for messages produced locally PENDING_MESSAGES_UPDATER.incrementAndGet(this); producer.sendAsync(msg, ProducerSendCallback.create(this, entry, msg)); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index fb02dbd728078..104702ad9117f 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -109,7 +109,7 @@ public abstract class PersistentReplicator extends AbstractReplicator // for connected subscriptions, message expiry will be checked if the backlog is greater than this threshold private static final int MINIMUM_BACKLOG_FOR_EXPIRY_CHECK = 1000; - private final ReplicatorStatsImpl stats = new ReplicatorStatsImpl(); + protected final ReplicatorStatsImpl stats = new ReplicatorStatsImpl(); protected volatile boolean fetchSchemaInProgress = false; diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java index 2dea43a79694e..25591857aa1b5 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/ShadowReplicator.java @@ -92,8 +92,8 @@ protected boolean replicateEntries(List entries) { dispatchRateLimiter.ifPresent(rateLimiter -> rateLimiter.consumeDispatchQuota(1, entry.getLength())); msgOut.recordEvent(headersAndPayload.readableBytes()); - getStats().incrementMsgOutCounter(); - getStats().incrementBytesOutCounter(headersAndPayload.readableBytes()); + stats.incrementMsgOutCounter(); + stats.incrementBytesOutCounter(headersAndPayload.readableBytes()); msg.setReplicatedFrom(localCluster); From 6173af8882b3a7b8ee196ea3de91bc1fd8f2f5c9 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 16:47:46 -0700 Subject: [PATCH 19/44] Skip redundant null check --- .../pulsar/broker/service/persistent/PersistentReplicator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index 104702ad9117f..cc46ea8fa8c26 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -598,7 +598,7 @@ public void updateRates() { } public ReplicatorStatsImpl getStats() { - stats.replicationBacklog = cursor != null ? cursor.getNumberOfEntriesInBacklog(false) : 0; + stats.replicationBacklog = cursor.getNumberOfEntriesInBacklog(false); stats.connected = isConnected(); stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); From 325ffca8ef2775a9537e7cf3548b266492ceed96 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 17:16:16 -0700 Subject: [PATCH 20/44] Debug --- .../broker/admin/v2/PersistentTopics.java | 10 +++++++++- .../pulsar/broker/service/ReplicatorTest.java | 17 ++++++++++++----- .../broker/service/ReplicatorTestBase.java | 6 +++--- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java index 7e138442ae228..d2df7c4e04b2f 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java @@ -29,6 +29,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Consumer; import javax.ws.rs.DELETE; import javax.ws.rs.DefaultValue; import javax.ws.rs.Encoded; @@ -79,6 +80,7 @@ import org.apache.pulsar.common.policies.data.SubscribeRate; import org.apache.pulsar.common.policies.data.TopicOperation; import org.apache.pulsar.common.policies.data.TopicPolicies; +import org.apache.pulsar.common.policies.data.TopicStats; import org.apache.pulsar.common.policies.data.impl.AutoSubscriptionCreationOverrideImpl; import org.apache.pulsar.common.policies.data.impl.BacklogQuotaImpl; import org.apache.pulsar.common.policies.data.impl.DispatchRateImpl; @@ -1221,7 +1223,13 @@ public void getStats( new GetStatsOptions(getPreciseBacklog, subscriptionBacklogSize, getEarliestTimeInBacklog, excludePublishers, excludeConsumers); internalGetStatsAsync(authoritative, getStatsOptions) - .thenAccept(asyncResponse::resume) + .thenAccept(new Consumer() { + @Override + public void accept(TopicStats topicStats) { + log.info("Completed getStats request for topic {}", topicName); + asyncResponse.resume(topicStats); + } + }) .exceptionally(ex -> { // If the exception is not redirect exception we need to log it. if (isNot307And404Exception(ex)) { diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 765727aeac319..62f373136d66d 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -36,6 +36,7 @@ import java.lang.reflect.Field; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -521,11 +522,17 @@ public void testReplicationWillNotStuckByIncompleteSchemaFuture() throws Excepti pulsar1.getConfiguration().setReplicationProducerQueueSize(originalReplicationProducerQueueSize); } - private static void waitReplicateFinish(TopicName topicName, PulsarAdmin admin){ - Awaitility.await().untilAsserted(() -> { - for (Map.Entry subStats : - admin.topics().getStats(topicName.toString(), true, false, false).getReplication().entrySet()){ - assertTrue(subStats.getValue().getReplicationBacklog() == 0, "replication task finished"); + private void waitReplicateFinish(TopicName topicName, PulsarAdmin admin) { + Awaitility.await().atMost(Duration.ofMinutes(5)).untilAsserted(() -> { + try { + var stats = admin.topics().getStats(topicName.toString(), true, false, false); + var replicationStats = stats.getReplication(); + var entries = replicationStats.entrySet(); + for (Map.Entry subStats : entries) { + assertEquals(subStats.getValue().getReplicationBacklog(), 0, "replication task finished"); + } + } catch (PulsarAdminException e) { + assert false; } }); } diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java index 766ad6d6da08e..156e1cf78f5d6 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java @@ -320,13 +320,13 @@ protected void setup() throws Exception { } private PulsarService buildPulsarService(ServiceConfiguration config, InMemoryMetricReader metricReader) { - var otelSdkCustomizer = metricReader != null ? - BrokerOpenTelemetryTestUtil.getOpenTelemetrySdkBuilderConsumer(metricReader) : null; + //var otelSdkCustomizer = metricReader != null ? + //BrokerOpenTelemetryTestUtil.getOpenTelemetrySdkBuilderConsumer(metricReader) : null; return new PulsarService(config, new WorkerConfig(), Optional.empty(), exitCode -> log.info("Pulsar service finished with exit code {}", exitCode), - otelSdkCustomizer); + null); } public void setConfig3DefaultValue() { From b008ade86bdd7af0dd4a1426a014cc7804aedf43 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 21:19:14 -0700 Subject: [PATCH 21/44] Debug --- .../data/stats/NonPersistentReplicatorStatsImpl.java | 6 ++---- .../common/policies/data/stats/ReplicatorStatsImpl.java | 5 +++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java index dde5430ff0225..32bfa6f85018f 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java @@ -19,20 +19,18 @@ package org.apache.pulsar.common.policies.data.stats; import java.util.Objects; -import lombok.Data; -import lombok.EqualsAndHashCode; +import lombok.Getter; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; /** * Statistics for a non-persistent replicator. */ -@Data -@EqualsAndHashCode(callSuper = true) public class NonPersistentReplicatorStatsImpl extends ReplicatorStatsImpl implements NonPersistentReplicatorStats { /** * for non-persistent topic: broker drops msg for replicator if replicator connection is not writable. **/ + @Getter public double msgDropRate; public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl stats) { diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java index dd268e8e6f001..48d9034cebcbe 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java @@ -21,6 +21,7 @@ import java.util.Objects; import java.util.concurrent.atomic.LongAdder; import lombok.Data; +import lombok.EqualsAndHashCode; import org.apache.pulsar.common.policies.data.ReplicatorStats; /** @@ -32,21 +33,25 @@ public class ReplicatorStatsImpl implements ReplicatorStats { /** Total rate of messages received from the remote cluster (msg/s). */ public double msgRateIn; + @EqualsAndHashCode.Exclude private final LongAdder msgInCount = new LongAdder(); /** Total throughput received from the remote cluster (bytes/s). */ public double msgThroughputIn; + @EqualsAndHashCode.Exclude private final LongAdder bytesInCount = new LongAdder(); /** Total rate of messages delivered to the replication-subscriber (msg/s). */ public double msgRateOut; + @EqualsAndHashCode.Exclude private final LongAdder msgOutCount = new LongAdder(); /** Total throughput delivered to the replication-subscriber (bytes/s). */ public double msgThroughputOut; + @EqualsAndHashCode.Exclude private final LongAdder bytesOutCount = new LongAdder(); /** Total rate of messages expired (msg/s). */ From 6af06f9d9465375e2e7daffb4ff850cdb8a82001 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 22:07:27 -0700 Subject: [PATCH 22/44] Aadd default implementations for ReplicatorStats methods --- .../data/NonPersistentReplicatorStats.java | 4 ++- .../common/policies/data/ReplicatorStats.java | 25 +++++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java index 8e94a18405852..867b531be8817 100644 --- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java +++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java @@ -28,5 +28,7 @@ public interface NonPersistentReplicatorStats extends ReplicatorStats { **/ double getMsgDropRate(); - long getMsgDropCount(); + default long getMsgDropCount() { + return 0; + } } diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java index e143215c32897..5c0c1c62d15a7 100644 --- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java +++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java @@ -26,27 +26,42 @@ public interface ReplicatorStats { /** Total rate of messages received from the remote cluster (msg/s). */ @Deprecated double getMsgRateIn(); - long getMsgInCount(); + + default long getMsgInCount() { + return 0; + } /** Total throughput received from the remote cluster (bytes/s). */ @Deprecated double getMsgThroughputIn(); - long getBytesInCount(); + + default long getBytesInCount() { + return 0; + } /** Total rate of messages delivered to the replication-subscriber (msg/s). */ @Deprecated double getMsgRateOut(); - long getMsgOutCount(); + + default long getMsgOutCount() { + return 0; + } /** Total throughput delivered to the replication-subscriber (bytes/s). */ @Deprecated double getMsgThroughputOut(); - long getBytesOutCount(); + + default long getBytesOutCount() { + return 0; + } /** Total rate of messages expired (msg/s). */ @Deprecated double getMsgRateExpired(); - long getMsgExpiredCount(); + + default long getMsgExpiredCount() { + return 0; + } /** Number of messages pending to be replicated to remote cluster. */ long getReplicationBacklog(); From ed9be191b3ea11b70fe6ba096920779df926ed7d Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 23:03:31 -0700 Subject: [PATCH 23/44] Fix test errors caused by stats serialization --- .../pulsar/broker/service/ReplicatorTest.java | 2 +- .../NonPersistentReplicatorStatsImpl.java | 5 ----- .../data/stats/ReplicatorStatsImpl.java | 20 +++++++++---------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 62f373136d66d..e4e65c79330ea 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -523,7 +523,7 @@ public void testReplicationWillNotStuckByIncompleteSchemaFuture() throws Excepti } private void waitReplicateFinish(TopicName topicName, PulsarAdmin admin) { - Awaitility.await().atMost(Duration.ofMinutes(5)).untilAsserted(() -> { + Awaitility.await().untilAsserted(() -> { try { var stats = admin.topics().getStats(topicName.toString(), true, false, false); var replicationStats = stats.getReplication(); diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java index 32bfa6f85018f..289de074262f8 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java @@ -39,9 +39,4 @@ public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl sta this.msgDropRate += stats.msgDropRate; return this; } - - @Override - public long getMsgDropCount() { - return 0; - } } diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java index 48d9034cebcbe..280170b8ca35f 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java @@ -18,10 +18,11 @@ */ package org.apache.pulsar.common.policies.data.stats; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; import java.util.concurrent.atomic.LongAdder; import lombok.Data; -import lombok.EqualsAndHashCode; import org.apache.pulsar.common.policies.data.ReplicatorStats; /** @@ -33,25 +34,25 @@ public class ReplicatorStatsImpl implements ReplicatorStats { /** Total rate of messages received from the remote cluster (msg/s). */ public double msgRateIn; - @EqualsAndHashCode.Exclude + @JsonIgnore private final LongAdder msgInCount = new LongAdder(); /** Total throughput received from the remote cluster (bytes/s). */ public double msgThroughputIn; - @EqualsAndHashCode.Exclude + @JsonIgnore private final LongAdder bytesInCount = new LongAdder(); /** Total rate of messages delivered to the replication-subscriber (msg/s). */ public double msgRateOut; - @EqualsAndHashCode.Exclude + @JsonIgnore private final LongAdder msgOutCount = new LongAdder(); /** Total throughput delivered to the replication-subscriber (bytes/s). */ public double msgThroughputOut; - @EqualsAndHashCode.Exclude + @JsonIgnore private final LongAdder bytesOutCount = new LongAdder(); /** Total rate of messages expired (msg/s). */ @@ -92,6 +93,7 @@ public ReplicatorStatsImpl add(ReplicatorStatsImpl stats) { } @Override + @JsonProperty public long getMsgInCount() { return msgInCount.sum(); } @@ -101,6 +103,7 @@ public void incrementMsgInCounter() { } @Override + @JsonProperty public long getBytesInCount() { return bytesInCount.sum(); } @@ -110,6 +113,7 @@ public void incrementBytesInCounter(long bytes) { } @Override + @JsonProperty public long getMsgOutCount() { return msgOutCount.sum(); } @@ -119,6 +123,7 @@ public void incrementMsgOutCounter() { } @Override + @JsonProperty public long getBytesOutCount() { return bytesOutCount.sum(); } @@ -126,9 +131,4 @@ public long getBytesOutCount() { public void incrementBytesOutCounter(long bytes) { bytesOutCount.add(bytes); } - - @Override - public long getMsgExpiredCount() { - return 0; - } } From 2f71eb0e64257a54424002bf16ae856b993f22d9 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 23 May 2024 23:33:27 -0700 Subject: [PATCH 24/44] Spotbugs fixes --- .../pulsar/broker/admin/v2/PersistentTopics.java | 10 +--------- .../pulsar/broker/service/ReplicatorTest.java | 15 ++++----------- .../pulsar/broker/service/ReplicatorTestBase.java | 4 +--- .../stats/NonPersistentReplicatorStatsImpl.java | 6 ++++-- 4 files changed, 10 insertions(+), 25 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java index d2df7c4e04b2f..7e138442ae228 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/PersistentTopics.java @@ -29,7 +29,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.Consumer; import javax.ws.rs.DELETE; import javax.ws.rs.DefaultValue; import javax.ws.rs.Encoded; @@ -80,7 +79,6 @@ import org.apache.pulsar.common.policies.data.SubscribeRate; import org.apache.pulsar.common.policies.data.TopicOperation; import org.apache.pulsar.common.policies.data.TopicPolicies; -import org.apache.pulsar.common.policies.data.TopicStats; import org.apache.pulsar.common.policies.data.impl.AutoSubscriptionCreationOverrideImpl; import org.apache.pulsar.common.policies.data.impl.BacklogQuotaImpl; import org.apache.pulsar.common.policies.data.impl.DispatchRateImpl; @@ -1223,13 +1221,7 @@ public void getStats( new GetStatsOptions(getPreciseBacklog, subscriptionBacklogSize, getEarliestTimeInBacklog, excludePublishers, excludeConsumers); internalGetStatsAsync(authoritative, getStatsOptions) - .thenAccept(new Consumer() { - @Override - public void accept(TopicStats topicStats) { - log.info("Completed getStats request for topic {}", topicName); - asyncResponse.resume(topicStats); - } - }) + .thenAccept(asyncResponse::resume) .exceptionally(ex -> { // If the exception is not redirect exception we need to log it. if (isNot307And404Exception(ex)) { diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index e4e65c79330ea..765727aeac319 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -36,7 +36,6 @@ import java.lang.reflect.Field; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; -import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -522,17 +521,11 @@ public void testReplicationWillNotStuckByIncompleteSchemaFuture() throws Excepti pulsar1.getConfiguration().setReplicationProducerQueueSize(originalReplicationProducerQueueSize); } - private void waitReplicateFinish(TopicName topicName, PulsarAdmin admin) { + private static void waitReplicateFinish(TopicName topicName, PulsarAdmin admin){ Awaitility.await().untilAsserted(() -> { - try { - var stats = admin.topics().getStats(topicName.toString(), true, false, false); - var replicationStats = stats.getReplication(); - var entries = replicationStats.entrySet(); - for (Map.Entry subStats : entries) { - assertEquals(subStats.getValue().getReplicationBacklog(), 0, "replication task finished"); - } - } catch (PulsarAdminException e) { - assert false; + for (Map.Entry subStats : + admin.topics().getStats(topicName.toString(), true, false, false).getReplication().entrySet()){ + assertTrue(subStats.getValue().getReplicationBacklog() == 0, "replication task finished"); } }); } diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java index 156e1cf78f5d6..33877b681184f 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTestBase.java @@ -320,13 +320,11 @@ protected void setup() throws Exception { } private PulsarService buildPulsarService(ServiceConfiguration config, InMemoryMetricReader metricReader) { - //var otelSdkCustomizer = metricReader != null ? - //BrokerOpenTelemetryTestUtil.getOpenTelemetrySdkBuilderConsumer(metricReader) : null; return new PulsarService(config, new WorkerConfig(), Optional.empty(), exitCode -> log.info("Pulsar service finished with exit code {}", exitCode), - null); + BrokerOpenTelemetryTestUtil.getOpenTelemetrySdkBuilderConsumer(metricReader)); } public void setConfig3DefaultValue() { diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java index 289de074262f8..967dd0f83e56e 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java @@ -19,18 +19,20 @@ package org.apache.pulsar.common.policies.data.stats; import java.util.Objects; -import lombok.Getter; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; /** * Statistics for a non-persistent replicator. */ +@Data +@EqualsAndHashCode(callSuper = true) public class NonPersistentReplicatorStatsImpl extends ReplicatorStatsImpl implements NonPersistentReplicatorStats { /** * for non-persistent topic: broker drops msg for replicator if replicator connection is not writable. **/ - @Getter public double msgDropRate; public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl stats) { From 5d20675148d51c3f8b575f3fffd3f39dd31f86e8 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 10:07:21 -0700 Subject: [PATCH 25/44] Fix BrokerServiceLookupTest.testMultipleBrokerLookup --- .../org/apache/pulsar/client/api/BrokerServiceLookupTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/client/api/BrokerServiceLookupTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/client/api/BrokerServiceLookupTest.java index 336728f279eda..266fc4e448ab6 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/client/api/BrokerServiceLookupTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/client/api/BrokerServiceLookupTest.java @@ -192,6 +192,7 @@ public void testMultipleBrokerLookup() throws Exception { // Disable collecting topic stats during this test, as it deadlocks on access to map BrokerService.topics. pulsar2.getOpenTelemetryTopicStats().close(); pulsar2.getOpenTelemetryConsumerStats().close(); + pulsar2.getOpenTelemetryReplicatorStats().close(); var metricReader = pulsarTestContext.getOpenTelemetryMetricReader(); var lookupRequestSemaphoreField = BrokerService.class.getDeclaredField("lookupRequestSemaphore"); From 2b2d18dd0c507f47c98e0d2a7e5d67ba60a39b67 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 10:22:13 -0700 Subject: [PATCH 26/44] Add comments --- .../data/NonPersistentReplicatorStats.java | 5 ++-- .../common/policies/data/ReplicatorStats.java | 25 ++++++++----------- .../NonPersistentReplicatorStatsImpl.java | 5 ++++ .../data/stats/ReplicatorStatsImpl.java | 6 +++++ 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java index 867b531be8817..bfeeb6d037a78 100644 --- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java +++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/NonPersistentReplicatorStats.java @@ -28,7 +28,6 @@ public interface NonPersistentReplicatorStats extends ReplicatorStats { **/ double getMsgDropRate(); - default long getMsgDropCount() { - return 0; - } + /** Total number of messages dropped by the broker for the replicator. */ + long getMsgDropCount(); } diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java index 5c0c1c62d15a7..1790cc35f50c5 100644 --- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java +++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/policies/data/ReplicatorStats.java @@ -27,41 +27,36 @@ public interface ReplicatorStats { @Deprecated double getMsgRateIn(); - default long getMsgInCount() { - return 0; - } + /** Total number of messages received from the remote cluster. */ + long getMsgInCount(); /** Total throughput received from the remote cluster (bytes/s). */ @Deprecated double getMsgThroughputIn(); - default long getBytesInCount() { - return 0; - } + /** Total number of bytes received from the remote cluster. */ + long getBytesInCount(); /** Total rate of messages delivered to the replication-subscriber (msg/s). */ @Deprecated double getMsgRateOut(); - default long getMsgOutCount() { - return 0; - } + /** Total number of messages sent to the remote cluster. */ + long getMsgOutCount(); /** Total throughput delivered to the replication-subscriber (bytes/s). */ @Deprecated double getMsgThroughputOut(); - default long getBytesOutCount() { - return 0; - } + /** Total number of bytes sent to the remote cluster. */ + long getBytesOutCount(); /** Total rate of messages expired (msg/s). */ @Deprecated double getMsgRateExpired(); - default long getMsgExpiredCount() { - return 0; - } + /** Total number of messages expired. */ + long getMsgExpiredCount(); /** Number of messages pending to be replicated to remote cluster. */ long getReplicationBacklog(); diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java index 967dd0f83e56e..dde5430ff0225 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java @@ -41,4 +41,9 @@ public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl sta this.msgDropRate += stats.msgDropRate; return this; } + + @Override + public long getMsgDropCount() { + return 0; + } } diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java index 280170b8ca35f..def4a9d11fbd4 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java @@ -131,4 +131,10 @@ public long getBytesOutCount() { public void incrementBytesOutCounter(long bytes) { bytesOutCount.add(bytes); } + + @Override + @JsonProperty + public long getMsgExpiredCount() { + return 0; + } } From 1228a03c6b9c7cfb81d83124f64eb1c43bb64232 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 10:36:18 -0700 Subject: [PATCH 27/44] Add replicator stats attributes --- .../stats/OpenTelemetryReplicatorStats.java | 18 +++++++++++++----- .../opentelemetry/OpenTelemetryAttributes.java | 6 ++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index b6aa48f3dcdf1..1187de8baa180 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -27,8 +27,10 @@ import org.apache.pulsar.broker.PulsarService; import org.apache.pulsar.broker.service.AbstractReplicator; import org.apache.pulsar.broker.service.persistent.PersistentReplicator; +import org.apache.pulsar.common.naming.TopicName; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; import org.apache.pulsar.common.stats.MetricsUtil; +import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; public class OpenTelemetryReplicatorStats implements AutoCloseable { @@ -146,13 +148,19 @@ public void close() { } private void recordMetricsForReplicator(AbstractReplicator replicator) { - var topic = replicator.getLocalTopic(); - - var attributes = Attributes.builder() - .build(); + var topicName = TopicName.get(replicator.getLocalTopic().getName()); + var builder = Attributes.builder() + .put(OpenTelemetryAttributes.PULSAR_DOMAIN, topicName.getDomain().toString()) + .put(OpenTelemetryAttributes.PULSAR_TENANT, topicName.getTenant()) + .put(OpenTelemetryAttributes.PULSAR_NAMESPACE, topicName.getNamespace()) + .put(OpenTelemetryAttributes.PULSAR_TOPIC, topicName.getPartitionedTopicName()); + if (topicName.isPartitioned()) { + builder.put(OpenTelemetryAttributes.PULSAR_PARTITION_INDEX, topicName.getPartitionIndex()); + } + builder.put(OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, replicator.getRemoteCluster()); + var attributes = builder.build(); var stats = replicator.getStats(); - messageInCounter.record(stats.getMsgInCount(), attributes); messageOutCounter.record(stats.getMsgOutCount(), attributes); bytesInCounter.record(stats.getBytesInCount(), attributes); diff --git a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java index 4f898b382e633..954fb3430e164 100644 --- a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java +++ b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java @@ -115,4 +115,10 @@ public interface OpenTelemetryAttributes { * The type of the backlog quota. */ AttributeKey PULSAR_BACKLOG_QUOTA_TYPE = AttributeKey.stringKey("pulsar.backlog.quota.type"); + + /** + * The name of the remote cluster for a Pulsar replicator. + */ + AttributeKey PULSAR_REPLICATION_REMOTE_CLUSTER_NAME = + AttributeKey.stringKey("pulsar.replication.remote.cluster.name"); } From 410d31ec9b9fc9b1ab0a103b0af510a7084ec848 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 10:56:32 -0700 Subject: [PATCH 28/44] Draft ReplicatorTest#testReplicationMetrics --- .../pulsar/broker/service/ReplicatorTest.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 765727aeac319..43c60429ffe41 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -20,6 +20,8 @@ import static org.apache.pulsar.broker.BrokerTestUtil.newUniqueName; import static org.apache.pulsar.broker.auth.MockedPulsarServiceBaseTest.retryStrategically; +import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongGaugeValue; +import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongSumValue; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.spy; @@ -33,10 +35,13 @@ import com.google.common.collect.Sets; import com.scurrilous.circe.checksum.Crc32cIntChecksum; import io.netty.buffer.ByteBuf; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.sdk.metrics.data.MetricData; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -70,6 +75,7 @@ import org.apache.pulsar.broker.service.BrokerServiceException.NamingException; import org.apache.pulsar.broker.service.persistent.PersistentReplicator; import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.broker.stats.OpenTelemetryReplicatorStats; import org.apache.pulsar.client.admin.PulsarAdmin; import org.apache.pulsar.client.admin.PulsarAdminException; import org.apache.pulsar.client.api.Consumer; @@ -105,6 +111,7 @@ import org.apache.pulsar.common.policies.data.TopicStats; import org.apache.pulsar.common.protocol.Commands; import org.apache.pulsar.common.util.collections.ConcurrentOpenHashMap; +import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; import org.apache.pulsar.schema.Schemas; import org.awaitility.Awaitility; import org.awaitility.reflect.WhiteboxImpl; @@ -1820,6 +1827,49 @@ public void testReplicatorWithTTL() throws Exception { assertEquals(result, Lists.newArrayList("V1", "V2", "V3", "V4")); } + @Test(dataProvider = "namespace") + public void testReplicationMetrics(String namespace) throws Exception { + var destTopicName = + TopicName.get(BrokerTestUtil.newUniqueName("persistent://" + namespace + "/replicationMetrics")); + + @Cleanup + var producer1 = new MessageProducer(url1, destTopicName); + + @Cleanup + var consumer1 = new MessageConsumer(url1, destTopicName); + + @Cleanup + var consumer2 = new MessageConsumer(url2, destTopicName); + + // Produce from cluster 1 and consume from the 1 and 2. + producer1.produce(2); + consumer1.receive(2); + consumer2.receive(1); + + var metrics1 = metricReader1.collectAllMetrics() + .stream() + .filter(metric -> metric.getName().startsWith("pulsar.broker.replication")) + .sorted(Comparator.comparing(MetricData::getName)) + .toList(); + var attributes1 = Attributes.of( + OpenTelemetryAttributes.PULSAR_DOMAIN, destTopicName.getDomain().value(), + OpenTelemetryAttributes.PULSAR_TENANT, destTopicName.getTenant(), + OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespacePortion(), + OpenTelemetryAttributes.PULSAR_TOPIC, destTopicName.getPartitionedTopicName(), + OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 + ); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes1, 1); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes1, 1); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes1, 1); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes1, 1); + + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes1, 1); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.EXPIRED_COUNTER, attributes1, 1); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes1, 1); + + assertMetricLongGaugeValue(metrics1, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes1, 1); + } + private void pauseReplicator(PersistentReplicator replicator) { Awaitility.await().untilAsserted(() -> { assertTrue(replicator.isConnected()); From 2b2547274ec33e88e402327cecd41c03a914d385 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 11:10:03 -0700 Subject: [PATCH 29/44] Fix test --- .../stats/OpenTelemetryReplicatorStats.java | 2 +- .../pulsar/broker/service/ReplicatorTest.java | 21 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index 1187de8baa180..ec06307958c99 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -165,7 +165,7 @@ private void recordMetricsForReplicator(AbstractReplicator replicator) { messageOutCounter.record(stats.getMsgOutCount(), attributes); bytesInCounter.record(stats.getBytesInCount(), attributes); bytesOutCounter.record(stats.getBytesOutCount(), attributes); - connectedCounter.record(replicator.isConnected() ? 0 : 1, attributes); + connectedCounter.record(replicator.isConnected() ? 1 : 0, attributes); var delaySeconds = MetricsUtil.convertToSeconds(replicator.getReplicationDelayMs(), TimeUnit.MILLISECONDS); delayGauge.record(delaySeconds, attributes); diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 43c60429ffe41..cf28640a4e4fc 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -18,6 +18,7 @@ */ package org.apache.pulsar.broker.service; +import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.assertThat; import static org.apache.pulsar.broker.BrokerTestUtil.newUniqueName; import static org.apache.pulsar.broker.auth.MockedPulsarServiceBaseTest.retryStrategically; import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongGaugeValue; @@ -1842,7 +1843,7 @@ public void testReplicationMetrics(String namespace) throws Exception { var consumer2 = new MessageConsumer(url2, destTopicName); // Produce from cluster 1 and consume from the 1 and 2. - producer1.produce(2); + producer1.produce(3); consumer1.receive(2); consumer2.receive(1); @@ -1854,17 +1855,19 @@ public void testReplicationMetrics(String namespace) throws Exception { var attributes1 = Attributes.of( OpenTelemetryAttributes.PULSAR_DOMAIN, destTopicName.getDomain().value(), OpenTelemetryAttributes.PULSAR_TENANT, destTopicName.getTenant(), - OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespacePortion(), + OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespace(), OpenTelemetryAttributes.PULSAR_TOPIC, destTopicName.getPartitionedTopicName(), OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 ); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes1, 1); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes1, 1); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes1, 1); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes1, 1); - - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes1, 1); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.EXPIRED_COUNTER, attributes1, 1); + var dummyValue = 0; + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes1, dummyValue); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes1, dummyValue); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes1, 3); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes1, + aLong -> assertThat(aLong).isGreaterThan(0)); + + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes1, 0); + assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.EXPIRED_COUNTER, attributes1, 0); assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes1, 1); assertMetricLongGaugeValue(metrics1, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes1, 1); From 5abbae37635911a6872598f1b2fe8d526b82b610 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 11:15:06 -0700 Subject: [PATCH 30/44] Test fix --- .../pulsar/broker/service/ReplicatorTest.java | 4 ++-- .../stats/BrokerOpenTelemetryTestUtil.java | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index cf28640a4e4fc..0b3e53bbc7d18 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -21,7 +21,7 @@ import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.assertThat; import static org.apache.pulsar.broker.BrokerTestUtil.newUniqueName; import static org.apache.pulsar.broker.auth.MockedPulsarServiceBaseTest.retryStrategically; -import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongGaugeValue; +import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricDoubleGaugeValue; import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongSumValue; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; @@ -1870,7 +1870,7 @@ public void testReplicationMetrics(String namespace) throws Exception { assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.EXPIRED_COUNTER, attributes1, 0); assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes1, 1); - assertMetricLongGaugeValue(metrics1, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes1, 1); + assertMetricDoubleGaugeValue(metrics1, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes1, 0.0); } private void pauseReplicator(PersistentReplicator replicator) { diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/BrokerOpenTelemetryTestUtil.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/BrokerOpenTelemetryTestUtil.java index cb61677ab953d..d7ad0588201d4 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/BrokerOpenTelemetryTestUtil.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/BrokerOpenTelemetryTestUtil.java @@ -89,4 +89,22 @@ public static void assertMetricLongGaugeValue(Collection metrics, St valueConsumer.accept(point.getValue()); })))); } + + public static void assertMetricDoubleGaugeValue(Collection metrics, String metricName, + Attributes attributes, double expected) { + assertMetricDoubleGaugeValue(metrics, metricName, attributes, actual -> assertThat(actual).isEqualTo(expected)); + } + + public static void assertMetricDoubleGaugeValue(Collection metrics, String metricName, + Attributes attributes, Consumer valueConsumer) { + assertThat(metrics) + .anySatisfy(metric -> assertThat(metric) + .hasName(metricName) + .hasDoubleGaugeSatisfying(gauge -> gauge.satisfies( + pointData -> assertThat(pointData.getPoints()).anySatisfy( + point -> { + assertThat(point.getAttributes()).isEqualTo(attributes); + valueConsumer.accept(point.getValue()); + })))); + } } From e1b1d0e218682aaae86a9b25101808d3511f5c02 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 11:55:04 -0700 Subject: [PATCH 31/44] Validate backlog counter --- .../pulsar/broker/service/ReplicatorTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 0b3e53bbc7d18..93f3ca3d75126 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -1007,6 +1007,15 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { Thread.sleep((TIME_TO_CHECK_BACKLOG_QUOTA + 1) * 1000); assertEquals(replicator.getStats().replicationBacklog, 0); + var attributes = Attributes.of( + OpenTelemetryAttributes.PULSAR_DOMAIN, dest.getDomain().value(), + OpenTelemetryAttributes.PULSAR_TENANT, dest.getTenant(), + OpenTelemetryAttributes.PULSAR_NAMESPACE, dest.getNamespace(), + OpenTelemetryAttributes.PULSAR_TOPIC, dest.getPartitionedTopicName(), + OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 + ); + assertMetricLongSumValue( + metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); // Next message will not be replicated, because r2 has reached the quota producer1.produce(1); @@ -1014,6 +1023,8 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { Thread.sleep(500); assertEquals(replicator.getStats().replicationBacklog, 1); + assertMetricLongSumValue( + metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 1); // Consumer will now drain 1 message and the replication backlog will be cleared consumer2.receive(1); @@ -1029,6 +1040,8 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { } assertEquals(replicator.getStats().replicationBacklog, 0); + assertMetricLongSumValue( + metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); } } From a5789a47bc6d10828e7f7f4492b1b36ea9bd7d73 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 11:55:24 -0700 Subject: [PATCH 32/44] Implement dropped message counter --- .../nonpersistent/NonPersistentReplicator.java | 3 +++ .../broker/stats/OpenTelemetryReplicatorStats.java | 14 ++++++++++++-- .../stats/NonPersistentReplicatorStatsImpl.java | 13 ++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java index 6d370b87e8bb7..f07472f37d5f3 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java @@ -116,6 +116,8 @@ public void sendMessage(Entry entry) { } msgOut.recordEvent(headersAndPayload.readableBytes()); + stats.incrementMsgOutCounter(); + stats.incrementBytesOutCounter(headersAndPayload.readableBytes()); msg.setReplicatedFrom(localCluster); @@ -129,6 +131,7 @@ public void sendMessage(Entry entry) { replicatorId); } msgDrop.recordEvent(); + stats.incrementMsgDropCount(); entry.release(); } } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index ec06307958c99..c1e7f8a402675 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -66,6 +66,9 @@ public class OpenTelemetryReplicatorStats implements AutoCloseable { public static final String DELAY_GAUGE = "pulsar.broker.replication.message.age"; private final ObservableDoubleMeasurement delayGauge; + public static final String DROPPED_COUNTER = "pulsar.broker.replication.dropped.count"; + private final ObservableLongMeasurement droppedCounter; + private final BatchCallback batchCallback; public OpenTelemetryReplicatorStats(PulsarService pulsar) { @@ -122,6 +125,12 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { .setDescription("The total number of messages that expired for this replicator.") .buildObserver(); + droppedCounter = meter + .upDownCounterBuilder(DROPPED_COUNTER) + .setUnit("{message}") + .setDescription("The total number of dropped messages for this replicator.") + .buildObserver(); + batchCallback = meter.batchCallback(() -> pulsar.getBrokerService() .getTopics() .values() @@ -139,7 +148,8 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { backlogCounter, expiredCounter, connectedCounter, - delayGauge); + delayGauge, + droppedCounter); } @Override @@ -175,7 +185,7 @@ private void recordMetricsForReplicator(AbstractReplicator replicator) { } if (stats instanceof NonPersistentReplicatorStats nonPersistentStats) { - var dropCount = nonPersistentStats.getMsgDropCount(); + droppedCounter.record(nonPersistentStats.getMsgDropCount(), attributes); } } } diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java index dde5430ff0225..a09d03b21a03a 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/NonPersistentReplicatorStatsImpl.java @@ -18,7 +18,10 @@ */ package org.apache.pulsar.common.policies.data.stats; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; +import java.util.concurrent.atomic.LongAdder; import lombok.Data; import lombok.EqualsAndHashCode; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; @@ -35,6 +38,9 @@ public class NonPersistentReplicatorStatsImpl extends ReplicatorStatsImpl implem **/ public double msgDropRate; + @JsonIgnore + private final LongAdder msgDropCount = new LongAdder(); + public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl stats) { Objects.requireNonNull(stats); super.add(stats); @@ -43,7 +49,12 @@ public NonPersistentReplicatorStatsImpl add(NonPersistentReplicatorStatsImpl sta } @Override + @JsonProperty public long getMsgDropCount() { - return 0; + return msgDropCount.sum(); + } + + public void incrementMsgDropCount() { + msgDropCount.increment(); } } From 95aac5157168d67ff9fdcf1f5786e0d7bb3c9019 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 12:08:10 -0700 Subject: [PATCH 33/44] Validate delay metric --- .../pulsar/broker/service/ReplicatorTest.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 93f3ca3d75126..11ceb619e41b3 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -1014,8 +1014,9 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { OpenTelemetryAttributes.PULSAR_TOPIC, dest.getPartitionedTopicName(), OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 ); - assertMetricLongSumValue( - metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); + var metrics = metricReader1.collectAllMetrics(); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); + assertMetricDoubleGaugeValue(metrics, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes, 0.0); // Next message will not be replicated, because r2 has reached the quota producer1.produce(1); @@ -1023,8 +1024,10 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { Thread.sleep(500); assertEquals(replicator.getStats().replicationBacklog, 1); - assertMetricLongSumValue( - metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 1); + metrics = metricReader1.collectAllMetrics(); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 1); + assertMetricDoubleGaugeValue(metrics, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes, + aDouble -> assertThat(aDouble).isGreaterThan(0.5)); // Consumer will now drain 1 message and the replication backlog will be cleared consumer2.receive(1); @@ -1040,8 +1043,9 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { } assertEquals(replicator.getStats().replicationBacklog, 0); - assertMetricLongSumValue( - metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); + metrics = metricReader1.collectAllMetrics(); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); + assertMetricDoubleGaugeValue(metrics, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes, 0.0); } } From 1b8739e3d47c26da23da71a065432b0d8f2d683f Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 17:31:25 -0700 Subject: [PATCH 34/44] Validate expired metric --- .../pulsar/broker/service/ReplicatorTest.java | 48 ++++++++++++------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 11ceb619e41b3..58290b1eab25c 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -27,6 +27,7 @@ import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.spy; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertNull; @@ -1864,12 +1865,7 @@ public void testReplicationMetrics(String namespace) throws Exception { consumer1.receive(2); consumer2.receive(1); - var metrics1 = metricReader1.collectAllMetrics() - .stream() - .filter(metric -> metric.getName().startsWith("pulsar.broker.replication")) - .sorted(Comparator.comparing(MetricData::getName)) - .toList(); - var attributes1 = Attributes.of( + var attributes = Attributes.of( OpenTelemetryAttributes.PULSAR_DOMAIN, destTopicName.getDomain().value(), OpenTelemetryAttributes.PULSAR_TENANT, destTopicName.getTenant(), OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespace(), @@ -1877,17 +1873,32 @@ public void testReplicationMetrics(String namespace) throws Exception { OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 ); var dummyValue = 0; - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes1, dummyValue); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes1, dummyValue); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes1, 3); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes1, - aLong -> assertThat(aLong).isGreaterThan(0)); - - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes1, 0); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.EXPIRED_COUNTER, attributes1, 0); - assertMetricLongSumValue(metrics1, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes1, 1); - - assertMetricDoubleGaugeValue(metrics1, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes1, 0.0); + var metrics = metricReader1.collectAllMetrics(); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes, dummyValue); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes, dummyValue); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes, 3); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes, + aLong -> assertThat(aLong).isPositive()); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes, 1); + + var topicOpt = pulsar1.getBrokerService().getTopicReference(destTopicName.toString()); + assertThat(topicOpt).isPresent(); + var topic = topicOpt.get(); + topic.getReplicators() + .values() + .stream() + .map(PersistentReplicator.class::cast) + .forEach(this::pauseReplicator); + producer1.produce(5); + Awaitility.await().untilAsserted(() -> { + topic.getReplicators() + .values() + .stream() + .map(PersistentReplicator.class::cast) + .forEach(repl -> repl.expireMessages(1)); + assertMetricLongSumValue(metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.EXPIRED_COUNTER, + attributes, 5); + }); } private void pauseReplicator(PersistentReplicator replicator) { @@ -1895,5 +1906,8 @@ private void pauseReplicator(PersistentReplicator replicator) { assertTrue(replicator.isConnected()); }); replicator.closeProducerAsync(true); + Awaitility.await().untilAsserted(() -> { + assertFalse(replicator.isConnected()); + }); } } From ad49a8297853098ff9ac88e7d5b1a1c178dbd5c8 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Tue, 28 May 2024 17:34:14 -0700 Subject: [PATCH 35/44] Test cleanup --- .../pulsar/broker/service/ReplicatorTest.java | 22 +-- .../OpenTelemetryReplicatorStatsTest.java | 151 ------------------ 2 files changed, 8 insertions(+), 165 deletions(-) delete mode 100644 pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 58290b1eab25c..b4daa2081010a 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -38,12 +38,10 @@ import com.scurrilous.circe.checksum.Crc32cIntChecksum; import io.netty.buffer.ByteBuf; import io.opentelemetry.api.common.Attributes; -import io.opentelemetry.sdk.metrics.data.MetricData; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -1028,7 +1026,7 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { metrics = metricReader1.collectAllMetrics(); assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 1); assertMetricDoubleGaugeValue(metrics, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes, - aDouble -> assertThat(aDouble).isGreaterThan(0.5)); + aDouble -> assertThat(aDouble).isPositive()); // Consumer will now drain 1 message and the replication backlog will be cleared consumer2.receive(1); @@ -1846,10 +1844,9 @@ public void testReplicatorWithTTL() throws Exception { assertEquals(result, Lists.newArrayList("V1", "V2", "V3", "V4")); } - @Test(dataProvider = "namespace") - public void testReplicationMetrics(String namespace) throws Exception { - var destTopicName = - TopicName.get(BrokerTestUtil.newUniqueName("persistent://" + namespace + "/replicationMetrics")); + @Test + public void testReplicationMetrics() throws Exception { + var destTopicName = TopicName.get(BrokerTestUtil.newUniqueName("persistent://pulsar/ns/replicationMetrics")); @Cleanup var producer1 = new MessageProducer(url1, destTopicName); @@ -1884,18 +1881,15 @@ public void testReplicationMetrics(String namespace) throws Exception { var topicOpt = pulsar1.getBrokerService().getTopicReference(destTopicName.toString()); assertThat(topicOpt).isPresent(); var topic = topicOpt.get(); - topic.getReplicators() + var persistentReplicators = topic.getReplicators() .values() .stream() .map(PersistentReplicator.class::cast) - .forEach(this::pauseReplicator); + .toList(); + persistentReplicators.forEach(this::pauseReplicator); producer1.produce(5); Awaitility.await().untilAsserted(() -> { - topic.getReplicators() - .values() - .stream() - .map(PersistentReplicator.class::cast) - .forEach(repl -> repl.expireMessages(1)); + persistentReplicators.forEach(repl -> repl.expireMessages(1)); assertMetricLongSumValue(metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.EXPIRED_COUNTER, attributes, 5); }); diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java deleted file mode 100644 index 7cc95831ee2e8..0000000000000 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStatsTest.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.pulsar.broker.stats; - -import static org.apache.pulsar.broker.stats.BrokerOpenTelemetryTestUtil.assertMetricLongSumValue; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Mockito.doAnswer; -import io.opentelemetry.api.common.Attributes; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import lombok.Cleanup; -import org.apache.pulsar.broker.BrokerTestUtil; -import org.apache.pulsar.broker.intercept.BrokerInterceptor; -import org.apache.pulsar.broker.service.BrokerTestBase; -import org.apache.pulsar.broker.service.Consumer; -import org.apache.pulsar.broker.testcontext.PulsarTestContext; -import org.apache.pulsar.client.api.SubscriptionInitialPosition; -import org.apache.pulsar.client.api.SubscriptionType; -import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; -import org.awaitility.Awaitility; -import org.mockito.Mockito; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -public class OpenTelemetryReplicatorStatsTest extends BrokerTestBase { - - private BrokerInterceptor brokerInterceptor; - - @BeforeMethod(alwaysRun = true) - @Override - protected void setup() throws Exception { - brokerInterceptor = - Mockito.mock(BrokerInterceptor.class, Mockito.withSettings().defaultAnswer(Mockito.CALLS_REAL_METHODS)); - super.baseSetup(); - } - - @AfterMethod(alwaysRun = true) - @Override - protected void cleanup() throws Exception { - super.internalCleanup(); - } - - @Override - protected void customizeMainPulsarTestContextBuilder(PulsarTestContext.Builder builder) { - super.customizeMainPulsarTestContextBuilder(builder); - builder.enableOpenTelemetry(true); - builder.brokerInterceptor(brokerInterceptor); - } - - @Test(timeOut = 30_000) - public void testMessagingMetrics() throws Exception { - var topicName = BrokerTestUtil.newUniqueName("persistent://prop/ns-abc/testConsumerMessagingMetrics"); - admin.topics().createNonPartitionedTopic(topicName); - - var messageCount = 5; - var ackCount = 3; - - var subscriptionName = BrokerTestUtil.newUniqueName("test"); - var receiverQueueSize = 100; - - // Intercept calls to create consumer, in order to fetch client information. - var consumerRef = new AtomicReference(); - doAnswer(invocation -> { - consumerRef.compareAndSet(null, invocation.getArgument(1)); - return null; - }).when(brokerInterceptor) - .consumerCreated(any(), argThat(arg -> arg.getSubscription().getName().equals(subscriptionName)), any()); - - @Cleanup - var consumer = pulsarClient.newConsumer() - .topic(topicName) - .subscriptionName(subscriptionName) - .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest) - .subscriptionType(SubscriptionType.Shared) - .ackTimeout(1, TimeUnit.SECONDS) - .receiverQueueSize(receiverQueueSize) - .property("prop1", "value1") - .subscribe(); - - Awaitility.await().until(() -> consumerRef.get() != null); - var serverConsumer = consumerRef.get(); - - @Cleanup - var producer = pulsarClient.newProducer() - .topic(topicName) - .create(); - for (int i = 0; i < messageCount; i++) { - producer.send(String.format("msg-%d", i).getBytes()); - var message = consumer.receive(); - if (i < ackCount) { - consumer.acknowledge(message); - } - } - - var attributes = Attributes.builder() - .put(OpenTelemetryAttributes.PULSAR_DOMAIN, "persistent") - .put(OpenTelemetryAttributes.PULSAR_TENANT, "prop") - .put(OpenTelemetryAttributes.PULSAR_NAMESPACE, "prop/ns-abc") - .put(OpenTelemetryAttributes.PULSAR_TOPIC, topicName) - .put(OpenTelemetryAttributes.PULSAR_SUBSCRIPTION_NAME, subscriptionName) - .put(OpenTelemetryAttributes.PULSAR_SUBSCRIPTION_TYPE, SubscriptionType.Shared.toString()) - .put(OpenTelemetryAttributes.PULSAR_CONSUMER_NAME, consumer.getConsumerName()) - .put(OpenTelemetryAttributes.PULSAR_CONSUMER_ID, 0) - .put(OpenTelemetryAttributes.PULSAR_CONSUMER_CONNECTED_SINCE, - serverConsumer.getConnectedSince().getEpochSecond()) - .put(OpenTelemetryAttributes.PULSAR_CLIENT_ADDRESS, serverConsumer.getClientAddressAndPort()) - .put(OpenTelemetryAttributes.PULSAR_CLIENT_VERSION, serverConsumer.getClientVersion()) - .put(OpenTelemetryAttributes.PULSAR_CONSUMER_METADATA, List.of("prop1:value1")) - .build(); - - Awaitility.await().untilAsserted(() -> { - var metrics = pulsarTestContext.getOpenTelemetryMetricReader().collectAllMetrics(); - - assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_OUT_COUNTER, attributes, - actual -> assertThat(actual).isPositive()); - assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.BYTES_OUT_COUNTER, attributes, - actual -> assertThat(actual).isPositive()); - - assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_ACK_COUNTER, attributes, ackCount); - assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_PERMITS_COUNTER, attributes, - actual -> assertThat(actual).isGreaterThanOrEqualTo(receiverQueueSize - messageCount - ackCount)); - - var unAckCount = messageCount - ackCount; - assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_UNACKNOWLEDGED_COUNTER, - attributes.toBuilder().put(OpenTelemetryAttributes.PULSAR_CONSUMER_BLOCKED, false).build(), - unAckCount); - assertMetricLongSumValue(metrics, OpenTelemetryConsumerStats.MESSAGE_REDELIVER_COUNTER, attributes, - actual -> assertThat(actual).isGreaterThanOrEqualTo(unAckCount)); - }); - } -} From ee0375989743c7e1fb49a78ddcbe4b76234df5e8 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 13 Jun 2024 14:43:16 -0700 Subject: [PATCH 36/44] Cosmetic fixes --- .../pulsar/opentelemetry/OpenTelemetryAttributes.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java index a1d73b39c624e..bb6ee2d26e12f 100644 --- a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java +++ b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryAttributes.java @@ -122,15 +122,15 @@ enum CompactionStatus { * The type of the backlog quota. */ AttributeKey PULSAR_BACKLOG_QUOTA_TYPE = AttributeKey.stringKey("pulsar.backlog.quota.type"); + enum BacklogQuotaType { + SIZE, + TIME; + public final Attributes attributes = Attributes.of(PULSAR_BACKLOG_QUOTA_TYPE, name().toLowerCase()); + } /** * The name of the remote cluster for a Pulsar replicator. */ AttributeKey PULSAR_REPLICATION_REMOTE_CLUSTER_NAME = AttributeKey.stringKey("pulsar.replication.remote.cluster.name"); - enum BacklogQuotaType { - SIZE, - TIME; - public final Attributes attributes = Attributes.of(PULSAR_BACKLOG_QUOTA_TYPE, name().toLowerCase()); - } } From 92f7ce84c33e2f1a7154d6b2e74383d640cfafcf Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 17 Jun 2024 10:25:27 -0700 Subject: [PATCH 37/44] Fix merge conflicts --- .../java/org/apache/pulsar/broker/PulsarService.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java index e9f8e114a8eac..8cf1376642b88 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/PulsarService.java @@ -18,6 +18,7 @@ */ package org.apache.pulsar.broker; +import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.pulsar.broker.admin.impl.BrokersBase.getHeartbeatTopicName; @@ -259,11 +260,8 @@ public class PulsarService implements AutoCloseable, ShutdownService { private final PulsarBrokerOpenTelemetry openTelemetry; private OpenTelemetryTopicStats openTelemetryTopicStats; private OpenTelemetryConsumerStats openTelemetryConsumerStats; -<<<<<<< HEAD - private OpenTelemetryReplicatorStats openTelemetryReplicatorStats; -======= private OpenTelemetryProducerStats openTelemetryProducerStats; ->>>>>>> origin/master + private OpenTelemetryReplicatorStats openTelemetryReplicatorStats; private TransactionMetadataStoreService transactionMetadataStoreService; private TransactionBufferProvider transactionBufferProvider; @@ -841,11 +839,8 @@ public void start() throws PulsarServerException { openTelemetryTopicStats = new OpenTelemetryTopicStats(this); openTelemetryConsumerStats = new OpenTelemetryConsumerStats(this); -<<<<<<< HEAD - openTelemetryReplicatorStats = new OpenTelemetryReplicatorStats(this); -======= openTelemetryProducerStats = new OpenTelemetryProducerStats(this); ->>>>>>> origin/master + openTelemetryReplicatorStats = new OpenTelemetryReplicatorStats(this); localMetadataSynchronizer = StringUtils.isNotBlank(config.getMetadataSyncEventTopic()) ? new PulsarMetadataEventSynchronizer(this, config.getMetadataSyncEventTopic()) From 7390e071d8e1341cc52e9b062422c14b46bad278 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 13:37:14 -0700 Subject: [PATCH 38/44] Rename Replicator.getStats -> Replicator.computeStats for improved clarity --- .../org/apache/pulsar/broker/service/Replicator.java | 4 +++- .../nonpersistent/NonPersistentReplicator.java | 7 ++++++- .../service/nonpersistent/NonPersistentTopic.java | 2 +- .../service/persistent/PersistentReplicator.java | 4 +++- .../broker/service/persistent/PersistentTopic.java | 4 ++-- .../stats/prometheus/NamespaceStatsAggregator.java | 2 +- .../broker/service/AbstractReplicatorTest.java | 5 +++++ .../apache/pulsar/broker/service/ReplicatorTest.java | 12 ++++++------ 8 files changed, 27 insertions(+), 13 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java index 20e9df33f014e..667063e491085 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/Replicator.java @@ -29,7 +29,7 @@ public interface Replicator { Topic getLocalTopic(); - ReplicatorStatsImpl getStats(); + ReplicatorStatsImpl computeStats(); CompletableFuture terminate(); @@ -55,4 +55,6 @@ default Optional getRateLimiter() { long getNumberOfEntriesInBacklog(); boolean isTerminated(); + + ReplicatorStatsImpl getStats(); } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java index f07472f37d5f3..6441230fad87b 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentReplicator.java @@ -146,7 +146,7 @@ public void updateRates() { } @Override - public NonPersistentReplicatorStatsImpl getStats() { + public NonPersistentReplicatorStatsImpl computeStats() { ProducerImpl producer = this.producer; stats.connected = isConnected(); stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); @@ -162,6 +162,11 @@ public NonPersistentReplicatorStatsImpl getStats() { return stats; } + @Override + public NonPersistentReplicatorStatsImpl getStats() { + return stats; + } + private static final class ProducerSendCallback implements SendCallback { private NonPersistentReplicator replicator; private Entry entry; diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentTopic.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentTopic.java index a6f65f6da3284..0c6ebdfefa01f 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentTopic.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/nonpersistent/NonPersistentTopic.java @@ -961,7 +961,7 @@ public CompletableFuture asyncGetStats(GetStatsOptions }); replicators.forEach((cluster, replicator) -> { - NonPersistentReplicatorStatsImpl replicatorStats = replicator.getStats(); + NonPersistentReplicatorStatsImpl replicatorStats = replicator.computeStats(); // Add incoming msg rates PublisherStatsImpl pubStats = remotePublishersStats.get(replicator.getRemoteCluster()); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java index ebdf04f7ef217..aa53a93da5c4f 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java @@ -34,6 +34,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; +import lombok.Getter; import org.apache.bookkeeper.mledger.AsyncCallbacks; import org.apache.bookkeeper.mledger.AsyncCallbacks.ClearBacklogCallback; import org.apache.bookkeeper.mledger.AsyncCallbacks.DeleteCallback; @@ -108,6 +109,7 @@ public abstract class PersistentReplicator extends AbstractReplicator // for connected subscriptions, message expiry will be checked if the backlog is greater than this threshold private static final int MINIMUM_BACKLOG_FOR_EXPIRY_CHECK = 1000; + @Getter protected final ReplicatorStatsImpl stats = new ReplicatorStatsImpl(); protected volatile boolean fetchSchemaInProgress = false; @@ -596,7 +598,7 @@ public void updateRates() { stats.msgRateExpired = msgExpired.getRate() + expiryMonitor.getMessageExpiryRate(); } - public ReplicatorStatsImpl getStats() { + public ReplicatorStatsImpl computeStats() { stats.replicationBacklog = cursor.getNumberOfEntriesInBacklog(false); stats.connected = isConnected(); stats.replicationDelayInSeconds = TimeUnit.MILLISECONDS.toSeconds(getReplicationDelayMs()); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentTopic.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentTopic.java index 1983fa3c383e3..6e3d49fbe9ff1 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentTopic.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentTopic.java @@ -2353,7 +2353,7 @@ public void updateRates(NamespaceStats nsStats, NamespaceBundleStats bundleStats } // Update replicator stats - ReplicatorStatsImpl rStat = replicator.getStats(); + ReplicatorStatsImpl rStat = replicator.computeStats(); // Add incoming msg rates PublisherStatsImpl pubStats = topicStatsHelper.remotePublishersStats.get(replicator.getRemoteCluster()); @@ -2636,7 +2636,7 @@ public CompletableFuture asyncGetStats(GetStatsOptions }); replicators.forEach((cluster, replicator) -> { - ReplicatorStatsImpl replicatorStats = replicator.getStats(); + ReplicatorStatsImpl replicatorStats = replicator.computeStats(); // Add incoming msg rates PublisherStatsImpl pubStats = remotePublishersStats.get(replicator.getRemoteCluster()); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/prometheus/NamespaceStatsAggregator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/prometheus/NamespaceStatsAggregator.java index 3bbc9100b364f..a229ef54c795d 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/prometheus/NamespaceStatsAggregator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/prometheus/NamespaceStatsAggregator.java @@ -290,7 +290,7 @@ private static void getTopicStats(Topic topic, TopicStats stats, boolean include } topic.getReplicators().forEach((cluster, replicator) -> { - ReplicatorStatsImpl replStats = replicator.getStats(); + ReplicatorStatsImpl replStats = replicator.computeStats(); AggregatedReplicationStats aggReplStats = stats.replicationStats.get(replicator.getRemoteCluster()); if (aggReplStats == null) { aggReplStats = new AggregatedReplicationStats(); diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/AbstractReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/AbstractReplicatorTest.java index d20f5f0d520e9..64d3088b20622 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/AbstractReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/AbstractReplicatorTest.java @@ -139,6 +139,11 @@ protected Position getReplicatorReadPosition() { return PositionFactory.EARLIEST; } + @Override + public ReplicatorStatsImpl computeStats() { + return null; + } + @Override public ReplicatorStatsImpl getStats() { return null; diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 5e0c7ae3ebe34..46b4df235d16d 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -674,7 +674,7 @@ public void testReplicatorClearBacklog() throws Exception { Thread.sleep(100); replicator.updateRates(); // for code-coverage replicator.expireMessages(1); // for code-coverage - ReplicatorStats status = replicator.getStats(); + ReplicatorStats status = replicator.computeStats(); assertEquals(status.getReplicationBacklog(), 0); } @@ -704,7 +704,7 @@ public void testResetReplicatorSubscriptionPosition() throws Exception { replicator.updateRates(); - ReplicatorStats status = replicator.getStats(); + ReplicatorStats status = replicator.computeStats(); assertEquals(status.getReplicationBacklog(), 0); } @@ -1004,7 +1004,7 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { Thread.sleep((TIME_TO_CHECK_BACKLOG_QUOTA + 1) * 1000); - assertEquals(replicator.getStats().replicationBacklog, 0); + assertEquals(replicator.computeStats().replicationBacklog, 0); var attributes = Attributes.of( OpenTelemetryAttributes.PULSAR_DOMAIN, dest.getDomain().value(), OpenTelemetryAttributes.PULSAR_TENANT, dest.getTenant(), @@ -1021,7 +1021,7 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { Thread.sleep(500); - assertEquals(replicator.getStats().replicationBacklog, 1); + assertEquals(replicator.computeStats().replicationBacklog, 1); metrics = metricReader1.collectAllMetrics(); assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 1); assertMetricDoubleGaugeValue(metrics, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes, @@ -1034,13 +1034,13 @@ public void testResumptionAfterBacklogRelaxed() throws Exception { consumer2.receive(1); int retry = 10; - for (int i = 0; i < retry && replicator.getStats().replicationBacklog > 0; i++) { + for (int i = 0; i < retry && replicator.computeStats().replicationBacklog > 0; i++) { if (i != retry - 1) { Thread.sleep(100); } } - assertEquals(replicator.getStats().replicationBacklog, 0); + assertEquals(replicator.computeStats().replicationBacklog, 0); metrics = metricReader1.collectAllMetrics(); assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BACKLOG_COUNTER, attributes, 0); assertMetricDoubleGaugeValue(metrics, OpenTelemetryReplicatorStats.DELAY_GAUGE, attributes, 0.0); From 94c40d9051a92f45ec4a222bf7b852b24b67a9f2 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 13:42:00 -0700 Subject: [PATCH 39/44] Increment replicator message publish counters --- .../org/apache/pulsar/broker/service/AbstractTopic.java | 8 ++++++++ .../common/policies/data/stats/ReplicatorStatsImpl.java | 9 +++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractTopic.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractTopic.java index 572b54e0d3e79..fbf11f1d0ad62 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractTopic.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractTopic.java @@ -933,6 +933,14 @@ public void incrementPublishCount(Producer producer, int numOfMessages, long msg if (isSystemTopic()) { systemTopicBytesInCounter.add(msgSizeInBytes); } + + if (producer.isRemote()) { + var remoteClusterName = producer.getRemoteCluster(); + var replicator = getReplicators().get(remoteClusterName); + if (replicator != null) { + replicator.getStats().incrementPublishCount(numOfMessages, msgSizeInBytes); + } + } } private void handlePublishThrottling(Producer producer, int numOfMessages, long msgSizeInBytes) { diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java index def4a9d11fbd4..c19169cbee57f 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/policies/data/stats/ReplicatorStatsImpl.java @@ -98,18 +98,15 @@ public long getMsgInCount() { return msgInCount.sum(); } - public void incrementMsgInCounter() { - msgInCount.increment(); - } - @Override @JsonProperty public long getBytesInCount() { return bytesInCount.sum(); } - public void incrementBytesInCounter(long bytes) { - bytesInCount.add(bytes); + public void incrementPublishCount(int numOfMessages, long msgSizeInBytes) { + msgInCount.add(numOfMessages); + bytesInCount.add(msgSizeInBytes); } @Override From 58b0c8116126f5c42f31e04fd36d76107e61b624 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 13:42:57 -0700 Subject: [PATCH 40/44] Skip exceptional topic futures during otel collections --- .../pulsar/broker/stats/OpenTelemetryReplicatorStats.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index c1e7f8a402675..71fe162480ee2 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -23,6 +23,7 @@ import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; import io.opentelemetry.api.metrics.ObservableLongMeasurement; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import org.apache.pulsar.broker.PulsarService; import org.apache.pulsar.broker.service.AbstractReplicator; @@ -135,7 +136,8 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { .getTopics() .values() .stream() - .map(topicFuture -> topicFuture.getNow(Optional.empty())) + .filter(topicFuture -> topicFuture.isDone() && !topicFuture.isCompletedExceptionally()) + .map(CompletableFuture::join) .filter(Optional::isPresent) .map(Optional::get) .flatMap(topic -> topic.getReplicators().values().stream()) From 0b8c93a3a15a83b3b89ce2e177bceb0a317772f4 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 13:53:50 -0700 Subject: [PATCH 41/44] Cache attributes object in AbstractReplicator field --- .../broker/service/AbstractReplicator.java | 28 +++++++++++++++++++ .../stats/OpenTelemetryReplicatorStats.java | 17 ++--------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java index c225f3a830c4b..8552a9f09e93b 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractReplicator.java @@ -19,6 +19,7 @@ package org.apache.pulsar.broker.service; import com.google.common.annotations.VisibleForTesting; +import io.opentelemetry.api.common.Attributes; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; @@ -42,6 +43,7 @@ import org.apache.pulsar.common.util.Backoff; import org.apache.pulsar.common.util.FutureUtil; import org.apache.pulsar.common.util.StringInterner; +import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,6 +77,10 @@ public abstract class AbstractReplicator implements Replicator { @Getter protected volatile State state = State.Disconnected; + private volatile Attributes attributes = null; + private static final AtomicReferenceFieldUpdater ATTRIBUTES_UPDATER = + AtomicReferenceFieldUpdater.newUpdater(AbstractReplicator.class, Attributes.class, "attributes"); + public enum State { /** * This enum has two mean meanings: @@ -488,4 +494,26 @@ protected ImmutablePair compareSetAndGetState(State expect, Stat public boolean isTerminated() { return state == State.Terminating || state == State.Terminated; } + + public Attributes getAttributes() { + if (attributes != null) { + return attributes; + } + return ATTRIBUTES_UPDATER.updateAndGet(this, old -> { + if (old != null) { + return old; + } + var topicName = TopicName.get(getLocalTopic().getName()); + var builder = Attributes.builder() + .put(OpenTelemetryAttributes.PULSAR_DOMAIN, topicName.getDomain().toString()) + .put(OpenTelemetryAttributes.PULSAR_TENANT, topicName.getTenant()) + .put(OpenTelemetryAttributes.PULSAR_NAMESPACE, topicName.getNamespace()) + .put(OpenTelemetryAttributes.PULSAR_TOPIC, topicName.getPartitionedTopicName()); + if (topicName.isPartitioned()) { + builder.put(OpenTelemetryAttributes.PULSAR_PARTITION_INDEX, topicName.getPartitionIndex()); + } + builder.put(OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, getRemoteCluster()); + return builder.build(); + }); + } } diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index 71fe162480ee2..e44bc0fb41a19 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -18,7 +18,6 @@ */ package org.apache.pulsar.broker.stats; -import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.metrics.BatchCallback; import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; import io.opentelemetry.api.metrics.ObservableLongMeasurement; @@ -28,10 +27,8 @@ import org.apache.pulsar.broker.PulsarService; import org.apache.pulsar.broker.service.AbstractReplicator; import org.apache.pulsar.broker.service.persistent.PersistentReplicator; -import org.apache.pulsar.common.naming.TopicName; import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; import org.apache.pulsar.common.stats.MetricsUtil; -import org.apache.pulsar.opentelemetry.OpenTelemetryAttributes; public class OpenTelemetryReplicatorStats implements AutoCloseable { @@ -160,19 +157,9 @@ public void close() { } private void recordMetricsForReplicator(AbstractReplicator replicator) { - var topicName = TopicName.get(replicator.getLocalTopic().getName()); - var builder = Attributes.builder() - .put(OpenTelemetryAttributes.PULSAR_DOMAIN, topicName.getDomain().toString()) - .put(OpenTelemetryAttributes.PULSAR_TENANT, topicName.getTenant()) - .put(OpenTelemetryAttributes.PULSAR_NAMESPACE, topicName.getNamespace()) - .put(OpenTelemetryAttributes.PULSAR_TOPIC, topicName.getPartitionedTopicName()); - if (topicName.isPartitioned()) { - builder.put(OpenTelemetryAttributes.PULSAR_PARTITION_INDEX, topicName.getPartitionIndex()); - } - builder.put(OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, replicator.getRemoteCluster()); - var attributes = builder.build(); - + var attributes = replicator.getAttributes(); var stats = replicator.getStats(); + messageInCounter.record(stats.getMsgInCount(), attributes); messageOutCounter.record(stats.getMsgOutCount(), attributes); bytesInCounter.record(stats.getBytesInCount(), attributes); From bea5073f373f56c44171adb3b1087529c4446a7c Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 14:09:00 -0700 Subject: [PATCH 42/44] Update ReplicatorTest#testReplicationMetrics --- .../pulsar/broker/service/ReplicatorTest.java | 78 +++++++++++-------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 46b4df235d16d..6e36ccfc24e60 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -1855,37 +1855,53 @@ public void testReplicationMetrics() throws Exception { consumer1.receive(2); consumer2.receive(1); - var attributes = Attributes.of( - OpenTelemetryAttributes.PULSAR_DOMAIN, destTopicName.getDomain().value(), - OpenTelemetryAttributes.PULSAR_TENANT, destTopicName.getTenant(), - OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespace(), - OpenTelemetryAttributes.PULSAR_TOPIC, destTopicName.getPartitionedTopicName(), - OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 - ); - var dummyValue = 0; - var metrics = metricReader1.collectAllMetrics(); - assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes, dummyValue); - assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes, dummyValue); - assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes, 3); - assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes, - aLong -> assertThat(aLong).isPositive()); - assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes, 1); - - var topicOpt = pulsar1.getBrokerService().getTopicReference(destTopicName.toString()); - assertThat(topicOpt).isPresent(); - var topic = topicOpt.get(); - var persistentReplicators = topic.getReplicators() - .values() - .stream() - .map(PersistentReplicator.class::cast) - .toList(); - persistentReplicators.forEach(this::pauseReplicator); - producer1.produce(5); - Awaitility.await().untilAsserted(() -> { - persistentReplicators.forEach(repl -> repl.expireMessages(1)); - assertMetricLongSumValue(metricReader1.collectAllMetrics(), OpenTelemetryReplicatorStats.EXPIRED_COUNTER, - attributes, 5); - }); + { + // Validate replicator metrics on cluster 1 from cluster 2 + var attributes = Attributes.of( + OpenTelemetryAttributes.PULSAR_DOMAIN, destTopicName.getDomain().value(), + OpenTelemetryAttributes.PULSAR_TENANT, destTopicName.getTenant(), + OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespace(), + OpenTelemetryAttributes.PULSAR_TOPIC, destTopicName.getPartitionedTopicName(), + OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster2 + ); + var metrics = metricReader1.collectAllMetrics(); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes, 3); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes, + aLong -> assertThat(aLong).isPositive()); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes, 1); + + var topicOpt = pulsar1.getBrokerService().getTopicReference(destTopicName.toString()); + assertThat(topicOpt).isPresent(); + var topic = topicOpt.get(); + var persistentReplicators = topic.getReplicators() + .values() + .stream() + .map(PersistentReplicator.class::cast) + .toList(); + persistentReplicators.forEach(this::pauseReplicator); + producer1.produce(5); + Awaitility.await().untilAsserted(() -> { + persistentReplicators.forEach(repl -> repl.expireMessages(1)); + assertMetricLongSumValue(metricReader1.collectAllMetrics(), + OpenTelemetryReplicatorStats.EXPIRED_COUNTER, + attributes, 5); + }); + } + + { + // Validate replicator metrics on cluster 2 from cluster 1 + var attributes = Attributes.of( + OpenTelemetryAttributes.PULSAR_DOMAIN, destTopicName.getDomain().value(), + OpenTelemetryAttributes.PULSAR_TENANT, destTopicName.getTenant(), + OpenTelemetryAttributes.PULSAR_NAMESPACE, destTopicName.getNamespace(), + OpenTelemetryAttributes.PULSAR_TOPIC, destTopicName.getPartitionedTopicName(), + OpenTelemetryAttributes.PULSAR_REPLICATION_REMOTE_CLUSTER_NAME, cluster1 + ); + var metrics = metricReader2.collectAllMetrics(); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_IN_COUNTER, attributes, 3); + assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_IN_COUNTER, attributes, + aLong -> assertThat(aLong).isPositive()); + } } @Test From 9a9c35ab0b89c1aa4d16285e98e1fd69d40f2b51 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 17:38:55 -0700 Subject: [PATCH 43/44] Remove hardcoded pulsar.broker.replication.connected.count metric --- .../broker/stats/OpenTelemetryReplicatorStats.java | 14 +------------- .../pulsar/broker/service/ReplicatorTest.java | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index e44bc0fb41a19..fa7fa6a40cc2c 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -26,8 +26,8 @@ import java.util.concurrent.TimeUnit; import org.apache.pulsar.broker.PulsarService; import org.apache.pulsar.broker.service.AbstractReplicator; +import org.apache.pulsar.broker.service.nonpersistent.NonPersistentReplicator; import org.apache.pulsar.broker.service.persistent.PersistentReplicator; -import org.apache.pulsar.common.policies.data.NonPersistentReplicatorStats; import org.apache.pulsar.common.stats.MetricsUtil; public class OpenTelemetryReplicatorStats implements AutoCloseable { @@ -56,10 +56,6 @@ public class OpenTelemetryReplicatorStats implements AutoCloseable { public static final String EXPIRED_COUNTER = "pulsar.broker.replication.message.expired"; private final ObservableLongMeasurement expiredCounter; - // Replaces pulsar_replication_connected_count - public static final String CONNECTED_COUNTER = "pulsar.broker.replication.connected.count"; - private final ObservableLongMeasurement connectedCounter; - // Replaces pulsar_replication_delay_in_seconds public static final String DELAY_GAUGE = "pulsar.broker.replication.message.age"; private final ObservableDoubleMeasurement delayGauge; @@ -111,12 +107,6 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { .setDescription("The total number of messages that expired for this replicator.") .buildObserver(); - connectedCounter = meter - .upDownCounterBuilder(CONNECTED_COUNTER) - .setUnit("{subscriber}") - .setDescription("The total number of replication subscribers that are running.") - .buildObserver(); - delayGauge = meter .gaugeBuilder(DELAY_GAUGE) .setUnit("{second}") @@ -146,7 +136,6 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { bytesOutCounter, backlogCounter, expiredCounter, - connectedCounter, delayGauge, droppedCounter); } @@ -164,7 +153,6 @@ private void recordMetricsForReplicator(AbstractReplicator replicator) { messageOutCounter.record(stats.getMsgOutCount(), attributes); bytesInCounter.record(stats.getBytesInCount(), attributes); bytesOutCounter.record(stats.getBytesOutCount(), attributes); - connectedCounter.record(replicator.isConnected() ? 1 : 0, attributes); var delaySeconds = MetricsUtil.convertToSeconds(replicator.getReplicationDelayMs(), TimeUnit.MILLISECONDS); delayGauge.record(delaySeconds, attributes); diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java index 6e36ccfc24e60..1c47abab775b3 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ReplicatorTest.java @@ -1868,7 +1868,6 @@ public void testReplicationMetrics() throws Exception { assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.MESSAGE_OUT_COUNTER, attributes, 3); assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.BYTES_OUT_COUNTER, attributes, aLong -> assertThat(aLong).isPositive()); - assertMetricLongSumValue(metrics, OpenTelemetryReplicatorStats.CONNECTED_COUNTER, attributes, 1); var topicOpt = pulsar1.getBrokerService().getTopicReference(destTopicName.toString()); assertThat(topicOpt).isPresent(); From 2a8534a5529bdd054777981b1bd796f1dd0e398c Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 24 Jun 2024 17:39:03 -0700 Subject: [PATCH 44/44] Cosmetic fixes --- .../stats/OpenTelemetryReplicatorStats.java | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java index fa7fa6a40cc2c..04bc805a64bbf 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/OpenTelemetryReplicatorStats.java @@ -49,18 +49,18 @@ public class OpenTelemetryReplicatorStats implements AutoCloseable { private final ObservableLongMeasurement bytesOutCounter; // Replaces pulsar_replication_backlog - public static final String BACKLOG_COUNTER = "pulsar.broker.replication.message.backlog"; + public static final String BACKLOG_COUNTER = "pulsar.broker.replication.message.backlog.count"; private final ObservableLongMeasurement backlogCounter; - // Replaces pulsar_replication_rate_expired - public static final String EXPIRED_COUNTER = "pulsar.broker.replication.message.expired"; - private final ObservableLongMeasurement expiredCounter; - // Replaces pulsar_replication_delay_in_seconds - public static final String DELAY_GAUGE = "pulsar.broker.replication.message.age"; + public static final String DELAY_GAUGE = "pulsar.broker.replication.message.backlog.age"; private final ObservableDoubleMeasurement delayGauge; - public static final String DROPPED_COUNTER = "pulsar.broker.replication.dropped.count"; + // Replaces pulsar_replication_rate_expired + public static final String EXPIRED_COUNTER = "pulsar.broker.replication.message.expired.count"; + private final ObservableLongMeasurement expiredCounter; + + public static final String DROPPED_COUNTER = "pulsar.broker.replication.message.dropped.count"; private final ObservableLongMeasurement droppedCounter; private final BatchCallback batchCallback; @@ -83,14 +83,14 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { bytesInCounter = meter .upDownCounterBuilder(BYTES_IN_COUNTER) - .setUnit("{byte}") + .setUnit("{By}") .setDescription( "The total number of messages bytes received from the remote cluster through this replicator.") .buildObserver(); bytesOutCounter = meter .upDownCounterBuilder(BYTES_OUT_COUNTER) - .setUnit("{byte}") + .setUnit("{By}") .setDescription( "The total number of messages bytes sent to the remote cluster through this replicator.") .buildObserver(); @@ -101,22 +101,22 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { .setDescription("The total number of messages in the backlog for this replicator.") .buildObserver(); + delayGauge = meter + .gaugeBuilder(DELAY_GAUGE) + .setUnit("s") + .setDescription("The age of the oldest message in the replicator backlog.") + .buildObserver(); + expiredCounter = meter .upDownCounterBuilder(EXPIRED_COUNTER) .setUnit("{message}") .setDescription("The total number of messages that expired for this replicator.") .buildObserver(); - delayGauge = meter - .gaugeBuilder(DELAY_GAUGE) - .setUnit("{second}") - .setDescription("The total number of messages that expired for this replicator.") - .buildObserver(); - droppedCounter = meter .upDownCounterBuilder(DROPPED_COUNTER) .setUnit("{message}") - .setDescription("The total number of dropped messages for this replicator.") + .setDescription("The total number of messages dropped by this replicator.") .buildObserver(); batchCallback = meter.batchCallback(() -> pulsar.getBrokerService() @@ -135,8 +135,8 @@ public OpenTelemetryReplicatorStats(PulsarService pulsar) { bytesInCounter, bytesOutCounter, backlogCounter, - expiredCounter, delayGauge, + expiredCounter, droppedCounter); } @@ -159,10 +159,8 @@ private void recordMetricsForReplicator(AbstractReplicator replicator) { if (replicator instanceof PersistentReplicator persistentReplicator) { expiredCounter.record(persistentReplicator.getMessageExpiredCount(), attributes); backlogCounter.record(persistentReplicator.getNumberOfEntriesInBacklog(), attributes); - } - - if (stats instanceof NonPersistentReplicatorStats nonPersistentStats) { - droppedCounter.record(nonPersistentStats.getMsgDropCount(), attributes); + } else if (replicator instanceof NonPersistentReplicator nonPersistentReplicator) { + droppedCounter.record(nonPersistentReplicator.getStats().getMsgDropCount(), attributes); } } }