diff --git a/common-lib/common/signal/README.md b/common-lib/common/signal/README.md index ff42975ab..cf663ee2b 100644 --- a/common-lib/common/signal/README.md +++ b/common-lib/common/signal/README.md @@ -65,7 +65,7 @@ Signal's level: |aggFunction| A function used to aggregate metrics. |avg,min,max,sum...|`sum`|`avg`| |aggKeepLabels| Extra labels to keep when aggregating with by() clause. |`['pool','level']`|`[]`| |infoLabel| Only applicable to `info` metrics. Points to label name used to extract info. |*|-|-| -|valueMapping| Define signal's valueMapping in the same way defined in Grafana Dashboard Schema. |*|-|-| +|valueMappings| Define signal's valueMappings in the same way defined in Grafana Dashboard Schema. |*|-|-| |legendCustomTemplate| A custom legend template could be defined with this to override automatic legend's generation|*|`null`|`{{instance}}`| |rangeFunction| Rate function to use for counter metrics.|rate,irate,delta,idelta,increase|`rate`|`increase`| @@ -178,21 +178,24 @@ local jsonSignals = description: 'status', unit: 'short', expr: 'status{%(queriesSelector)s}', - valueMapping: { - type: 'value', - options: { - '1': { - text: 'Up', - color: 'light-green', - index: 1, + valueMappings: + [ + { + type: 'value', + options: { + '1': { + text: 'Up', + color: 'light-green', + index: 1, + }, + '0': { + text: 'Down', + color: 'light-red', + index: 0, + }, + } }, - '0': { - text: 'Down', - color: 'light-red', - index: 0, - }, - }, - }, + ], }, }, }; diff --git a/common-lib/common/signal/base.libsonnet b/common-lib/common/signal/base.libsonnet index 177b809fc..7640b0d10 100644 --- a/common-lib/common/signal/base.libsonnet +++ b/common-lib/common/signal/base.libsonnet @@ -15,7 +15,7 @@ local signalUtils = import './utils.libsonnet'; aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction, ): { @@ -98,13 +98,13 @@ local signalUtils = import './utils.libsonnet'; g.panel.timeSeries.fieldOverride.byQuery.new(name) + 
g.panel.timeSeries.fieldOverride.byQuery.withPropertiesFromOptions( g.panel.timeSeries.standardOptions.withUnit(self.unit) - + g.panel.timeSeries.standardOptions.withMappings(valueMapping) + + g.panel.timeSeries.standardOptions.withMappings(valueMappings) ) else if override == 'byName' then g.panel.timeSeries.fieldOverride.byName.new(name) + g.panel.timeSeries.fieldOverride.byName.withPropertiesFromOptions( g.panel.timeSeries.standardOptions.withUnit(self.unit) - + g.panel.timeSeries.standardOptions.withMappings(valueMapping) + + g.panel.timeSeries.standardOptions.withMappings(valueMappings) ) else error 'Unknown override type, only "byName", "byQuery" are supported.', ], diff --git a/common-lib/common/signal/counter.libsonnet b/common-lib/common/signal/counter.libsonnet index ca26aff7b..6fa15e1a8 100644 --- a/common-lib/common/signal/counter.libsonnet +++ b/common-lib/common/signal/counter.libsonnet @@ -16,7 +16,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction, ): @@ -32,7 +32,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction, ) diff --git a/common-lib/common/signal/gauge.libsonnet b/common-lib/common/signal/gauge.libsonnet index 71d3c94ac..2e5e468ea 100644 --- a/common-lib/common/signal/gauge.libsonnet +++ b/common-lib/common/signal/gauge.libsonnet @@ -15,7 +15,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, ): base.new( @@ -30,7 +30,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction=null, ) diff --git a/common-lib/common/signal/histogram.libsonnet b/common-lib/common/signal/histogram.libsonnet index ca26aff7b..6fa15e1a8 100644 --- a/common-lib/common/signal/histogram.libsonnet +++ b/common-lib/common/signal/histogram.libsonnet @@ -16,7 +16,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, 
legendCustomTemplate, rangeFunction, ): @@ -32,7 +32,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction, ) diff --git a/common-lib/common/signal/info.libsonnet b/common-lib/common/signal/info.libsonnet index ec2e384b3..7e0693d11 100644 --- a/common-lib/common/signal/info.libsonnet +++ b/common-lib/common/signal/info.libsonnet @@ -18,7 +18,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, ): base.new( @@ -33,7 +33,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction=null, ) diff --git a/common-lib/common/signal/raw.libsonnet b/common-lib/common/signal/raw.libsonnet index ca26aff7b..6fa15e1a8 100644 --- a/common-lib/common/signal/raw.libsonnet +++ b/common-lib/common/signal/raw.libsonnet @@ -16,7 +16,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction, ): @@ -32,7 +32,7 @@ base { aggKeepLabels, vars, datasource, - valueMapping, + valueMappings, legendCustomTemplate, rangeFunction, ) diff --git a/common-lib/common/signal/signal.libsonnet b/common-lib/common/signal/signal.libsonnet index 044778028..ba842d71e 100644 --- a/common-lib/common/signal/signal.libsonnet +++ b/common-lib/common/signal/signal.libsonnet @@ -57,7 +57,7 @@ local stub = import './stub.libsonnet'; aggFunction=std.get(signalsJson.signals[s], 'aggFunction', std.get(signalsJson, 'aggFunction', 'avg')), aggKeepLabels=std.get(signalsJson.signals[s], 'aggKeepLabels', std.get(signalsJson, 'aggKeepLabels', [])), infoLabel=std.get(signalsJson.signals[s], 'infoLabel', null), - valueMapping=std.get(signalsJson.signals[s], 'valueMapping', {}), + valueMappings=std.get(signalsJson.signals[s], 'valueMappings', []), legendCustomTemplate=std.get(signalsJson.signals[s], 'legendCustomTemplate', std.get(signalsJson, 'legendCustomTemplate', null)), 
rangeFunction=std.get(signalsJson.signals[s], 'rangeFunction', std.get(signalsJson, 'rangeFunction', 'rate')), // rate, irate , delta, increase, idelta... ) @@ -98,7 +98,7 @@ local stub = import './stub.libsonnet'; aggFunction=std.get(signalsJson.signals[s], 'aggFunction', std.get(signalsJson, 'aggFunction', 'avg')), aggKeepLabels=std.get(signalsJson.signals[s].sources[type], 'aggKeepLabels', std.get(signalsJson, 'aggKeepLabels', [])), infoLabel=std.get(signalsJson.signals[s].sources[type], 'infoLabel', null), - valueMapping=std.get(signalsJson.signals[s].sources[type], 'valueMapping', {}), + valueMappings=std.get(signalsJson.signals[s].sources[type], 'valueMappings', []), legendCustomTemplate=std.get(signalsJson.signals[s].sources[type], 'legendCustomTemplate', std.get(signalsJson, 'legendCustomTemplate', null)), rangeFunction=std.get(signalsJson.signals[s].sources[type], 'rangeFunction', std.get(signalsJson, 'rangeFunction', 'rate')), ) @@ -183,7 +183,7 @@ local stub = import './stub.libsonnet'; aggFunction=self.aggFunction, aggKeepLabels=self.aggKeepLabels, infoLabel=null, - valueMapping={}, + valueMappings=[], legendCustomTemplate=null, rangeFunction='rate' ): @@ -211,7 +211,7 @@ local stub = import './stub.libsonnet'; aggKeepLabels=aggKeepLabels, datasource=datasource, vars=this.templatingVariables, - valueMapping=valueMapping, + valueMappings=valueMappings, legendCustomTemplate=legendCustomTemplate, ) else if type == 'raw' then @@ -227,7 +227,7 @@ local stub = import './stub.libsonnet'; aggKeepLabels=aggKeepLabels, datasource=datasource, vars=this.templatingVariables, - valueMapping=valueMapping, + valueMappings=valueMappings, legendCustomTemplate=legendCustomTemplate, rangeFunction=rangeFunction, ) @@ -244,7 +244,7 @@ local stub = import './stub.libsonnet'; aggKeepLabels=aggKeepLabels, datasource=datasource, vars=this.templatingVariables, - valueMapping=valueMapping, + valueMappings=valueMappings, legendCustomTemplate=legendCustomTemplate, 
rangeFunction=rangeFunction, ) @@ -261,7 +261,7 @@ local stub = import './stub.libsonnet'; aggKeepLabels=aggKeepLabels, datasource=datasource, vars=this.templatingVariables, - valueMapping=valueMapping, + valueMappings=valueMappings, legendCustomTemplate=legendCustomTemplate, rangeFunction=rangeFunction, ) @@ -278,7 +278,7 @@ local stub = import './stub.libsonnet'; aggKeepLabels=aggKeepLabels, datasource=datasource, vars=this.templatingVariables, - valueMapping=valueMapping, + valueMappings=valueMappings, legendCustomTemplate=legendCustomTemplate, ) else if type == 'stub' then diff --git a/jvm-observ-lib/README.md b/jvm-observ-lib/README.md index 5dce681cf..19db520ec 100644 --- a/jvm-observ-lib/README.md +++ b/jvm-observ-lib/README.md @@ -8,6 +8,7 @@ Supports the following sources: - `otel` (https://github.com/open-telemetry/opentelemetry-java-contrib/blob/main/jmx-metrics/docs/target-systems/jvm.md) - `java_micrometer` (springboot) (https://github.com/micrometer-metrics/micrometer/blob/main/micrometer-core/src/main/java/io/micrometer/core/instrument/binder/jvm/JvmMemoryMetrics.java) - `prometheus_old` client_java instrumentation prior to 1.0.0 release: (https://github.com/prometheus/client_java/releases/tag/v1.0.0-alpha-4) +- `jmx_exporter` (https://github.com/prometheus/jmx_exporter/blob/main/collector/src/test/java/io/prometheus/jmx/JmxCollectorTest.java#L195) ## Import diff --git a/jvm-observ-lib/alerts.libsonnet b/jvm-observ-lib/alerts.libsonnet index 863baa23b..beb4d1455 100644 --- a/jvm-observ-lib/alerts.libsonnet +++ b/jvm-observ-lib/alerts.libsonnet @@ -3,7 +3,7 @@ groups: [ { - name: this.config.uid, + name: this.config.uid + '-jvm-alerts', rules: [ { diff --git a/jvm-observ-lib/dashboards.libsonnet b/jvm-observ-lib/dashboards.libsonnet index 026202036..b36b838d8 100644 --- a/jvm-observ-lib/dashboards.libsonnet +++ b/jvm-observ-lib/dashboards.libsonnet @@ -14,10 +14,10 @@ local g = import './g.libsonnet'; this.grafana.rows.overview, 
this.process.grafana.rows.process, this.grafana.rows.memory, - this.grafana.rows.gc, - this.grafana.rows.threads, - this.grafana.rows.buffers, ] + + (if this.config.metricsSource != 'jmx_exporter' then [this.grafana.rows.gc] else []) + + [this.grafana.rows.threads] + + (if this.config.metricsSource != 'jmx_exporter' then [this.grafana.rows.buffers] else []) + ( if this.config.metricsSource == 'java_micrometer' || this.config.metricsSource == 'otel' then [ diff --git a/jvm-observ-lib/main.libsonnet b/jvm-observ-lib/main.libsonnet index aefcbd344..dffdc945b 100644 --- a/jvm-observ-lib/main.libsonnet +++ b/jvm-observ-lib/main.libsonnet @@ -21,6 +21,7 @@ local processlib = import 'process-observ-lib/main.libsonnet'; metricsSource: if this.config.metricsSource == 'otel' then 'java_otel' else if this.config.metricsSource == 'prometheus' then 'prometheus' + else if this.config.metricsSource == 'jmx_exporter' then 'jmx_exporter' else if this.config.metricsSource == 'prometheus_old' then 'prometheus' else if this.config.metricsSource == 'java_micrometer' then 'java_micrometer' else error 'no such metricsSource for processlib', diff --git a/jvm-observ-lib/signals/buffers.libsonnet b/jvm-observ-lib/signals/buffers.libsonnet index c0bc8272b..ac6502bfb 100644 --- a/jvm-observ-lib/signals/buffers.libsonnet +++ b/jvm-observ-lib/signals/buffers.libsonnet @@ -19,6 +19,7 @@ function(this) description: "Direct buffer is allocated outside the Java heap and represents the OS native memory used by the JVM process. It is generally used for I/O operations. Note that direct buffers aren't freed up by GC.", type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_buffer_memory_used_bytes{%(queriesSelector)s}', @@ -39,6 +40,7 @@ function(this) description: "Direct buffer is allocated outside the Java heap and represents the OS native memory used by the JVM process. It is generally used for I/O operations. 
Note that direct buffers aren't freed up by GC.", type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_buffer_total_capacity_bytes{%(queriesSelector)s}', @@ -59,6 +61,7 @@ function(this) description: 'The mapped buffer pool is used for its FileChannel instances.', type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_buffer_memory_used_bytes{%(queriesSelector)s}', @@ -79,6 +82,7 @@ function(this) description: 'The mapped buffer pool is used for its FileChannel instances.', type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_buffer_total_capacity_bytes{%(queriesSelector)s}', diff --git a/jvm-observ-lib/signals/classes.libsonnet b/jvm-observ-lib/signals/classes.libsonnet index 339245dad..d1926fdef 100644 --- a/jvm-observ-lib/signals/classes.libsonnet +++ b/jvm-observ-lib/signals/classes.libsonnet @@ -12,6 +12,7 @@ function(this) prometheus: 'jvm_classes_loaded', // https://prometheus.github.io/client_java/instrumentation/jvm/#jvm-class-loading-metrics otel: 'process_runtime_jvm_classes_loaded', prometheus_old: 'jvm_classes_loaded', + jmx_exporter: 'java_lang_classloading_loadedclasscount', }, signals: { classesLoaded: { @@ -32,6 +33,9 @@ function(this) prometheus_old: { expr: 'jvm_classes_loaded{%(queriesSelector)s}', }, + jmx_exporter: { + expr: 'java_lang_classloading_loadedclasscount{%(queriesSelector)s}', + }, }, }, }, diff --git a/jvm-observ-lib/signals/gc.libsonnet b/jvm-observ-lib/signals/gc.libsonnet index 2eb70d220..6e6ed5535 100644 --- a/jvm-observ-lib/signals/gc.libsonnet +++ b/jvm-observ-lib/signals/gc.libsonnet @@ -24,6 +24,7 @@ function(this) description: 'Major and minor garbage collection', type: 'counter', unit: 'ops', + optional: true, sources: { java_micrometer: { expr: 'jvm_gc_pause_seconds_count{%(queriesSelector)s}', @@ -175,6 +176,7 @@ function(this) description: 'Memory used for G1 Eden Space Collection.', type: 'gauge', unit: 'bytes', + 
optional: true, sources: { //spring java_micrometer: { @@ -205,6 +207,7 @@ function(this) |||, type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_memory_max_bytes{id=~"(G1 )?Eden Space", area="heap", %(queriesSelector)s}', @@ -233,6 +236,7 @@ function(this) |||, type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_memory_committed_bytes{id=~"(G1 |PS )?Eden Space", area="heap", %(queriesSelector)s}', @@ -256,6 +260,7 @@ function(this) description: 'Memory used for Survival collection.', type: 'gauge', unit: 'bytes', + optional: true, sources: { //spring java_micrometer: { @@ -286,6 +291,7 @@ function(this) |||, type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_memory_max_bytes{id=~"(G1 |PS )?Survivor Space"", area="heap", %(queriesSelector)s} != -1', @@ -308,6 +314,7 @@ function(this) |||, type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_memory_committed_bytes{id=~"(G1 |PS )?Survivor Space", area="heap", %(queriesSelector)s}', @@ -330,6 +337,7 @@ function(this) description: 'Memory used for Tenured(Old Gen) collection.', type: 'gauge', unit: 'bytes', + optional: true, sources: { //spring java_micrometer: { @@ -360,6 +368,7 @@ function(this) |||, type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_memory_max_bytes{id="Tenured Gen", area="heap", %(queriesSelector)s} != -1', @@ -385,6 +394,7 @@ function(this) |||, type: 'gauge', unit: 'bytes', + optional: true, sources: { java_micrometer: { expr: 'jvm_memory_committed_bytes{id="Tenured Gen", area="heap", %(queriesSelector)s}', diff --git a/jvm-observ-lib/signals/memory.libsonnet b/jvm-observ-lib/signals/memory.libsonnet index 738a19814..7eef093bc 100644 --- a/jvm-observ-lib/signals/memory.libsonnet +++ b/jvm-observ-lib/signals/memory.libsonnet @@ -10,6 +10,7 @@ function(this) prometheus: 'jvm_memory_used_bytes', // 
https://prometheus.github.io/client_java/instrumentation/jvm/#jvm-memory-metrics otel: 'process_runtime_jvm_memory_usage', prometheus_old: 'jvm_memory_bytes_max', + jmx_exporter: 'java_lang_memory_heapmemoryusage_used', //https://github.com/prometheus/jmx_exporter/blob/main/collector/src/test/java/io/prometheus/jmx/JmxCollectorTest.java#L195 }, signals: { //memory @@ -32,6 +33,9 @@ function(this) prometheus_old: { expr: 'sum without (id) (jvm_memory_bytes_used{area="heap", %(queriesSelector)s})', }, + jmx_exporter: { + expr: 'java_lang_memory_heapmemoryusage_used{%(queriesSelector)s}', + }, }, }, memoryMaxHeap: { @@ -59,6 +63,9 @@ function(this) prometheus_old: { expr: 'sum without (id) (jvm_memory_bytes_max{area="heap", %(queriesSelector)s} != -1)', }, + jmx_exporter: { + expr: 'java_lang_memory_heapmemoryusage_max{%(queriesSelector)s} != -1', + }, }, }, memoryUsedNonHeap: { @@ -78,11 +85,14 @@ function(this) prometheus_old: { expr: 'sum without (id) (jvm_memory_bytes_used{area="nonheap", %(queriesSelector)s})', }, + jmx_exporter: { + expr: 'java_lang_memory_nonheapmemoryusage_used{%(queriesSelector)s}', + }, }, }, memoryMaxNonHeap: { name: 'JVM memory max(nonheap)', - description: 'Measure of memory max possible (non-heap).', + description: 'Measure of memory max possible (non-heap). 
Returns -1 if the maximum memory size is undefined.', type: 'gauge', unit: 'bytes', sources: { @@ -96,7 +106,9 @@ function(this) prometheus_old: { expr: 'sum without (id) (jvm_memory_bytes_max{area="nonheap", %(queriesSelector)s} != -1)', }, - + jmx_exporter: { + expr: 'java_lang_memory_nonheapmemoryusage_max{%(queriesSelector)s} != -1', + }, }, }, memoryCommittedHeap: { @@ -117,6 +129,9 @@ function(this) prometheus_old: { expr: 'sum without (id) (jvm_memory_bytes_committed{area="heap", %(queriesSelector)s})', }, + jmx_exporter: { + expr: 'java_lang_memory_heapmemoryusage_committed{%(queriesSelector)s}', + }, }, }, memoryCommittedNonHeap: { @@ -137,6 +152,9 @@ function(this) prometheus_old: { expr: 'sum without (id) (jvm_memory_bytes_committed{area="nonheap", %(queriesSelector)s})', }, + jmx_exporter: { + expr: 'java_lang_memory_nonheapmemoryusage_committed{%(queriesSelector)s}', + }, }, }, }, diff --git a/jvm-observ-lib/signals/threads.libsonnet b/jvm-observ-lib/signals/threads.libsonnet index e3cabcbcc..5c2738d3d 100644 --- a/jvm-observ-lib/signals/threads.libsonnet +++ b/jvm-observ-lib/signals/threads.libsonnet @@ -12,6 +12,7 @@ function(this) prometheus: 'jvm_threads_current', // https://prometheus.github.io/client_java/instrumentation/jvm/#jvm-memory-metrics otel: 'process_runtime_jvm_threads_count', prometheus_old: 'jvm_threads_current', + jmx_exporter: 'java_lang_threading_threadcount', }, signals: { threads: { @@ -30,6 +31,9 @@ function(this) expr: 'sum without (daemon) (process_runtime_jvm_threads_count{%(queriesSelector)s})', }, prometheus_old: self.prometheus, + jmx_exporter: { + expr: 'java_lang_threading_threadcount{%(queriesSelector)s}', + }, }, }, threadsDaemon: { @@ -48,7 +52,12 @@ function(this) expr: 'process_runtime_jvm_threads_count{daemon="true", %(queriesSelector)s}', }, prometheus_old: self.prometheus, + jmx_exporter: { + expr: 'java_lang_threading_daemonthreadcount{%(queriesSelector)s}', + }, }, + + }, threadsPeak: { name: 'Threads (peak)', 
@@ -64,9 +73,9 @@ function(this) expr: 'jvm_threads_peak{%(queriesSelector)s}', }, prometheus_old: self.prometheus, - // otel: { - // expr: '?{daemon="true", %(queriesSelector)s}', - // }, + jmx_exporter: { + expr: 'java_lang_threading_peakthreadcount{%(queriesSelector)s}', + }, }, }, threadsDeadlocked: { @@ -76,16 +85,10 @@ function(this) unit: 'short', optional: true, sources: { - // java_micrometer: { - // expr: '?{%(queriesSelector)s}', - // }, prometheus: { expr: 'jvm_threads_deadlocked{%(queriesSelector)s}', }, prometheus_old: self.prometheus, - // otel: { - // expr: '?{%(queriesSelector)s}', - // }, }, }, threadStates: { diff --git a/kafka-mixin/alerts/alerts.libsonnet b/kafka-mixin/alerts/alerts.libsonnet deleted file mode 100644 index 2ea1e202d..000000000 --- a/kafka-mixin/alerts/alerts.libsonnet +++ /dev/null @@ -1,119 +0,0 @@ -{ - prometheusAlerts+:: { - groups+: [ - { - name: 'Kafka_Alerts', - rules: [ - { - alert: 'KafkaOfflinePartitonCount', - expr: - 'sum without(' + std.join(',', $._config.instanceLabels) + ') (kafka_controller_kafkacontroller_offlinepartitionscount{%(kafkaFilteringSelector)s}) > 0' % $._config, - - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Kafka has offline partitons.', - description: 'Kafka cluster {{ $labels.kafka_cluster }} has {{ $value }} offline partitions. After successful leader election, if the leader for partition dies, then the partition moves to the OfflinePartition state. Offline partitions are not available for reading and writing. 
Restart the brokers, if needed, and check the logs for errors.', - }, - }, - { - alert: 'KafkaUnderReplicatedPartitionCount', - expr: ||| - sum without() (kafka_server_replicamanager_underreplicatedpartitions{%(kafkaFilteringSelector)s}) > 0 - ||| % $._config, - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Kafka has under replicated partitons.', - description: 'Kafka instance {{ $labels.instance }} in cluster {{ $labels.kafka_cluster }} has {{ $value }} under replicated partitons', - }, - }, - { - alert: 'KafkaNoActiveController', - expr: 'sum without(' + std.join(',', $._config.instanceLabels) + ') (kafka_controller_kafkacontroller_activecontrollercount{%(kafkaFilteringSelector)s}) != 1' % $._config, - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Kafka has no active controller.', - description: 'Kafka cluster {{ $labels.kafka_cluster }} has {{ $value }} broker(s) reporting as the active controller in the last 5 minute interval. During steady state there should be only one active controller per cluster.', - }, - }, - { - alert: 'KafkaUncleanLeaderElection', - expr: 'max without(' + std.join(',', $._config.instanceLabels) + ') (rate(kafka_controller_controllerstats_uncleanleaderelectionspersec{%(kafkaFilteringSelector)s}[5m])) != 0' % $._config, - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Kafka has unclean leader elections.', - description: 'Kafka cluster {{ $labels.kafka_cluster }} has {{ $value }} unclean partition leader elections reported in the last 5 minute interval. When unclean leader election is held among out-of-sync replicas, there is a possibility of data loss if any messages were not synced prior to the loss of the former leader. So if the number of unclean elections is greater than 0, investigate broker logs to determine why leaders were re-elected, and look for WARN or ERROR messages. 
Consider setting the broker configuration parameter unclean.leader.election.enable to false so that a replica outside of the set of in-sync replicas is never elected leader.', - }, - }, - { - alert: 'KafkaISRExpandRate', - expr: ||| - sum without() (rate(kafka_server_replicamanager_isrexpandspersec{%(kafkaFilteringSelector)s}[5m])) != 0 - ||| % $._config, - 'for': '5m', - keep_firing_for: '15m', - labels: { - severity: 'warning', - }, - annotations: { - summary: 'Kafka ISR expansion rate is expanding.', - description: 'Kafka instance {{ $labels.instance }} in cluster {{ $labels.kafka_cluster }} In-Sync Replica (ISR) is expanding by {{ $value }} per second. If a broker goes down, ISR for some of the partitions shrink. When that broker is up again, ISRs are expanded once the replicas are fully caught up. Other than that, the expected value for ISR expansion rate is 0. If ISR is expanding and shrinking frequently, adjust Allowed replica lag.', - }, - }, - { - alert: 'KafkaISRShrinkRate', - expr: ||| - sum without() (rate(kafka_server_replicamanager_isrshrinkspersec{%(kafkaFilteringSelector)s}[5m])) != 0 - ||| % $._config, - 'for': '5m', - keep_firing_for: '15m', - labels: { - severity: 'warning', - }, - annotations: { - summary: 'Kafka ISR expansion rate is shrinking.', - description: 'Kafka instance {{ $labels.instance }} in cluster {{ $labels.kafka_cluster }} In-Sync Replica (ISR) is shrinking by {{ $value }} per second. If a broker goes down, ISR for some of the partitions shrink. When that broker is up again, ISRs are expanded once the replicas are fully caught up. Other than that, the expected value for ISR shrink rate is 0. 
If ISR is expanding and shrinking frequently, adjust Allowed replica lag.', - }, - }, - { - alert: 'KafkaBrokerCount', - expr: 'count without(' + std.join(',', $._config.instanceLabels) + ') (kafka_server_kafkaserver_brokerstate{%(kafkaFilteringSelector)s}) == 0' % $._config, - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - summary: 'Kafka has no brokers online.', - description: 'Kafka cluster {{ $labels.kafka_cluster }} broker count is 0.', - }, - }, - { - alert: 'KafkaZookeeperSyncConnect', - expr: ||| - avg without() (kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{%(kafkaFilteringSelector)s}) < 0 - ||| % $._config, - 'for': '5m', - labels: { - severity: 'warning', - }, - annotations: { - summary: 'Kafka Zookeeper sync disconected.', - description: 'Kafka instance {{ $labels.instance }} in cluster {{ $labels.kafka_cluster }} Zookeeper sync disconected.', - }, - }, - ], - }, - ], - }, -} diff --git a/kafka-mixin/dashboards/dashboards.libsonnet b/kafka-mixin/dashboards/dashboards.libsonnet index 3a1ae78aa..0de1c3ae2 100644 --- a/kafka-mixin/dashboards/dashboards.libsonnet +++ b/kafka-mixin/dashboards/dashboards.libsonnet @@ -3,7 +3,6 @@ grafanaDashboards+:: { 'connect-overview.json': (import 'connect-overview.json'), 'kafka-ksqldb-overview.json': (import 'kafka-ksqldb-overview.json'), - 'kafka-overview.json': (import 'kafka-overview.json'), 'schema-registry-overview.json': (import 'schema-registry-overview.json'), }, } diff --git a/kafka-mixin/dashboards/kafka-overview.json b/kafka-mixin/dashboards/kafka-overview.json deleted file mode 100644 index cdcb14ade..000000000 --- a/kafka-mixin/dashboards/kafka-overview.json +++ /dev/null @@ -1,7171 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": 
"Kafka resource usage and throughput", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 721, - "graphTooltip": 0, - "id": 3, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 42, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Healthcheck", - "type": "row" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Number of active controllers in the cluster.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "#e5ac0e", - "value": 2 - }, - { - "color": "#bf1b00" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 12, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_controller_kafkacontroller_activecontrollercount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Active Controllers", - "type": "stat" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Number of Brokers Online", - "fieldConfig": { - "defaults": { - "mappings": [ - { - 
"options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "semi-dark-green", - "value": 2 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 1 - }, - "id": 14, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "repeatDirection": "h", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "count(kafka_server_replicamanager_leadercount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Brokers Online", - "type": "stat" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Unclean leader election rate", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 1 - }, - "id": 16, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - 
"justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Unclean Leader Election Rate", - "type": "stat" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 - }, - { - "color": "#d44a3a" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 1 - }, - "id": 33, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Preferred Replica Imbalance", - "type": "stat" - }, - { - 
"datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 1 - }, - "id": 84, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic!=\"\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Bytes in", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - }, - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic!=\"\"}[$__rate_interval]))", - 
"format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Bytes out", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "B", - "step": 4 - } - ], - "title": "Broker network throughput", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "semi-dark-green", - "value": null - }, - { - "color": "#bf1b00", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 5 - }, - "id": 22, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Offline Partitions Count", - "type": "stat" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - 
} - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "semi-dark-green", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 5 - }, - "id": 20, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "title": "Under Replicated Partitions", - "type": "stat" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Number of partitions under min insync replicas.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "semi-dark-green", - "value": null - }, - { - "color": "#bf1b00", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 5 - }, - "id": 32, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": 
false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_cluster_partition_underminisr{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Under Min ISR Partitions", - "type": "stat" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "Partitions that are online", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 5 - }, - "id": 18, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Online Partitions", - "type": "stat" - }, - { - "collapsed": false, - "datasource": { - "uid": 
"$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 40, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "System", - "type": "row" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 10 - }, - "id": 27, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "irate(process_cpu_seconds_total{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval])*100", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{instance}}", - "metric": "process_cpu_secondspersec", - "refId": "A", - "step": 4 - } - ], - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Memory", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 10 - }, - "id": 2, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "jvm_memory_bytes_used", - "refId": "A", - "step": 4 - } - ], - "title": "JVM 
Memory Used", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "% time in GC", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 10 - }, - "id": 3, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval]))", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "jvm_gc_collection_seconds_sum", - "refId": "A", - "step": 4 - } - ], - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "collapsed": false, 
- "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 29, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Throughput In/Out", - "type": "row" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 18 - }, - "id": 4, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(instance)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - 
"refId": "A", - "step": 4 - } - ], - "title": "Messages In Per Topic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 18 - }, - "id": 5, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(instance)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "title": "Bytes In Per Topic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { 
- "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 18 - }, - "id": 6, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(instance)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "title": "Bytes Out Per Topic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Messages/s", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 25 - }, - "id": 10, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "title": "Messages In Per Broker", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - 
"tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 25 - }, - "id": 7, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "title": "Bytes In Per Broker", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - 
}, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 25 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Bytes Out Per Broker", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 117, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Replication", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Offline partitions over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 122, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Online Partitions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Offline partitions over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 33 - }, - "id": 121, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Offline Partitions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Under replicated partitions over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 120, - "options": { - 
"legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_cluster_partition_underreplicated{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Under Replicated Partitions", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Under min in sync replicas partitions over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 41 - }, - "id": 119, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"sum(kafka_cluster_partition_underminisr{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Under Min ISR Partitions", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 49 - }, - "id": 44, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Thread utilization", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Average fraction of time the network processor threads are idle. Values are between 0 (all resources are used) and 100 (all resources are available) ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "asc" - } - }, - 
"pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_socketserver_networkprocessoravgidlepercent{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Network Processor Avg Idle Percent", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Average fraction of time the request handler threads are idle. Values are between 0 (all resources are used) and 100 (all resources are available). ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 50 - }, - "id": 25, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "asc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Request Handler Avg Idle Percent", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 58 - }, - "id": 126, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestchannel_requestqueuesize{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Request Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - 
}, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 58 - }, - "id": 127, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestchannel_responsequeuesize{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",processor=\"\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Response Queue Size", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 66 - }, - "id": 86, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Zookeeper", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Latency in millseconds for ZooKeeper 
requests from broker. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 88, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Zookeeper Request Latency", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 67 - }, - "id": 92, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval])", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Zookeeper connections per sec", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 75 - }, - "id": 89, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(kafka_server_sessionexpirelistener_zookeeperexpirespersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Zookeeper expired connections per sec", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - 
"color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 75 - }, - "id": 90, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Zookeeper disconnect per sec", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 83 - }, - "id": 91, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - 
"tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "rate(kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Zookeeper auth failures per sec", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 91 - }, - "id": 82, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Isr Shrinks / Expands", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 92 - }, - "id": 80, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_server_replicamanager_isrshrinkspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "IsrShrinks per Sec", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly 
static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 92 - }, - "id": 83, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - 
"targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_server_replicamanager_isrexpandspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "hide": false, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "IsrExpands per Sec", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 100 - }, - "id": 53, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Logs size", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 101 - }, - "id": 55, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"sum(kafka_log_log_size{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}) by (topic)", - "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "title": "Log size per Topic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 101 - }, - "id": 56, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_log_log_size{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}) by (instance)", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Log size per Broker", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - 
"uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 109 - }, - "id": 58, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Producer Performance", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 110 - }, - "id": 60, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", - 
"hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Producer - RequestQueueTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 110 - }, - "id": 61, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"kafka_network_requestmetrics_localtimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Producer - LocalTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 118 - }, - "id": 62, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - 
"sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_remotetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Producer - RemoteTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 118 - }, - "id": 63, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"kafka_network_requestmetrics_responsequeuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Producer - ResponseQueueTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 126 - }, - "id": 64, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"kafka_network_requestmetrics_responsesendtimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Producer - ResponseSendTimeMs", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 134 - }, - "id": 68, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Consumer Performance", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 135 - }, - "id": 69, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Consumer - RequestQueueTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 135 - }, - "id": 70, - "options": { - 
"legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_localtimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Consumer - LocalTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms. 
", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 143 - }, - "id": 71, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_remotetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Consumer - RemoteTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue. 
", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 143 - }, - "id": 72, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Consumer - ResponseQueueTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block. 
", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 151 - }, - "id": 73, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "Consumer - ResponseSendTimeMs", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 159 - }, - "id": 66, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Fetch Follower Performance", - "type": "row" - }, - { - "datasource": { - "uid": 
"$datasource" - }, - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 160 - }, - "id": 74, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "FetchFollower - RequestQueueTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "In most cases, a high value can imply slow local 
storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 160 - }, - "id": 75, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_localtimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "FetchFollower - LocalTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - 
"description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 168 - }, - "id": 76, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_remotetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - 
"refId": "A" - } - ], - "title": "FetchFollower - RemoteTimeMs", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 168 - }, - "id": 77, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "FetchFollower - ResponseQueueTimeMs", - "type": "timeseries" - }, - { - "datasource": 
{ - "uid": "$datasource" - }, - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 176 - }, - "id": 78, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "title": "FetchFollower - ResponseSendTimeMs", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": 
{ - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 184 - }, - "id": 97, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Group Coordinator", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Number of consumer groups per group coordinator", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 185 - }, - "id": 99, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "instant": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Consumer groups number per coordinator", - "type": "timeseries" - 
}, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Number of consumer group per state", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 185 - }, - "id": 100, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "instant": false, - "interval": "", - "legendFormat": "stable", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "preparing-rebalance", - "refId": "B" - }, - { - 
"datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "dead", - "refId": "C" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "completing-rebalance", - "refId": "D" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "empty", - "refId": "E" - } - ], - "title": "Nb consumer groups per state", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 193 - }, - "id": 102, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Connections", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 194 - }, - "id": 104, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (listener)", - "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" - } - ], - "title": "Connections count per listener", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - 
"x": 12, - "y": 194 - }, - "id": 105, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (instance)", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Connections count per broker", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 202 - }, - "id": 106, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (listener)", - "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" - } - ], - "title": "Connections creation rate per listener", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 202 - }, - "id": 107, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (instance)", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Connections creation rate per 
instance", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 210 - }, - "id": 108, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (listener)", - "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" - } - ], - "title": "Connections close rate per listener", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 
0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 210 - }, - "id": 110, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (instance)", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Connections close rate per instance", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Tracks the amount of time Acceptor is blocked from accepting connections. 
See KIP-402 for more details.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 218 - }, - "id": 124, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "kafka_network_acceptor_acceptorblockedpercent{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}} - {{listener}}", - "refId": "A" - } - ], - "title": "Acceptor Blocked Percentage", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 218 - }, - "id": 113, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connections{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (client_software_name, client_software_version)", - "interval": "", - "legendFormat": "{{client_software_name}} {{client_software_version}}", - "refId": "A" - } - ], - "title": "Connections per client version", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 226 - }, - "id": 31, - "panels": [], - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Request rate", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Total request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 
null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 227 - }, - "id": 37, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Total Request Per Sec", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Produce request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 227 - }, - "id": 112, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"sum(rate(kafka_network_requestmetrics_requestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",request=\"Produce\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Produce Request Per Sec", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Fetch request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 227 - }, - "id": 111, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",request=\"FetchConsumer\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Fetch Request Per Sec", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Offset Commit request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": 
"none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 227 - }, - "id": 38, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",request=\"OffsetCommit\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Offset Commit Request Per Sec", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Metadata request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 227 - }, - "id": 36, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"sum(rate(kafka_network_requestmetrics_requestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",request=\"Metadata\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Metadata Request Per Sec", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 231 - }, - "id": 94, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval])) by (topic)", - "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - 
"title": "Produce request per sec per topic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 231 - }, - "id": 95, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\",topic=~\"$topic\"}[$__rate_interval])) by (topic)", - "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "title": "Fetch request per sec per topic", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "$datasource" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 239 - }, - "id": 46, - "panels": [], - "targets": [ - { - "datasource": { - 
"uid": "$datasource" - }, - "refId": "A" - } - ], - "title": "Message Conversion", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "The number of messages produced converted to match the log.message.format.version.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 240 - }, - "id": 48, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(kafka_server_brokertopicmetrics_producemessageconversionspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval]))", - "hide": false, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Number of procuded message conversion", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${datasource}" - }, - "description": "The number of messages 
consumed converted at consumer to match the log.message.format.version.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 240 - }, - "id": 51, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "10.2.3", - "targets": [ - { - "datasource": { - "uid": "${datasource}" - }, - "expr": "sum(rate(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}[$__rate_interval]))", - "hide": false, - "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "title": "Number of consumed message conversion", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "Number of connection per client version", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": 
false - } - }, - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 248 - }, - "id": 115, - "links": [], - "options": { - "displayLabels": [], - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "values": [] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kafka_server_socketservermetrics_connections{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}) by (client_software_name, client_software_version) ", - "interval": "", - "legendFormat": "{{client_software_name}} - {{client_software_version}}", - "refId": "A" - } - ], - "title": "Client version repartition", - "type": "piechart" - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "kafka-integration" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "Data source", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Job", - "multi": true, - "name": "job", - "options": [], - "query": "label_values(kafka_server_kafkaserver_brokerstate{job=\"integrations/kafka\"}, job)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - 
"datasource": { - "uid": "$datasource" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "options": [], - "query": "label_values(kafka_server_kafkaserver_brokerstate{job=~\"$job\"}, cluster)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Kafka_cluster", - "multi": true, - "name": "kafka_cluster", - "options": [], - "query": "label_values(kafka_server_kafkaserver_brokerstate{job=\"integrations/kafka\",job=~\"$job\"}, kafka_cluster)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Instance", - "multi": true, - "name": "instance", - "options": [], - "query": "label_values(kafka_server_kafkaserver_brokerstate{job=\"integrations/kafka\",job=~\"$job\",kafka_cluster=~\"$kafka_cluster\"}, instance)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": [ - "0.99" - ], - "value": [ - "0.99" - ] - }, - "datasource": { - "uid": "${datasource}" - }, - "definition": "label_values(kafka_network_requestmetrics_requestqueuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}, quantile)", - "hide": 0, - "includeAll": true, - "label": "Percentile", - "multi": true, - "name": "percentile", - "options": [], - 
"query": "label_values(kafka_network_requestmetrics_requestqueuetimems{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"}, quantile)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "uid": "${datasource}" - }, - "definition": "label_values(kafka_log_log_size{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"},topic)", - "hide": 0, - "includeAll": true, - "label": "Topic", - "multi": true, - "name": "topic", - "options": [], - "query": "label_values(kafka_log_log_size{job=\"integrations/kafka\",job=~\"$job\",cluster=~\"$cluster\",kafka_cluster=~\"$kafka_cluster\",instance=~\"$instance\"},topic)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Overview", - "uid": "qu-QZdfZz", - "version": 7, - "weekStart": "" - } \ No newline at end of file diff --git a/kafka-mixin/mixin.libsonnet b/kafka-mixin/mixin.libsonnet index 1a03f3805..22a4274c9 100644 --- a/kafka-mixin/mixin.libsonnet +++ b/kafka-mixin/mixin.libsonnet @@ -6,6 +6,7 @@ local kafka = + kafkalib.withConfigMixin( { filteringSelector: config.kafkaFilteringSelector, + zookeeperfilteringSelector: config.zookeeperFilteringSelector, groupLabels: config.groupLabels, instanceLabels: config.instanceLabels, dashboardTags: 
config.dashboardTags, @@ -17,5 +18,4 @@ local kafka = kafka.asMonitoringMixin() + // json dashboards and alerts: (import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + (import 'config.libsonnet') diff --git a/kafka-observ-lib/README.md b/kafka-observ-lib/README.md index e5cb43577..aeb0ff50b 100644 --- a/kafka-observ-lib/README.md +++ b/kafka-observ-lib/README.md @@ -18,5 +18,11 @@ jb install https://github.com/grafana/jsonnet-libs/kafka-observ-lib ## Example +Kafka broker overview dashboard: +![broker](image.png) + Kafka topic overview dashboard: -![image](https://github.com/user-attachments/assets/2396de66-f782-4efc-9edf-66af5d836f3e) \ No newline at end of file +![image](https://github.com/user-attachments/assets/2396de66-f782-4efc-9edf-66af5d836f3e) + +Zookeeper overview dashboard: +![zookeeper](image-zookeeper.png) diff --git a/kafka-observ-lib/alerts.libsonnet b/kafka-observ-lib/alerts.libsonnet index 2e55a57b7..18d778e47 100644 --- a/kafka-observ-lib/alerts.libsonnet +++ b/kafka-observ-lib/alerts.libsonnet @@ -38,6 +38,137 @@ description: 'Total kafka lag across all partitions is too high ({{ printf "%.0f" $value }}) for consumer group: {{$labels.consumergroup}}, topic: {{$labels.topic}}.', }, }, + { + alert: 'KafkaISRExpandRate', + expr: ||| + sum without() (%s) != 0 + ||| % this.signals.replicaManager.isrExpands.asRuleExpression(), + 'for': '5m', + keep_firing_for: '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Kafka ISR expansion rate is expanding.', + description: 'Kafka broker {{ $labels.%s }} in cluster {{ $labels.%s }} In-Sync Replica (ISR) is expanding by {{ $value }} per second. If a broker goes down, ISR for some of the partitions shrink. When that broker is up again, ISRs are expanded once the replicas are fully caught up. Other than that, the expected value for ISR expansion rate is 0. If ISR is expanding and shrinking frequently, adjust Allowed replica lag.' 
+ % [ + this.config.instanceLabels[0], + this.config.groupLabels[0], + ], + }, + }, + { + alert: 'KafkaISRShrinkRate', + expr: ||| + sum without() (%s) != 0 + ||| % this.signals.replicaManager.isrShrinks.asRuleExpression(), + 'for': '5m', + keep_firing_for: '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Kafka ISR is shrinking.', + description: 'Kafka broker {{ $labels.%s }} in cluster {{ $labels.%s }} In-Sync Replica (ISR) is shrinking by {{ $value }} per second. If a broker goes down, ISR for some of the partitions shrink. When that broker is up again, ISRs are expanded once the replicas are fully caught up. Other than that, the expected value for ISR shrink rate is 0. If ISR is expanding and shrinking frequently, adjust Allowed replica lag.' + % [ + this.config.instanceLabels[0], + this.config.groupLabels[0], + ], + + }, + }, + { + alert: 'KafkaOfflinePartitonCount', + expr: ||| + sum by (%s) (%s) > 0 + ||| % [ + std.join(',', this.config.groupLabels), + this.signals.replicaManager.offlinePartitions.asRuleExpression(), + ], + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Kafka has offline partitions.', + description: 'Kafka cluster {{ $labels.%s }} has {{ $value }} offline partitions. After successful leader election, if the leader for partition dies, then the partition moves to the OfflinePartition state. Offline partitions are not available for reading and writing. Restart the brokers, if needed, and check the logs for errors.' 
+ % this.config.groupLabels[0], + }, + }, + { + alert: 'KafkaUnderReplicatedPartitionCount', + expr: ||| + %s > 0 + ||| % [ + this.signals.replicaManager.underReplicatedPartitions.asRuleExpression(), + ], + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Kafka has under replicated partitions.', + description: 'Kafka broker {{ $labels.%s }} in cluster {{ $labels.%s }} has {{ $value }} under replicated partitions' + % [ + this.config.instanceLabels[0], + this.config.groupLabels[0], + ], + }, + }, + { + alert: 'KafkaNoActiveController', + expr: 'sum by(' + std.join(',', this.config.groupLabels) + ') (' + this.signals.cluster.activeControllers.asRuleExpression() + ') != 1', + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Kafka has no active controller.', + description: 'Kafka cluster {{ $labels.%s }} has {{ $value }} broker(s) reporting as the active controller in the last 5 minute interval. During steady state there should be only one active controller per cluster.' + % this.config.groupLabels[0], + }, + }, + { + alert: 'KafkaUncleanLeaderElection', + expr: this.signals.replicaManager.uncleanLeaderElection.asRuleExpression() + ' != 0', + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Kafka has unclean leader elections.', + description: 'Kafka cluster {{ $labels.%s }} has {{ $value }} unclean partition leader elections reported in the last 5 minute interval. When unclean leader election is held among out-of-sync replicas, there is a possibility of data loss if any messages were not synced prior to the loss of the former leader. So if the number of unclean elections is greater than 0, investigate broker logs to determine why leaders were re-elected, and look for WARN or ERROR messages. Consider setting the broker configuration parameter unclean.leader.election.enable to false so that a replica outside of the set of in-sync replicas is never elected leader.' 
+ % this.config.groupLabels[0], + }, + }, + { + alert: 'KafkaBrokerCount', + expr: 'count by(' + std.join(',', this.config.groupLabels) + ') (' + this.signals.cluster.brokersCount.asRuleExpression() + ') == 0', + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Kafka has no brokers online.', + description: 'Kafka cluster {{ $labels.%s }} broker count is 0.' % this.config.groupLabels[0], + }, + }, + { + alert: 'KafkaZookeeperSyncConnect', + expr: 'avg by(' + std.join(',', this.config.groupLabels + this.config.instanceLabels) + ') (' + this.signals.zookeeperClient.zookeeperConnections.asRuleExpression() + ') < 0', + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + summary: 'Kafka Zookeeper sync disconnected.', + description: 'Kafka broker {{ $labels.%s }} in cluster {{ $labels.%s }} has disconnected from Zookeeper.' + % [ + this.config.instanceLabels[0], + this.config.groupLabels[0], + ], + }, + }, + ], }, ], diff --git a/kafka-observ-lib/config.libsonnet b/kafka-observ-lib/config.libsonnet index 771518e8b..5f5ae488a 100644 --- a/kafka-observ-lib/config.libsonnet +++ b/kafka-observ-lib/config.libsonnet @@ -1,8 +1,9 @@ { local this = self, filteringSelector: 'job!=""', - groupLabels: ['kafka_cluster'], - instanceLabels: ['instance'], + zookeeperfilteringSelector: this.filteringSelector, + groupLabels: ['kafka_cluster'], // label(s) that define the kafka cluster + instanceLabels: ['instance'], // label(s) that define a single broker uid: 'kafka', dashboardNamePrefix: '', dashboardTags: ['kafka'], @@ -12,9 +13,17 @@ //Can be regex: topicsIgnoreSelector: '__consumer_offsets', zookeeperEnabled: true, + totalTimeMsQuantile: '0.95', // quantile to use for totalTimeMs metrics: 0.50, 0.75, 0.95, 0.98, 0.99, 0.999... + zookeeperClientQuantile: '0.95', // quantile to use for zookeeperClient metrics: 0.50, 0.75, 0.95, 0.98, 0.99, 0.999... 
signals+: { + cluster: (import './signals/cluster.libsonnet')(this), + broker: (import './signals/broker.libsonnet')(this), topic: (import './signals/topic.libsonnet')(this), consumerGroup: (import './signals/consumerGroup.libsonnet')(this), + zookeeperClient: (import './signals/zookeeperClient.libsonnet')(this), + totalTime: (import './signals/totalTime.libsonnet')(this), + replicaManager: (import './signals/replicaManager.libsonnet')(this), + conversion: (import './signals/conversion.libsonnet')(this), }, } diff --git a/kafka-observ-lib/dashboards.libsonnet b/kafka-observ-lib/dashboards.libsonnet index f00c8c1fa..f82ee41ed 100644 --- a/kafka-observ-lib/dashboards.libsonnet +++ b/kafka-observ-lib/dashboards.libsonnet @@ -6,13 +6,42 @@ local g = import './g.libsonnet'; g.dashboard.new(this.config.dashboardNamePrefix + 'Kafka topic overview') + g.dashboard.withVariables(this.signals.consumerGroup.getVariablesMultiChoice()) + g.dashboard.withTags(this.config.dashboardTags) - + g.dashboard.withUid(this.config.uid + '-topic-dashboard') + + g.dashboard.withUid(this.config.uid + '-kafka-topic-dashboard') + + g.dashboard.withLinks(this.grafana.links.otherDashboards) + g.dashboard.withPanels( - g.util.grid.wrapPanels( - std.flattenArrays([ - this.grafana.rows.topic, - this.grafana.rows.consumerGroup, - ]) + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.topic, + this.grafana.rows.consumerGroup, + ] + ) + ), setPanelIDs=false + ), + 'kafka-overview-dashboard.json': + g.dashboard.new(this.config.dashboardNamePrefix + 'Kafka overview') + + g.dashboard.withVariables(this.signals.broker.getVariablesMultiChoice()) + + g.dashboard.withTags(this.config.dashboardTags) + + g.dashboard.withUid(this.config.uid + '-kafka-overview-dashboard') + + g.dashboard.withLinks(this.grafana.links.otherDashboards) + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.overview, + 
this.grafana.rows.throughput, + this.grafana.rows.replication, + this.grafana.rows.totalTimePerformance, + this.grafana.rows.messageConversion, + ] + + (if this.config.zookeeperEnabled then [this.grafana.rows.zookeeperClient] else []) + + [ + this.grafana.rows.jvm.process + g.panel.row.withCollapsed(false), + this.grafana.rows.jvm.overview + g.panel.row.withCollapsed(true), + this.grafana.rows.jvm.memory + g.panel.row.withCollapsed(true), + this.grafana.rows.jvm.threads + g.panel.row.withCollapsed(true), + ] + ) ), setPanelIDs=false ), }, diff --git a/kafka-observ-lib/image-1.png b/kafka-observ-lib/image-1.png new file mode 100644 index 000000000..4734b9076 Binary files /dev/null and b/kafka-observ-lib/image-1.png differ diff --git a/kafka-observ-lib/image.png b/kafka-observ-lib/image.png new file mode 100644 index 000000000..46c2669c9 Binary files /dev/null and b/kafka-observ-lib/image.png differ diff --git a/kafka-observ-lib/jsonnetfile.json b/kafka-observ-lib/jsonnetfile.json index 9b301aec0..83bdbe9af 100644 --- a/kafka-observ-lib/jsonnetfile.json +++ b/kafka-observ-lib/jsonnetfile.json @@ -25,6 +25,14 @@ } }, "version": "" + }, + { + "source": { + "local": { + "directory": "../jvm-observ-lib" + } + }, + "version": "" } ], "legacyImports": true diff --git a/kafka-observ-lib/main.libsonnet b/kafka-observ-lib/main.libsonnet index dafca1d28..c748ed96d 100644 --- a/kafka-observ-lib/main.libsonnet +++ b/kafka-observ-lib/main.libsonnet @@ -1,16 +1,30 @@ local g = import './g.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; +local jvmlib = import 'jvm-observ-lib/main.libsonnet'; local zookeeperlib = import 'zookeeper-observ-lib/main.libsonnet'; { new(): { local this = self, config: import './config.libsonnet', //include jvm lib + jvm:: + jvmlib.new() + + jvmlib.withConfigMixin( + { + filteringSelector: this.config.filteringSelector, + groupLabels: this.config.groupLabels, + instanceLabels: this.config.instanceLabels, + uid: this.config.uid, + 
dashboardNamePrefix: this.config.dashboardNamePrefix, + dashboardTags: this.config.dashboardTags, + metricsSource: 'jmx_exporter', + } + ), zookeeper:: zookeeperlib.new() + zookeeperlib.withConfigMixin( { - filteringSelector: this.config.filteringSelector, + filteringSelector: this.config.zookeeperfilteringSelector, groupLabels: this.config.groupLabels, instanceLabels: this.config.instanceLabels, uid: this.config.uid, @@ -19,21 +33,44 @@ local zookeeperlib = import 'zookeeper-observ-lib/main.libsonnet'; metricsSource: this.config.metricsSource, } ), + signals: { [sig]: commonlib.signals.unmarshallJsonMulti(this.config.signals[sig], type=this.config.metricsSource) for sig in std.objectFields(this.config.signals) - }, + } + + if this.config.zookeeperEnabled then { zookeeper: this.zookeeper.signals } else {}, grafana: { - panels: (import './panels/main.libsonnet').new(this.signals) + panels: (import './panels/main.libsonnet').new(this.signals, this.config) + + { jvm: this.jvm.grafana.panels } + if this.config.zookeeperEnabled then { zookeeper: this.zookeeper.grafana.panels } else {}, rows: (import './rows.libsonnet').new(this.grafana.panels, type=this.config.metricsSource) + + { jvm: this.jvm.grafana.rows } + if this.config.zookeeperEnabled then { zookeeper: this.zookeeper.grafana.rows } else {}, + + // common links here + links: { + local link = g.dashboard.link, + otherDashboards: + link.dashboards.new('All Kafka dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(false), + }, + dashboards: (import './dashboards.libsonnet').new(this) - + if this.config.zookeeperEnabled then this.zookeeper.grafana.dashboards else {}, + + if this.config.zookeeperEnabled then + { + 'zookeeper-overview.json': + this.zookeeper.grafana.dashboards['zookeeper-overview.json'] + + g.dashboard.withLinks(this.grafana.links.otherDashboards), + } + + else {}, }, 
prometheus: { alerts: (import './alerts.libsonnet').new(this) + + { groups+: this.jvm.prometheus.alerts.groups } + if this.config.zookeeperEnabled then { groups+: this.zookeeper.prometheus.alerts.groups } else {}, recordingRules: {}, }, diff --git a/kafka-observ-lib/panels/broker.libsonnet b/kafka-observ-lib/panels/broker.libsonnet new file mode 100644 index 000000000..67308bdaa --- /dev/null +++ b/kafka-observ-lib/panels/broker.libsonnet @@ -0,0 +1,19 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +{ + new(signals):: { + + brokerBytesBothPerSec: + g.panel.timeSeries.new('Broker network throughput') + + commonlib.panels.network.timeSeries.traffic.stylize() + + commonlib.panels.network.timeSeries.traffic.withNegateOutPackets() + + signals.broker.brokerBytesInPerSec.asPanelMixin() + + signals.broker.brokerBytesOutPerSec.asPanelMixin(), + brokerMessagesPerSec: + g.panel.timeSeries.new('Broker messages throughput') + + commonlib.panels.network.timeSeries.packets.stylize() + + signals.broker.brokerMessagesInPerSec.asPanelMixin(), + + + }, +} diff --git a/kafka-observ-lib/panels/cluster.libsonnet b/kafka-observ-lib/panels/cluster.libsonnet new file mode 100644 index 000000000..5755622e6 --- /dev/null +++ b/kafka-observ-lib/panels/cluster.libsonnet @@ -0,0 +1,27 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +{ + new(signals):: { + activeControllers: + signals.cluster.activeControllers.asStat() + + commonlib.panels.generic.stat.info.stylize(), + brokersCount: + signals.cluster.brokersCount.asStat() + + commonlib.panels.generic.stat.info.stylize(), + clusterRoles: + signals.cluster.role.asStatusHistory() + + (if std.objectHas(signals, 'zookeeper') then signals.zookeeper.cluster.role.asPanelMixin() else {}) /* signals.zookeeper exists only when config.zookeeperEnabled is true */ + + commonlib.panels.generic.statusHistory.base.stylize(), + + clusterBytesBothPerSec: + g.panel.timeSeries.new('Cluster network throughput') + + commonlib.panels.network.timeSeries.traffic.stylize() + + 
commonlib.panels.network.timeSeries.traffic.withNegateOutPackets() + + signals.cluster.clusterBytesInPerSec.asPanelMixin() + + signals.cluster.clusterBytesOutPerSec.asPanelMixin(), + clusterMessagesPerSec: + g.panel.timeSeries.new('Cluster messages throughput') + + commonlib.panels.network.timeSeries.packets.stylize() + + signals.cluster.clusterMessagesInPerSec.asPanelMixin(), + }, +} diff --git a/kafka-observ-lib/panels/conversion.libsonnet b/kafka-observ-lib/panels/conversion.libsonnet new file mode 100644 index 000000000..994dc2bb2 --- /dev/null +++ b/kafka-observ-lib/panels/conversion.libsonnet @@ -0,0 +1,13 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +{ + new(signals):: { + producerConversion: + signals.conversion.producerConversion.asTimeSeries() + + commonlib.panels.generic.timeSeries.base.stylize(), + consumerConversion: + signals.conversion.consumerConversion.asTimeSeries() + + commonlib.panels.generic.timeSeries.base.stylize(), + + }, +} diff --git a/kafka-observ-lib/panels/main.libsonnet b/kafka-observ-lib/panels/main.libsonnet index 4e6a2145a..f96e79e10 100644 --- a/kafka-observ-lib/panels/main.libsonnet +++ b/kafka-observ-lib/panels/main.libsonnet @@ -1,6 +1,12 @@ { - new(signals):: { + new(signals, config):: { + broker: (import './broker.libsonnet').new(signals), + cluster: (import './cluster.libsonnet').new(signals), consumerGroup: (import './consumerGroup.libsonnet').new(signals), + conversion: (import './conversion.libsonnet').new(signals), + replicaManager: (import './replicaManager.libsonnet').new(signals), topic: (import './topic.libsonnet').new(signals), + totalTime: (import './totalTime.libsonnet').new(signals, config), + zookeeperClient: (import './zookeeperClient.libsonnet').new(signals, config), }, } diff --git a/kafka-observ-lib/panels/replicaManager.libsonnet b/kafka-observ-lib/panels/replicaManager.libsonnet new file mode 100644 index 000000000..6d5ab3a9e --- /dev/null +++ 
b/kafka-observ-lib/panels/replicaManager.libsonnet @@ -0,0 +1,46 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +{ + new(signals):: { + isrShrinks: + signals.replicaManager.isrShrinks.asTimeSeries() + + commonlib.panels.generic.timeSeries.base.stylize(), + isrExpands: + signals.replicaManager.isrExpands.asTimeSeries() + + commonlib.panels.generic.timeSeries.base.stylize(), + onlinePartitions: + signals.replicaManager.onlinePartitions.asTimeSeries() + + commonlib.panels.generic.timeSeries.base.stylize(), + offlinePartitions: + signals.replicaManager.offlinePartitions.asTimeSeries() + + commonlib.panels.requests.timeSeries.errors.stylize(), + underReplicatedPartitions: + signals.replicaManager.underReplicatedPartitions.asTimeSeries() + + commonlib.panels.requests.timeSeries.errors.stylize(), + underMinISRPartitions: + signals.replicaManager.underMinISRPartitions.asTimeSeries() + + commonlib.panels.requests.timeSeries.errors.stylize(), + + //for overview: + uncleanLeaderElectionStat: + signals.replicaManager.uncleanLeaderElection.asStat() + + commonlib.panels.generic.stat.base.stylize(), + preferredReplicaInbalanceStat: + signals.replicaManager.preferredReplicaInbalance.asStat() + + commonlib.panels.generic.stat.base.stylize(), + + onlinePartitionsStat: + signals.replicaManager.onlinePartitions.asStat() + + commonlib.panels.generic.stat.base.stylize(), + offlinePartitionsStat: + signals.replicaManager.offlinePartitions.asStat() + + commonlib.panels.generic.stat.base.stylize(), + underReplicatedPartitionsStat: + signals.replicaManager.underReplicatedPartitions.asStat() + + commonlib.panels.generic.stat.base.stylize(), + underMinISRPartitionsStat: + signals.replicaManager.underMinISRPartitions.asStat() + + commonlib.panels.generic.stat.base.stylize(), + + }, +} diff --git a/kafka-observ-lib/panels/totalTime.libsonnet b/kafka-observ-lib/panels/totalTime.libsonnet new file mode 100644 index 000000000..34490432e --- 
/dev/null +++ b/kafka-observ-lib/panels/totalTime.libsonnet @@ -0,0 +1,81 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +{ + new(signals, config):: { + + _common:: + commonlib.panels.generic.timeSeries.base.stylize() + + g.panel.timeSeries.panelOptions.withRepeat(config.instanceLabels[0]) + + g.panel.timeSeries.panelOptions.withRepeatDirection('v') + + g.panel.timeSeries.fieldConfig.defaults.custom.withStacking({ mode: 'normal' }) + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('none'), + + fetchConsumerTotalTimeBreakdown: + g.panel.timeSeries.new('Fetch-consumer ($%s)' % config.instanceLabels[0]) + + g.panel.timeSeries.panelOptions.withDescription( + ||| + Total time breakdown for fetch requests. + quantile: %s + + - `Request queue`: Time spent waiting in the request queue. + - `Local time`: Time spent being processed by leader. + - `Remote time`: Time spent waiting for follower response (only when 'require acks' is set). + - `Response queue`: Time spent waiting in the response queue. + - `Response time`: Time to send the response. + + ||| % config.totalTimeMsQuantile + ) + + signals.totalTime.fetchQueueTime.asPanelMixin() + + signals.totalTime.fetchLocalTime.asPanelMixin() + + signals.totalTime.fetchRemoteTime.asPanelMixin() + + signals.totalTime.fetchResponseQueue.asPanelMixin() + + signals.totalTime.fetchResponseTime.asPanelMixin() + + self._common, + + producerTotalTimeBreakdown: + g.panel.timeSeries.new('Producer ($%s)' % config.instanceLabels[0]) + + g.panel.timeSeries.panelOptions.withDescription( + ||| + Total time breakdown for producer requests. + quantile: %s + + - `Request queue`: Time spent waiting in the request queue. + - `Local time`: Time spent being processed by leader. + - `Remote time`: Time spent waiting for follower response (only when 'require acks' is set). 
+ - `Response queue`: Time spent waiting in the response queue. + - `Response time`: Time to send the response. + + ||| % config.totalTimeMsQuantile + ) + + signals.totalTime.producerQueueTime.asPanelMixin() + + signals.totalTime.producerLocalTime.asPanelMixin() + + signals.totalTime.producerRemoteTime.asPanelMixin() + + signals.totalTime.producerResponseQueue.asPanelMixin() + + signals.totalTime.producerResponseTime.asPanelMixin() + + self._common, + + fetchFollowerTotalTimeBreakdown: + g.panel.timeSeries.new('Fetch-follower ($%s)' % config.instanceLabels[0]) + + g.panel.timeSeries.panelOptions.withDescription( + ||| + Total time breakdown for fetch-follower requests. + quantile: %s + + - `Request queue`: Time spent waiting in the request queue. + - `Local time`: Time spent being processed by leader. + - `Remote time`: Time spent waiting for follower response (only when 'require acks' is set). + - `Response queue`: Time spent waiting in the response queue. + - `Response time`: Time to send the response. 
+ + ||| % config.totalTimeMsQuantile + ) + + signals.totalTime.fetchFollowerQueueTime.asPanelMixin() + + signals.totalTime.fetchFollowerLocalTime.asPanelMixin() + + signals.totalTime.fetchFollowerRemoteTime.asPanelMixin() + + signals.totalTime.fetchFollowerResponseQueue.asPanelMixin() + + signals.totalTime.fetchFollowerResponseTime.asPanelMixin() + + self._common, + + }, +} diff --git a/kafka-observ-lib/panels/zookeeperClient.libsonnet b/kafka-observ-lib/panels/zookeeperClient.libsonnet new file mode 100644 index 000000000..cb4912533 --- /dev/null +++ b/kafka-observ-lib/panels/zookeeperClient.libsonnet @@ -0,0 +1,16 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +{ + new(signals, config):: { + zookeeperRequestLatency: + signals.zookeeperClient.zookeeperRequestLatency.asTimeSeries() + + commonlib.panels.generic.timeSeries.base.stylize(), + zookeeperConnections: + + signals.zookeeperClient.zookeeperConnections.asTimeSeries() + + signals.zookeeperClient.zookeeperExpiredConnections.asPanelMixin() + + signals.zookeeperClient.zookeeperDisconnects.asPanelMixin() + + signals.zookeeperClient.zookeeperAuthFailures.asPanelMixin() + + commonlib.panels.generic.timeSeries.base.stylize(), + }, +} diff --git a/kafka-observ-lib/rows.libsonnet b/kafka-observ-lib/rows.libsonnet index 8cc133f0b..56875e93f 100644 --- a/kafka-observ-lib/rows.libsonnet +++ b/kafka-observ-lib/rows.libsonnet @@ -2,29 +2,110 @@ local g = import './g.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; { new(panels, type):: { - topic: [ - g.panel.row.new('Topics'), - panels.topic.topicTable { gridPos+: { w: 24, h: 8 } }, - panels.topic.topicMessagesPerSec { gridPos+: { w: 24, h: 8 } }, - panels.topic.topicBytesInPerSec { gridPos+: { w: 12, h: 6 } }, - panels.topic.topicBytesOutPerSec { gridPos+: { w: 12, h: 6 } }, - ], + overview: + g.panel.row.new('Overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + 
panels.cluster.activeControllers { gridPos+: { w: 3, h: 4 } }, + panels.cluster.brokersCount { gridPos+: { w: 3, h: 4 } }, + panels.replicaManager.uncleanLeaderElectionStat { gridPos+: { w: 3, h: 4 } }, + panels.replicaManager.preferredReplicaInbalanceStat { gridPos+: { w: 3, h: 4 } }, + panels.cluster.clusterBytesBothPerSec { gridPos+: { w: 6, h: 8 } }, + panels.cluster.clusterMessagesPerSec { gridPos+: { w: 6, h: 8 } }, + //next row + panels.replicaManager.onlinePartitionsStat { gridPos+: { w: 3, h: 4 } }, + panels.replicaManager.offlinePartitionsStat { gridPos+: { w: 3, h: 4 } }, + panels.replicaManager.underReplicatedPartitionsStat { gridPos+: { w: 3, h: 4 } }, + panels.replicaManager.underMinISRPartitionsStat { gridPos+: { w: 3, h: 4 } }, + // status rows + panels.cluster.clusterRoles { gridPos+: { w: 24, h: 7 } }, + ] + ), + throughput: + g.panel.row.new('Throughput') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + panels.broker.brokerBytesBothPerSec { gridPos+: { w: 12, h: 8 } }, + panels.broker.brokerMessagesPerSec { gridPos+: { w: 12, h: 8 } }, + ] + ), + topic: + g.panel.row.new('Topics') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + panels.topic.topicTable { gridPos+: { w: 24, h: 8 } }, + panels.topic.topicMessagesPerSec { gridPos+: { w: 24, h: 8 } }, + panels.topic.topicBytesInPerSec { gridPos+: { w: 12, h: 6 } }, + panels.topic.topicBytesOutPerSec { gridPos+: { w: 12, h: 6 } }, + ] + ) + , consumerGroup: - [ - g.panel.row.new('Consumer groups'), - panels.consumerGroup.consumerGroupTable { gridPos+: { w: 24, h: 8 } }, - ] - + - if type == 'prometheus' then - [ - panels.consumerGroup.consumerGroupConsumeRate { gridPos+: { w: 12, h: 8 } }, - panels.consumerGroup.consumerGroupLag { gridPos+: { w: 12, h: 8 } }, - ] - else if type == 'grafanacloud' then - [ - panels.consumerGroup.consumerGroupConsumeRate { gridPos+: { w: 8, h: 8 } }, - panels.consumerGroup.consumerGroupLag { gridPos+: { w: 8, h: 8 } }, - 
panels.consumerGroup.consumerGroupLagTime { gridPos+: { w: 8, h: 8 } }, - ], + g.panel.row.new('Consumer groups') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + panels.consumerGroup.consumerGroupTable { gridPos+: { w: 24, h: 8 } }, + ] + + + + + if type == 'prometheus' then + [ + panels.consumerGroup.consumerGroupConsumeRate { gridPos+: { w: 12, h: 8 } }, + panels.consumerGroup.consumerGroupLag { gridPos+: { w: 12, h: 8 } }, + ] + else if type == 'grafanacloud' then + [ + panels.consumerGroup.consumerGroupConsumeRate { gridPos+: { w: 8, h: 8 } }, + panels.consumerGroup.consumerGroupLag { gridPos+: { w: 8, h: 8 } }, + panels.consumerGroup.consumerGroupLagTime { gridPos+: { w: 8, h: 8 } }, + ] + ), + replication: + g.panel.row.new('Replication') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels( + [ + panels.replicaManager.onlinePartitions { gridPos+: { w: 12, h: 6 } }, + panels.replicaManager.offlinePartitions { gridPos+: { w: 12, h: 6 } }, + panels.replicaManager.underReplicatedPartitions { gridPos+: { w: 12, h: 6 } }, + panels.replicaManager.underMinISRPartitions { gridPos+: { w: 12, h: 6 } }, + panels.replicaManager.isrShrinks { gridPos+: { w: 12, h: 6 } }, + panels.replicaManager.isrExpands { gridPos+: { w: 12, h: 6 } }, + ] + ), + totalTimePerformance: + g.panel.row.new('Requests time breakdown') + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels( + [ + panels.totalTime.producerTotalTimeBreakdown { gridPos+: { w: 8, h: 6 } }, + panels.totalTime.fetchFollowerTotalTimeBreakdown { gridPos+: { w: 8, h: 6 } }, + panels.totalTime.fetchConsumerTotalTimeBreakdown { gridPos+: { w: 8, h: 6 } }, + + ] + ), + zookeeperClient: + g.panel.row.new('Zookeeper client') + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels( + [ + panels.zookeeperClient.zookeeperRequestLatency { gridPos+: { w: 8, h: 6 } }, + panels.zookeeperClient.zookeeperConnections { gridPos+: { w: 8, h: 6 } }, + ] + ), + messageConversion: + 
g.panel.row.new('Message conversion') + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels( + [ + panels.conversion.producerConversion { gridPos+: { w: 12, h: 6 } }, + panels.conversion.consumerConversion { gridPos+: { w: 12, h: 6 } }, + ] + ), }, } diff --git a/kafka-observ-lib/signals/broker.libsonnet b/kafka-observ-lib/signals/broker.libsonnet new file mode 100644 index 000000000..d667b6cbe --- /dev/null +++ b/kafka-observ-lib/signals/broker.libsonnet @@ -0,0 +1,66 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'instance', + aggFunction: 'sum', + discoveryMetric: { + prometheus: 'kafka_controller_kafkacontroller_activecontrollercount', + grafanacloud: 'kafka_controller_kafkacontroller_activecontrollercount', + }, + signals: { + brokerMessagesInPerSec: { + name: 'Broker messages in', + description: 'Broker messages in.', + type: 'counter', + unit: 'mps', + sources: { + prometheus: { + legendCustomTemplate: '{{ %s }}: messages in' % this.instanceLabels[0], + expr: 'kafka_server_brokertopicmetrics_messagesin_total{%(queriesSelector)s}', + }, + grafanacloud: { + legendCustomTemplate: '{{ %s }}: messages in' % this.instanceLabels[0], + expr: 'kafka_server_brokertopicmetrics_messagesinpersec{%(queriesSelector)s}', + }, + }, + }, + brokerBytesInPerSec: { + name: 'Broker bytes in', + description: 'Broker bytes in rate.', + type: 'counter', + unit: 'Bps', + sources: { + prometheus: { + legendCustomTemplate: '{{ %s }}: bytes in' % this.instanceLabels[0], + expr: 'kafka_server_brokertopicmetrics_bytesinpersec_count{%(queriesSelector)s}', + }, + grafanacloud: { + legendCustomTemplate: '{{ %s }}: bytes in' % this.instanceLabels[0], + expr: 'kafka_server_brokertopicmetrics_bytesinpersec{%(queriesSelector)s}', + }, + }, + }, + brokerBytesOutPerSec: { + name: 'Broker bytes out', + description: 'Broker 
bytes out rate.', + type: 'counter', + unit: 'Bps', + sources: { + prometheus: { + legendCustomTemplate: '{{ %s }}: bytes out' % this.instanceLabels[0], + expr: 'kafka_server_brokertopicmetrics_bytesoutpersec_count{%(queriesSelector)s}', + }, + grafanacloud: { + legendCustomTemplate: '{{ %s }}: bytes out' % this.instanceLabels[0], + expr: 'kafka_server_brokertopicmetrics_bytesoutpersec{%(queriesSelector)s}', + }, + }, + }, + + + }, + } diff --git a/kafka-observ-lib/signals/cluster.libsonnet b/kafka-observ-lib/signals/cluster.libsonnet new file mode 100644 index 000000000..f30a267a2 --- /dev/null +++ b/kafka-observ-lib/signals/cluster.libsonnet @@ -0,0 +1,160 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + // do not add instance selector at all for cluster wide metrics + instanceLabels: [], + aggLevel: 'group', + aggFunction: 'sum', + discoveryMetric: { + prometheus: 'kafka_controller_kafkacontroller_activecontrollercount', + grafanacloud: 'kafka_controller_kafkacontroller_activecontrollercount', + }, + signals: { + activeControllers: { + name: 'Active kafka controllers', + description: ||| + Active kafka controllers count. + |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_controller_kafkacontroller_activecontrollercount{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_controller_kafkacontroller_activecontrollercount{%(queriesSelector)s}', + }, + }, + }, + // used in status map + role: { + name: 'Current role', + description: ||| + 0 - follower, 1 - controller. 
+ |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_controller_kafkacontroller_activecontrollercount{%(queriesSelector)s}', + legendCustomTemplate: '{{ %s }}' % this.instanceLabels[0], + aggKeepLabels: this.instanceLabels, + valueMappings: [{ + type: 'value', + options: { + '0': { + text: 'follower', + color: 'light-purple', + index: 0, + }, + '1': { + text: 'controller', + color: 'light-blue', + index: 1, + }, + }, + }], + }, + prometheus: + { + expr: 'kafka_controller_kafkacontroller_activecontrollercount{%(queriesSelector)s}', + aggKeepLabels: this.instanceLabels, + legendCustomTemplate: '{{ %s }}' % this.instanceLabels[0], + valueMappings: [ + { + type: 'value', + options: { + '0': { + text: 'follower', + color: 'light-purple', + index: 0, + }, + '1': { + text: 'controller', + color: 'light-blue', + index: 1, + }, + }, + }, + ], + }, + }, + }, + brokersCount: { + name: 'Brokers count', + description: ||| + Active brokers count. 
+ |||, + type: 'gauge', + unit: 'short', + aggFunction: 'count', + sources: { + grafanacloud: + { + expr: 'kafka_server_kafkaserver_brokerstate{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_server_kafkaserver_brokerstate{%(queriesSelector)s}', + }, + }, + }, + + clusterMessagesInPerSec: { + name: 'Cluster messages in', + description: 'Cluster messages in.', + type: 'counter', + unit: 'mps', + sources: { + prometheus: { + legendCustomTemplate: '%s: messages in' % commonlib.utils.labelsToPanelLegend(this.groupLabels), + expr: 'kafka_server_brokertopicmetrics_messagesin_total{%(queriesSelector)s}', + }, + grafanacloud: { + legendCustomTemplate: '%s: messages in' % commonlib.utils.labelsToPanelLegend(this.groupLabels), + expr: 'kafka_server_brokertopicmetrics_messagesinpersec{%(queriesSelector)s}', + }, + }, + }, + clusterBytesInPerSec: { + name: 'Cluster bytes in', + description: 'Cluster bytes in rate.', + type: 'counter', + unit: 'Bps', + sources: { + prometheus: { + legendCustomTemplate: '%s: bytes in' % commonlib.utils.labelsToPanelLegend(this.groupLabels), + expr: 'kafka_server_brokertopicmetrics_bytesinpersec_count{%(queriesSelector)s}', + }, + grafanacloud: { + legendCustomTemplate: '%s: bytes in' % commonlib.utils.labelsToPanelLegend(this.groupLabels), + expr: 'kafka_server_brokertopicmetrics_bytesinpersec{%(queriesSelector)s}', + }, + }, + }, + clusterBytesOutPerSec: { + name: 'Cluster bytes out', + description: 'Cluster bytes out rate.', + type: 'counter', + unit: 'Bps', + sources: { + prometheus: { + legendCustomTemplate: '%s: bytes out' % commonlib.utils.labelsToPanelLegend(this.groupLabels), + expr: 'kafka_server_brokertopicmetrics_bytesoutpersec_count{%(queriesSelector)s}', + }, + grafanacloud: { + legendCustomTemplate: '%s: bytes out' % commonlib.utils.labelsToPanelLegend(this.groupLabels), + expr: 'kafka_server_brokertopicmetrics_bytesoutpersec{%(queriesSelector)s}', + }, + }, + }, + }, + } diff --git 
a/kafka-observ-lib/signals/consumerGroup.libsonnet b/kafka-observ-lib/signals/consumerGroup.libsonnet index 93b659e17..f9b5a87d9 100644 --- a/kafka-observ-lib/signals/consumerGroup.libsonnet +++ b/kafka-observ-lib/signals/consumerGroup.libsonnet @@ -4,7 +4,7 @@ function(this) { filteringSelector: this.filteringSelector + ', topic!~"%s"' % this.topicsIgnoreSelector, groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels + ['topic', 'consumergroup'], + instanceLabels: ['topic', 'consumergroup'], // this.instanceLabels is omitted, as it would point to kafka_exporter instance. aggLevel: 'group', aggFunction: 'avg', legendCustomTemplate: '{{ consumergroup }} ({{ topic }})', diff --git a/kafka-observ-lib/signals/conversion.libsonnet b/kafka-observ-lib/signals/conversion.libsonnet new file mode 100644 index 000000000..e1d107108 --- /dev/null +++ b/kafka-observ-lib/signals/conversion.libsonnet @@ -0,0 +1,44 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + discoveryMetric: { + prometheus: 'kafka_server_brokertopicmetrics_producemessageconversions_total', + grafanacloud: 'kafka_server_brokertopicmetrics_producemessageconversionspersec', + }, + signals: { + producerConversion: { + name: 'Message conversion (producer)', + description: 'The number of messages produced converted to match the log.message.format.version.', + type: 'counter', + unit: 'mps', + sources: { + prometheus: { + expr: 'kafka_server_brokertopicmetrics_producemessageconversions_total{%(queriesSelector)s}', + }, + grafanacloud: { + expr: 'kafka_server_brokertopicmetrics_producemessageconversionspersec{%(queriesSelector)s}', + }, + }, + }, + consumerConversion: { + name: 'Message conversion (consumer)', + description: 'The number of messages consumed converted at consumer to match the 
log.message.format.version.', + type: 'counter', + unit: 'mps', + sources: { + prometheus: { + expr: 'kafka_server_brokertopicmetrics_fetchmessageconversions_total{%(queriesSelector)s}', + }, + grafanacloud: { + expr: 'kafka_server_brokertopicmetrics_fetchmessageconversionspersec{%(queriesSelector)s}', + }, + }, + }, + }, + } diff --git a/kafka-observ-lib/signals/replicaManager.libsonnet b/kafka-observ-lib/signals/replicaManager.libsonnet new file mode 100644 index 000000000..fdfc7a08d --- /dev/null +++ b/kafka-observ-lib/signals/replicaManager.libsonnet @@ -0,0 +1,189 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + discoveryMetric: { + prometheus: 'kafka_server_replicamanager_isrshrinks_total', + grafanacloud: 'kafka_server_replicamanager_isrshrinkspersec', + }, + signals: { + local s = this, + isrShrinks: { + name: 'ISR shrinks', + description: ||| + The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, + the only exceptions are when you are expanding your broker cluster or removing partitions. + In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. + + A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset + (user-configurable by setting the replica.lag.max.messages configuration parameter), + or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, + an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention. + + The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers. 
+ |||, + type: 'raw', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: '%(aggFunction)s by (%(agg)s) (kafka_server_replicamanager_isrshrinkspersec{%(queriesSelector)s})', + }, + prometheus: + { + expr: '%(aggFunction)s by (%(agg)s) (rate(kafka_server_replicamanager_isrshrinks_total{%(queriesSelector)s}[%(interval)s]))', + }, + }, + }, + isrExpands: { + name: 'ISR expands', + description: ||| + The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, + the only exceptions are when you are expanding your broker cluster or removing partitions. + In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. + + A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset + (user-configurable by setting the replica.lag.max.messages configuration parameter), + or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, + an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention. + + The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers. + |||, + type: 'raw', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: '%(aggFunction)s by (%(agg)s) (kafka_server_replicamanager_isrexpandspersec{%(queriesSelector)s})', + }, + prometheus: + { + expr: '%(aggFunction)s by (%(agg)s) (rate(kafka_server_replicamanager_isrexpands_total{%(queriesSelector)s}[%(interval)s]))', + }, + }, + }, + onlinePartitions: { + name: 'Online partitions', + description: ||| + Online partitions. 
+ |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_server_replicamanager_partitioncount{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_server_replicamanager_partitioncount{%(queriesSelector)s}', + }, + }, + }, + offlinePartitions: { + name: 'Offline partitions', + description: ||| + Number of partitions that dont have an active leader and are hence not writable or readable. + |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_controller_kafkacontroller_offlinepartitionscount{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_controller_kafkacontroller_offlinepartitionscount{%(queriesSelector)s}', + }, + }, + }, + underReplicatedPartitions: { + name: 'Under replicated partitions', + description: ||| + Number of under replicated partitions (| ISR | < | all replicas |). + |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_cluster_partition_underreplicated{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_cluster_partition_underreplicated{%(queriesSelector)s}', + }, + }, + }, + underMinISRPartitions: { + name: 'Under min ISR partitions', + description: ||| + Under min ISR(In-Sync replicas) partitions. + |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_cluster_partition_underminisr{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_cluster_partition_underminisr{%(queriesSelector)s}', + }, + }, + }, + uncleanLeaderElection: { + name: 'Unclean leader election', + description: ||| + Unclean leader election rate. 
+ |||, + type: 'raw', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: '%(aggFunction)s by (%(agg)s) (kafka_controller_controllerstats_uncleanleaderelectionspersec{%(queriesSelector)s})', + }, + prometheus: + { + expr: '%(aggFunction)s by (%(agg)s) (rate(kafka_controller_controllerstats_uncleanleaderelections_total{%(queriesSelector)s}[%(interval)s]))', + }, + }, + }, + preferredReplicaInbalance: { + name: 'Preferred replica inbalance', + description: ||| + The count of topic partitions for which the leader is not the preferred leader. + |||, + type: 'gauge', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'kafka_controller_kafkacontroller_preferredreplicaimbalancecount{%(queriesSelector)s}', + }, + prometheus: + { + expr: 'kafka_controller_kafkacontroller_preferredreplicaimbalancecount{%(queriesSelector)s}', + }, + }, + }, + + + }, + } diff --git a/kafka-observ-lib/signals/topic.libsonnet b/kafka-observ-lib/signals/topic.libsonnet index 6391e46cd..b4da4b502 100644 --- a/kafka-observ-lib/signals/topic.libsonnet +++ b/kafka-observ-lib/signals/topic.libsonnet @@ -4,7 +4,7 @@ function(this) { filteringSelector: this.filteringSelector + ', topic!~"%s"' % this.topicsIgnoreSelector, groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels + ['topic'], + instanceLabels: ['topic'], // this.instanceLabels is omitted, as it would point to kafka_exporter instance.
aggLevel: 'group', aggFunction: 'sum', legendCustomTemplate: '{{ topic }}', diff --git a/kafka-observ-lib/signals/totalTime.libsonnet b/kafka-observ-lib/signals/totalTime.libsonnet new file mode 100644 index 000000000..86daf43b6 --- /dev/null +++ b/kafka-observ-lib/signals/totalTime.libsonnet @@ -0,0 +1,243 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +// TotalTimeMs metric +function(this) + { + filteringSelector: this.filteringSelector + ', quantile="%s"' % this.totalTimeMsQuantile, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'instance', + aggFunction: 'avg', + discoveryMetric: { + prometheus: 'kafka_network_requestmetrics_requestqueuetimems', + grafanacloud: self.prometheus, + }, + signals: { + + local commonRequestQueueDescription = ||| + A high value can imply there aren't enough IO threads or the CPU is a bottleneck, + or the request queue isnt large enough. The request queue size should match the number of connections. + |||, + + local commonLocalDescription = ||| + In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR. + |||, + + local commonRemoteDesription = ||| + A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms. 
+ |||, + + local commonResponseQueueDescription = ||| + A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue. + |||, + + local commonResponseDescription = ||| + A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block. + |||, + + fetchQueueTime: { + name: 'Fetch-consumer queue time', + description: 'Time spent waiting in the request queue.' + '\n' + commonRequestQueueDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_requestqueuetimems{%(queriesSelector)s, request="Fetch"}', + legendCustomTemplate: 'request queue time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchLocalTime: { + name: 'Fetch-consumer local time', + description: 'Time spent being processed by leader.' + '\n' + commonLocalDescription, + type: 'gauge', + unit: 'ms', + + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_localtimems{%(queriesSelector)s, request="Fetch"}', + legendCustomTemplate: 'local time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchRemoteTime: { + name: 'Fetch-consumer remote time', + description: "Time spent waiting for follower response (only when 'require acks' is set)." + + '\n' + commonRemoteDesription, + type: 'gauge', + unit: 'ms', + + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_remotetimems{%(queriesSelector)s, request="Fetch"}', + legendCustomTemplate: 'remote time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchResponseQueue: { + name: 'Fetch-consumer response queue time', + description: 'Time spent waiting in the response queue.' 
+ '\n' + commonResponseQueueDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_responsequeuetimems{%(queriesSelector)s, request="Fetch"}', + legendCustomTemplate: 'response queue time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchResponseTime: { + name: 'Fetch-consumer response time', + description: 'Time to send the response.' + '\n' + commonResponseDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_responsesendtimems{%(queriesSelector)s, request="Fetch"}', + legendCustomTemplate: 'response time', + }, + grafanacloud: self.prometheus, + }, + }, + + //fetch follower + fetchFollowerQueueTime: { + name: 'Fetch-follower queue time', + description: 'Time spent waiting in the request queue.' + '\n' + commonRequestQueueDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_requestqueuetimems{%(queriesSelector)s, request="FetchFollower"}', + legendCustomTemplate: 'request queue time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchFollowerLocalTime: { + name: 'Fetch-follower local time', + description: 'Time spent being processed by leader.' + '\n' + commonLocalDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_localtimems{%(queriesSelector)s, request="FetchFollower"}', + legendCustomTemplate: 'local time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchFollowerRemoteTime: { + name: 'Fetch-follower remote time', + description: "Time spent waiting for follower response (only when 'require acks' is set)." 
+ + '\n' + commonRemoteDesription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_remotetimems{%(queriesSelector)s, request="FetchFollower"}', + legendCustomTemplate: 'remote time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchFollowerResponseQueue: { + name: 'Fetch-follower response queue time', + description: 'Time spent waiting in the response queue.' + '\n' + commonResponseQueueDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_responsequeuetimems{%(queriesSelector)s, request="FetchFollower"}', + legendCustomTemplate: 'response queue time', + }, + grafanacloud: self.prometheus, + }, + }, + fetchFollowerResponseTime: { + name: 'Fetch-follower response time', + description: 'Time to send the response.' + '\n' + commonResponseDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_responsesendtimems{%(queriesSelector)s, request="FetchFollower"}', + legendCustomTemplate: 'response time', + }, + grafanacloud: self.prometheus, + }, + }, + + //produce + producerQueueTime: { + name: 'Produce follower queue time', + description: 'Time spent waiting in the request queue.' + '\n' + commonRequestQueueDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_requestqueuetimems{%(queriesSelector)s, request="Produce"}', + legendCustomTemplate: 'request queue time', + }, + grafanacloud: self.prometheus, + }, + }, + producerLocalTime: { + name: 'Produce follower local time', + description: 'Time spent being processed by leader.' 
+ '\n' + commonLocalDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_localtimems{%(queriesSelector)s, request="Produce"}', + legendCustomTemplate: 'local time', + }, + grafanacloud: self.prometheus, + }, + }, + producerRemoteTime: { + name: 'Produce follower remote time', + description: "Time spent waiting for follower response (only when 'require acks' is set)." + + '\n' + commonRemoteDesription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_remotetimems{%(queriesSelector)s, request="Produce"}', + legendCustomTemplate: 'remote time', + }, + grafanacloud: self.prometheus, + }, + }, + producerResponseQueue: { + name: 'Produce follower response queue time', + description: 'Time spent waiting in the response queue.' + '\n' + commonResponseQueueDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_responsequeuetimems{%(queriesSelector)s, request="Produce"}', + legendCustomTemplate: 'response queue time', + }, + grafanacloud: self.prometheus, + }, + }, + producerResponseTime: { + name: 'Produce follower response time', + description: 'Time to send the response.' 
+ '\n' + commonResponseDescription, + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_network_requestmetrics_responsesendtimems{%(queriesSelector)s, request="Produce"}', + legendCustomTemplate: 'response time', + }, + grafanacloud: self.prometheus, + }, + }, + }, + } diff --git a/kafka-observ-lib/signals/zookeeperClient.libsonnet b/kafka-observ-lib/signals/zookeeperClient.libsonnet new file mode 100644 index 000000000..0a0bbc18c --- /dev/null +++ b/kafka-observ-lib/signals/zookeeperClient.libsonnet @@ -0,0 +1,86 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector + ', quantile="%s"' % this.zookeeperClientQuantile, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'instance', + aggFunction: 'avg', + discoveryMetric: { + prometheus: 'kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms', + grafanacloud: self.prometheus, + }, + signals: { + zookeeperRequestLatency: { + name: 'Zookeeper request latency', + description: 'Latency in millseconds for ZooKeeper requests from broker.', + type: 'gauge', + unit: 'ms', + sources: { + prometheus: { + expr: 'kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{%(queriesSelector)s}', + }, + grafanacloud: self.prometheus, + }, + }, + zookeeperConnections: { + name: 'Zookeeper connections', + description: 'Zookeeper connections rate.', + type: 'counter', + unit: 'short', + sources: { + grafanacloud: { + expr: 'kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{%(queriesSelector)s}', + }, + prometheus: { + expr: 'kafka_server_sessionexpirelistener_zookeepersyncconnects_total{%(queriesSelector)s}', + }, + }, + }, + zookeeperExpiredConnections: { + name: 'Zookeeper expired connections', + description: 'Zookeeper expired connections rate.', + type: 'counter', + unit: 'short', + sources: { + grafanacloud: { + expr: 
'kafka_server_sessionexpirelistener_zookeeperexpirespersec{%(queriesSelector)s}', + }, + prometheus: { + expr: 'kafka_server_sessionexpirelistener_zookeeperexpires_total{%(queriesSelector)s}', + }, + }, + }, + zookeeperDisconnects: { + name: 'Zookeeper disconnects', + description: 'Zookeeper disconnects rate.', + type: 'counter', + unit: 'short', + sources: { + grafanacloud: { + expr: 'kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{%(queriesSelector)s}', + }, + prometheus: { + expr: 'kafka_server_sessionexpirelistener_zookeeperdisconnects_total{%(queriesSelector)s}', + }, + }, + }, + zookeeperAuthFailures: { + name: 'Zookeeper auth failures', + description: 'Zookeeper auth failures from Kafka.', + type: 'counter', + unit: 'short', + sources: { + grafanacloud: { + expr: 'kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{%(queriesSelector)s}', + }, + prometheus: { + expr: 'kafka_server_sessionexpirelistener_zookeeperauthfailures_total{%(queriesSelector)s}', + }, + }, + }, + + + }, + } diff --git a/process-observ-lib/README.md b/process-observ-lib/README.md index 3d7cf2b34..90e9da034 100644 --- a/process-observ-lib/README.md +++ b/process-observ-lib/README.md @@ -8,6 +8,7 @@ Supports the following sources: - otel - java_otel - java_micrometer (springboot) +- jmx_exporter ## Import diff --git a/process-observ-lib/panels.libsonnet b/process-observ-lib/panels.libsonnet index 0a83db50e..9e9cd3217 100644 --- a/process-observ-lib/panels.libsonnet +++ b/process-observ-lib/panels.libsonnet @@ -26,9 +26,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + commonlib.panels.memory.timeSeries.usageBytes.stylize(), filesUsed: - g.panel.timeSeries.new('Process files open') + signals.process.filesOpen.asTimeSeries() + signals.process.filesMax.asPanelMixin() - + signals.process.filesOpen.asPanelMixin() + commonlib.panels.generic.timeSeries.base.stylize(), }, } diff --git a/process-observ-lib/rows.libsonnet b/process-observ-lib/rows.libsonnet 
index 542b98134..a7f7c7b9e 100644 --- a/process-observ-lib/rows.libsonnet +++ b/process-observ-lib/rows.libsonnet @@ -4,16 +4,16 @@ local g = import './g.libsonnet'; new(panels, type): { process: - g.panel.row.new('Process') + g.panel.row.new('Process overview') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels( [ - panels.uptime { gridPos: { w: 4, h: 4 } }, - panels.startTime { gridPos: { w: 4, h: 4 } }, - panels.loadAverage { gridPos: { w: 16, h: 4 } }, + panels.uptime { gridPos: { w: 8, h: 6 } }, + panels.startTime { gridPos: { w: 8, h: 6 } }, + panels.loadAverage { gridPos: { w: 8, h: 6 } }, panels.cpuUsage { gridPos: { w: 8, h: 6 } }, - panels.filesUsed { gridPos: { w: 8, h: 6 } }, panels.memoryUsage { gridPos: { w: 8, h: 6 } }, + panels.filesUsed { gridPos: { w: 8, h: 6 } }, ] ), }, diff --git a/process-observ-lib/signals/process.libsonnet b/process-observ-lib/signals/process.libsonnet index c6449899b..ed56a2e33 100644 --- a/process-observ-lib/signals/process.libsonnet +++ b/process-observ-lib/signals/process.libsonnet @@ -13,6 +13,9 @@ function(this) // acceptable if container has single process running cadvisor: 'container_cpu_usage_seconds_total', + + //https://github.com/prometheus/jmx_exporter/blob/main/collector/src/test/java/io/prometheus/jmx/JmxCollectorTest.java#L195 + jmx_exporter: 'java_lang_operatingsystem_processcputime', }, filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, @@ -38,6 +41,13 @@ function(this) java_micrometer: { expr: 'process_uptime_seconds{%(queriesSelector)s}', }, + jmx_exporter: { + expr: 'java_lang_runtime_uptime{%(queriesSelector)s}', + exprWrappers: [ + //ms + ['', '/1000'], + ], + }, }, }, startTime: { @@ -60,6 +70,9 @@ function(this) cadvisor: { cadvisor: 'container_start_time_seconds{%(queriesSelector)s}', }, + jmx_exporter: { + expr: 'java_lang_runtime_starttime{%(queriesSelector)s}', + }, }, }, // cpuUsage @@ -73,10 +86,6 @@ function(this) java_micrometer: { expr: 
'process_cpu_usage{%(queriesSelector)s} * 100', }, - // otel: { - // // expr: 'process_runtime_jvm_cpu_utilization{%(queriesSelector)s}', - // expr: '?', - // }, java_otel: { expr: 'process_runtime_jvm_cpu_utilization{%(queriesSelector)s} * 100', }, @@ -87,6 +96,13 @@ function(this) cadvisor: { expr: 'rate(container_cpu_usage_seconds_total{cpu="total", %(queriesSelector)s}[%(interval)s]) * 100', }, + jmx_exporter: { + expr: 'rate(java_lang_operatingsystem_processcputime{%(queriesSelector)s}[%(interval)s])', + exprWrappers: [ + //nanoseconds + ['', '/1000000000 * 100'], + ], + }, }, }, @@ -97,18 +113,16 @@ function(this) unit: 'bytes', optional: true, sources: { - // java_micrometer: { - // expr: '?{%(queriesSelector)s}', - // }, - // otel: { - // expr: '?{%(queriesSelector)s}', - // }, prometheus: { expr: 'process_resident_memory_bytes{%(queriesSelector)s}', }, cadvisor: { expr: 'container_memory_rss{%(queriesSelector)s}', }, + //closest available approximation (heap + non-heap used): + jmx_exporter: { + expr: 'java_lang_memory_heapmemoryusage_used{%(queriesSelector)s} + java_lang_memory_nonheapmemoryusage_used{%(queriesSelector)s}', + }, }, }, memoryUsedVirtual: { @@ -118,12 +132,6 @@ function(this) unit: 'bytes', optional: true, sources: { - // java_micrometer: { - // expr: '?{%(queriesSelector)s}', - // }, - // otel: { - // expr: '?{%(queriesSelector)s}', - // }, prometheus: { expr: 'process_virtual_memory_bytes{%(queriesSelector)s}', }, diff --git a/process-observ-lib/signals/system.libsonnet b/process-observ-lib/signals/system.libsonnet index 3890acf0e..291f63285 100644 --- a/process-observ-lib/signals/system.libsonnet +++ b/process-observ-lib/signals/system.libsonnet @@ -25,19 +25,15 @@ function(this) unit: 'short', optional: true, sources: { - // prometheus: - // { - // expr: '?{%(queriesSelector)s}', - // }, java_otel: { expr: 'process_runtime_jvm_system_cpu_load_1m{%(queriesSelector)s}', }, - // otel: { - // expr: ?
- // }, java_micrometer: { expr: 'system_load_average_1m{%(queriesSelector)s}', }, + jmx_exporter: { + expr: 'java_lang_operatingsystem_systemloadaverage{%(queriesSelector)s}', + }, }, }, systemCPUUsage: { @@ -53,9 +49,13 @@ function(this) java_otel: { expr: 'process_runtime_jvm_system_cpu_utilization{%(queriesSelector)s}', }, - // otel: { - // ? - // } + jmx_exporter: { + expr: 'java_lang_operatingsystem_cpuload{%(queriesSelector)s}', + exprWrappers: [ + //fraction (0..1) to percent + ['', '* 100'], + ], + }, }, }, diff --git a/zookeeper-observ-lib/README.md b/zookeeper-observ-lib/README.md index 31f5ce993..cc13b0759 100644 --- a/zookeeper-observ-lib/README.md +++ b/zookeeper-observ-lib/README.md @@ -16,4 +16,4 @@ jb install https://github.com/grafana/jsonnet-libs/zookeeper-observ-lib ## Example -![alt text](image.png) \ No newline at end of file +![zookeeper](image.png) diff --git a/zookeeper-observ-lib/alerts.libsonnet b/zookeeper-observ-lib/alerts.libsonnet index f15b3ee8e..dedaca173 100644 --- a/zookeeper-observ-lib/alerts.libsonnet +++ b/zookeeper-observ-lib/alerts.libsonnet @@ -1,11 +1,11 @@ { new(this): { groups+: [ - { - name: this.config.uid + 'zookeeper-alerts', - rules: - [], - }, + // { + // name: this.config.uid + '-zookeeper-alerts', + // rules: + // [], + // }, ], }, } diff --git a/zookeeper-observ-lib/config.libsonnet b/zookeeper-observ-lib/config.libsonnet index c5ea341e4..0e0ca81ee 100644 --- a/zookeeper-observ-lib/config.libsonnet +++ b/zookeeper-observ-lib/config.libsonnet @@ -9,6 +9,7 @@ metricsSource: 'prometheus', //or grafanacloud signals+: { + cluster: (import './signals/cluster.libsonnet')(this), zookeeper: (import './signals/zookeeper.libsonnet')(this), latency: (import './signals/latency.libsonnet')(this), }, diff --git a/zookeeper-observ-lib/image.png b/zookeeper-observ-lib/image.png index c14628d86..4734b9076 100644 Binary files a/zookeeper-observ-lib/image.png and b/zookeeper-observ-lib/image.png differ diff --git
a/zookeeper-observ-lib/main.libsonnet b/zookeeper-observ-lib/main.libsonnet index 551631a58..92c511dea 100644 --- a/zookeeper-observ-lib/main.libsonnet +++ b/zookeeper-observ-lib/main.libsonnet @@ -15,7 +15,7 @@ local jvmlib = import 'jvm-observ-lib/main.libsonnet'; filteringSelector: this.config.filteringSelector, groupLabels: this.config.groupLabels, instanceLabels: this.config.instanceLabels, - uid: this.config.uid, + uid: this.config.uid + '-zookeeper', dashboardNamePrefix: this.config.dashboardNamePrefix, dashboardTags: this.config.dashboardTags, metricsSource: 'prometheus_old', diff --git a/zookeeper-observ-lib/panels/zookeeper.libsonnet b/zookeeper-observ-lib/panels/zookeeper.libsonnet index 7f2b6f975..8fa3dd0db 100644 --- a/zookeeper-observ-lib/panels/zookeeper.libsonnet +++ b/zookeeper-observ-lib/panels/zookeeper.libsonnet @@ -2,6 +2,9 @@ local g = import '../g.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; { new(signals):: { + clusterRoles: + signals.cluster.role.asStatusHistory() + + commonlib.panels.generic.statusHistory.base.stylize(), aliveConnections: signals.zookeeper.aliveConnections.asStat() + commonlib.panels.generic.stat.info.stylize(), diff --git a/zookeeper-observ-lib/rows.libsonnet b/zookeeper-observ-lib/rows.libsonnet index 3f884855a..700e3f38e 100644 --- a/zookeeper-observ-lib/rows.libsonnet +++ b/zookeeper-observ-lib/rows.libsonnet @@ -12,6 +12,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; panels.zookeeper.outstandingRequests { gridPos+: { w: 16, h: 8 } }, panels.zookeeper.znodes { gridPos+: { w: 4, h: 4 } }, panels.zookeeper.watchers { gridPos+: { w: 4, h: 4 } }, + panels.zookeeper.clusterRoles { gridPos+: { w: 24, h: 6 } }, ] ), diff --git a/zookeeper-observ-lib/signals/cluster.libsonnet b/zookeeper-observ-lib/signals/cluster.libsonnet new file mode 100644 index 000000000..ab4f4b040 --- /dev/null +++ b/zookeeper-observ-lib/signals/cluster.libsonnet @@ -0,0 +1,74 @@ +local commonlib = import 
'common-lib/common/main.libsonnet'; + +function(this) // builds the cluster-level signal group from the lib config (`this`) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: [], + aggLevel: 'group', + aggFunction: 'avg', + discoveryMetric: { + prometheus: 'num_alive_connections', + grafanacloud: 'zookeeper_numaliveconnections', + }, + signals: { + // used in status map + role: { + name: 'Current zookeeper role', + description: ||| + 0 - zookeeper, 1 - zookeeper(leader). + |||, + type: 'raw', + unit: 'short', + aggFunction: 'sum', + sources: { + grafanacloud: + { + expr: 'clamp_max(zookeeper_leaderuptime{%(queriesSelector)s}, 1) or clamp_max(zookeeper_numaliveconnections{%(queriesSelector)s}, 0)', + legendCustomTemplate: '{{ %s }}' % this.instanceLabels[0], + aggKeepLabels: this.instanceLabels, + valueMappings: [ + { + type: 'value', + options: { + '0': { + color: 'light-yellow', + index: 0, + text: 'zookeeper', + }, + '1': { + color: 'light-orange', + index: 1, // position in the mapping list; kept distinct from the '0' entry + text: 'zookeeper(leader)', + }, + }, + }, + ], + }, + prometheus: + { + expr: 'clamp_max(leader_uptime{%(queriesSelector)s}, 1) or clamp_max(num_alive_connections{%(queriesSelector)s}, 0)', + aggKeepLabels: this.instanceLabels, + legendCustomTemplate: '{{ %s }}' % this.instanceLabels[0], + valueMappings: [ + { + type: 'value', + options: { + '0': { + color: 'light-yellow', + index: 0, + text: 'zookeeper', + }, + '1': { + color: 'light-orange', + index: 1, // position in the mapping list; kept distinct from the '0' entry + text: 'zookeeper(leader)', + }, + }, + }, + ], + }, + }, + }, + }, + }