From ecc6d6cb20ac28fa5928c065382fa12861490c5a Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Fri, 5 Jul 2024 18:10:16 +0300 Subject: [PATCH 01/34] adding initial files --- apps/sonda/sonda.nim | 36 ++++++++++++++++++++++++++++++++++++ apps/sonda/sonda_config.nim | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 apps/sonda/sonda.nim create mode 100644 apps/sonda/sonda_config.nim diff --git a/apps/sonda/sonda.nim b/apps/sonda/sonda.nim new file mode 100644 index 0000000000..5b0dc3db38 --- /dev/null +++ b/apps/sonda/sonda.nim @@ -0,0 +1,36 @@ +{.push raises: [].} + +import + std/[options, strutils, os, sequtils, net], + chronicles, + chronos, + metrics, + libbacktrace, + system/ansi_c, + libp2p/crypto/crypto, + confutils + +import ../../waku/common/logging, ../../waku/factory/waku + +logScope: + topics = "sonda main" + +proc logConfig(conf: LiteProtocolTesterConf) = + info "Configuration: Sonda", conf = $conf + +{.pop.} +when isMainModule: + const versionString = "version / git commit hash: " & waku.git_version + + let confRes = SondaConf.load(version = versionString) + if confRes.isErr(): + error "failure while loading the configuration", error = confRes.error + quit(QuitFailure) + + var conf = confRes.get() + + ## Logging setup + logging.setupLog(conf.logLevel, conf.logFormat) + + info "Running Sonda", version = waku.git_version + logConfig(conf) diff --git a/apps/sonda/sonda_config.nim b/apps/sonda/sonda_config.nim new file mode 100644 index 0000000000..d3064d4f08 --- /dev/null +++ b/apps/sonda/sonda_config.nim @@ -0,0 +1,36 @@ +import confutils/defs + +import ../../waku/common/logging + +type SondaConf* = object ## Log configuration + logLevel* {. + desc: + "Sets the log level for process. Supported levels: TRACE, DEBUG, INFO, NOTICE, WARN, ERROR or FATAL", + defaultValue: logging.LogLevel.DEBUG, + name: "log-level" + .}: logging.LogLevel + + logFormat* {. 
+ desc: + "Specifies what kind of logs should be written to stdout. Suported formats: TEXT, JSON", + defaultValue: logging.LogFormat.TEXT, + name: "log-format" + .}: logging.LogFormat + + clusterId* {. + desc: + "Cluster id that the node is running in. Node in a different cluster id is disconnected.", + defaultValue: 0, + name: "cluster-id" + .}: uint32 + +{.push warning[ProveInit]: off.} + +proc load*(T: type SondaConf): Result[T, string] = + try: + let conf = SondaConf.load(version = git_version) + ok(conf) + except CatchableError: + err(getCurrentExceptionMsg()) + +{.pop.} From e751b12f11f0e5acfa3b99bf728cd67cc541a536 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Fri, 5 Jul 2024 19:00:25 +0300 Subject: [PATCH 02/34] adding makefile --- .github/workflows/pre-release.yml | 2 +- Makefile | 6 +++++- apps/sonda/sonda.nim | 4 ++-- apps/sonda/sonda_config.nim | 6 +++--- waku.nimble | 4 ++++ 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index c93e0513bb..5524645e94 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -73,7 +73,7 @@ jobs: tools tar -cvzf ${{steps.vars.outputs.nwaku}} ./build/wakunode2 ./build/chat2 - tar -cvzf ${{steps.vars.outputs.nwakutools}} ./build/wakucanary ./build/networkmonitor + tar -cvzf ${{steps.vars.outputs.nwakutools}} ./build/wakucanary ./build/networkmonitor ./build/sonda - name: upload artifacts uses: actions/upload-artifact@v3 diff --git a/Makefile b/Makefile index d0c21a6ef4..24a5e2d31c 100644 --- a/Makefile +++ b/Makefile @@ -222,7 +222,7 @@ liteprotocoltester: | build deps librln ################ ## Waku tools ## ################ -.PHONY: tools wakucanary networkmonitor +.PHONY: tools wakucanary networkmonitor sonda tools: networkmonitor wakucanary @@ -234,6 +234,10 @@ networkmonitor: | build deps librln echo -e $(BUILD_MSG) "build/$@" && \ $(ENV_SCRIPT) nim networkmonitor $(NIM_PARAMS) waku.nims 
+sonda: | build deps librln + echo -e $(BUILD_MSG) "build/$@" && \ + $(ENV_SCRIPT) nim sonda $(NIM_PARAMS) waku.nims + ################### ## Documentation ## diff --git a/apps/sonda/sonda.nim b/apps/sonda/sonda.nim index 5b0dc3db38..25e0f903ec 100644 --- a/apps/sonda/sonda.nim +++ b/apps/sonda/sonda.nim @@ -10,12 +10,12 @@ import libp2p/crypto/crypto, confutils -import ../../waku/common/logging, ../../waku/factory/waku +import ./sonda_config, ../../waku/common/logging, ../../waku/factory/waku logScope: topics = "sonda main" -proc logConfig(conf: LiteProtocolTesterConf) = +proc logConfig(conf: SondaConf) = info "Configuration: Sonda", conf = $conf {.pop.} diff --git a/apps/sonda/sonda_config.nim b/apps/sonda/sonda_config.nim index d3064d4f08..c264f988cb 100644 --- a/apps/sonda/sonda_config.nim +++ b/apps/sonda/sonda_config.nim @@ -1,4 +1,4 @@ -import confutils/defs +import confutils/defs, stew/results import ../../waku/common/logging @@ -26,9 +26,9 @@ type SondaConf* = object ## Log configuration {.push warning[ProveInit]: off.} -proc load*(T: type SondaConf): Result[T, string] = +proc load*(T: type SondaConf, version = ""): Result[T, string] = try: - let conf = SondaConf.load(version = git_version) + let conf = SondaConf.load(version = version) ok(conf) except CatchableError: err(getCurrentExceptionMsg()) diff --git a/waku.nimble b/waku.nimble index a6db1f2b7f..556cb9bfa5 100644 --- a/waku.nimble +++ b/waku.nimble @@ -88,6 +88,10 @@ task networkmonitor, "Build network monitor tool": let name = "networkmonitor" buildBinary name, "apps/networkmonitor/" +task sonda, "Build sonda tool": + let name = "sonda" + buildBinary name, "apps/sonda/" + task rln_db_inspector, "Build the rln db inspector": let name = "rln_db_inspector" buildBinary name, "tools/rln_db_inspector/" From 5de37cece84ddf41d620b8f6b5749df832d67b7d Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Mon, 8 Jul 2024 10:18:50 +0300 Subject: [PATCH 03/34] fixing compilation error --- 
apps/sonda/sonda.nim | 2 +- apps/sonda/sonda_config.nim | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/apps/sonda/sonda.nim b/apps/sonda/sonda.nim index 25e0f903ec..af5ba17252 100644 --- a/apps/sonda/sonda.nim +++ b/apps/sonda/sonda.nim @@ -22,7 +22,7 @@ proc logConfig(conf: SondaConf) = when isMainModule: const versionString = "version / git commit hash: " & waku.git_version - let confRes = SondaConf.load(version = versionString) + let confRes = SondaConf.loadConfig(version = versionString) if confRes.isErr(): error "failure while loading the configuration", error = confRes.error quit(QuitFailure) diff --git a/apps/sonda/sonda_config.nim b/apps/sonda/sonda_config.nim index c264f988cb..0dfc524aea 100644 --- a/apps/sonda/sonda_config.nim +++ b/apps/sonda/sonda_config.nim @@ -24,13 +24,9 @@ type SondaConf* = object ## Log configuration name: "cluster-id" .}: uint32 -{.push warning[ProveInit]: off.} - -proc load*(T: type SondaConf, version = ""): Result[T, string] = +proc loadConfig*(T: type SondaConf, version = ""): Result[T, string] = try: let conf = SondaConf.load(version = version) - ok(conf) + return ok(conf) except CatchableError: err(getCurrentExceptionMsg()) - -{.pop.} From 5ecd12b810c5a7a516944e52bdef1e6644080a8b Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Mon, 8 Jul 2024 11:10:48 +0300 Subject: [PATCH 04/34] creating waku node configuration --- apps/sonda/sonda.nim | 19 +++++++++++++++++-- apps/sonda/sonda_config.nim | 20 +++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/apps/sonda/sonda.nim b/apps/sonda/sonda.nim index af5ba17252..49af155c19 100644 --- a/apps/sonda/sonda.nim +++ b/apps/sonda/sonda.nim @@ -8,9 +8,14 @@ import libbacktrace, system/ansi_c, libp2p/crypto/crypto, - confutils + confutils, + stew/results -import ./sonda_config, ../../waku/common/logging, ../../waku/factory/waku +import + ./sonda_config, + ../../waku/common/logging, + ../../waku/factory/waku, + 
../../waku/factory/external_config logScope: topics = "sonda main" @@ -34,3 +39,13 @@ when isMainModule: info "Running Sonda", version = waku.git_version logConfig(conf) + + var wakuConf = defaultWakuNodeConf().valueOr: + error "failed retrieving default node configuration", error = confRes.error + quit(QuitFailure) + + wakuConf.logLevel = conf.logLevel + wakuConf.logFormat = conf.logFormat + wakuConf.clusterId = conf.clusterId + wakuConf.shards = @[conf.shard] + wakuConf.staticnodes = conf.storenodes # connect directly to store nodes to query diff --git a/apps/sonda/sonda_config.nim b/apps/sonda/sonda_config.nim index 0dfc524aea..77e8e84480 100644 --- a/apps/sonda/sonda_config.nim +++ b/apps/sonda/sonda_config.nim @@ -2,7 +2,7 @@ import confutils/defs, stew/results import ../../waku/common/logging -type SondaConf* = object ## Log configuration +type SondaConf* = object logLevel* {. desc: "Sets the log level for process. Supported levels: TRACE, DEBUG, INFO, NOTICE, WARN, ERROR or FATAL", @@ -22,8 +22,26 @@ type SondaConf* = object ## Log configuration "Cluster id that the node is running in. Node in a different cluster id is disconnected.", defaultValue: 0, name: "cluster-id" + .}: uint16 + + shard* {. + desc: "Shard where sonda messages are going to be published", + defaultValue: 0, + name: "shard" + .}: uint16 + + period* {. + desc: "Time in seconds between consecutive sonda messages", + defaultValue: 60, + name: "period" .}: uint32 + storenodes* {. 
+ desc: "Multiaddresses of store nodes to query", + defaultValue: @[], + name: "storenodes" + .}: seq[string] + proc loadConfig*(T: type SondaConf, version = ""): Result[T, string] = try: let conf = SondaConf.load(version = version) From 3170bf61cede94b4835b18f6cbefcd7615f39a17 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Mon, 8 Jul 2024 11:41:59 +0300 Subject: [PATCH 05/34] initializing node --- apps/sonda/sonda.nim | 86 ++++++++++++++++++++++++++++++++++++- apps/sonda/sonda_config.nim | 2 +- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/apps/sonda/sonda.nim b/apps/sonda/sonda.nim index 49af155c19..e93c56cf65 100644 --- a/apps/sonda/sonda.nim +++ b/apps/sonda/sonda.nim @@ -9,13 +9,16 @@ import system/ansi_c, libp2p/crypto/crypto, confutils, - stew/results + results import ./sonda_config, ../../waku/common/logging, ../../waku/factory/waku, - ../../waku/factory/external_config + ../../waku/factory/external_config, + ../../waku/node/health_monitor, + ../../waku/waku_api/rest/builder as rest_server_builder, + ../../waku/node/waku_metrics logScope: topics = "sonda main" @@ -49,3 +52,82 @@ when isMainModule: wakuConf.clusterId = conf.clusterId wakuConf.shards = @[conf.shard] wakuConf.staticnodes = conf.storenodes # connect directly to store nodes to query + + var nodeHealthMonitor {.threadvar.}: WakuNodeHealthMonitor + nodeHealthMonitor = WakuNodeHealthMonitor() + nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING) + + let restServer = rest_server_builder.startRestServerEsentials( + nodeHealthMonitor, wakuConf + ).valueOr: + error "Starting esential REST server failed.", error = $error + quit(QuitFailure) + + var wakuApp = Waku.init(wakuConf).valueOr: + error "Waku initialization failed", error = error + quit(QuitFailure) + + wakuApp.restServer = restServer + + nodeHealthMonitor.setNode(wakuApp.node) + + (waitFor startWaku(addr wakuApp)).isOkOr: + error "Starting waku failed", error = error + quit(QuitFailure) + + 
rest_server_builder.startRestServerProtocolSupport( + restServer, wakuApp.node, wakuApp.wakuDiscv5, wakuConf + ).isOkOr: + error "Starting protocols support REST server failed.", error = $error + quit(QuitFailure) + + wakuApp.metricsServer = waku_metrics.startMetricsServerAndLogging(wakuConf).valueOr: + error "Starting monitoring and external interfaces failed", error = error + quit(QuitFailure) + + nodeHealthMonitor.setOverallHealth(HealthStatus.READY) + + debug "Setting up shutdown hooks" + ## Setup shutdown hooks for this process. + ## Stop node gracefully on shutdown. + + proc asyncStopper(wakuApp: Waku) {.async: (raises: [Exception]).} = + nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN) + await wakuApp.stop() + quit(QuitSuccess) + + # Handle Ctrl-C SIGINT + proc handleCtrlC() {.noconv.} = + when defined(windows): + # workaround for https://github.com/nim-lang/Nim/issues/4057 + setupForeignThreadGc() + notice "Shutting down after receiving SIGINT" + asyncSpawn asyncStopper(wakuApp) + + setControlCHook(handleCtrlC) + + # Handle SIGTERM + when defined(posix): + proc handleSigterm(signal: cint) {.noconv.} = + notice "Shutting down after receiving SIGTERM" + asyncSpawn asyncStopper(wakuApp) + + c_signal(ansi_c.SIGTERM, handleSigterm) + + # Handle SIGSEGV + when defined(posix): + proc handleSigsegv(signal: cint) {.noconv.} = + # Require --debugger:native + fatal "Shutting down after receiving SIGSEGV", stacktrace = getBacktrace() + + # Not available in -d:release mode + writeStackTrace() + + waitFor wakuApp.stop() + quit(QuitFailure) + + c_signal(ansi_c.SIGSEGV, handleSigsegv) + + info "Node setup complete" + + runForever() diff --git a/apps/sonda/sonda_config.nim b/apps/sonda/sonda_config.nim index 77e8e84480..ded1381f5f 100644 --- a/apps/sonda/sonda_config.nim +++ b/apps/sonda/sonda_config.nim @@ -1,4 +1,4 @@ -import confutils/defs, stew/results +import confutils/defs, results import ../../waku/common/logging From 
ecf74df73d7c156a89f6aec5dd8529d7d8140b5c Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Mon, 8 Jul 2024 13:14:12 +0300 Subject: [PATCH 06/34] adding docker compose --- apps/sonda/.gitignore | 4 + apps/sonda/Dockerfile.rest-traffic | 3 + apps/sonda/docker-compose.yml | 92 + .../customizations/custom-logo.png | Bin 0 -> 10938 bytes .../customizations/custom-logo.svg | 3 + .../monitoring/configuration/dashboards.yaml | 9 + .../dashboards/nwaku-monitoring.json | 5303 +++++++++++++++++ .../monitoring/configuration/datasources.yaml | 11 + .../configuration/grafana-plugins.env | 2 + .../monitoring/configuration/grafana.ini | 51 + .../configuration/pg-exporter-queries.yml | 284 + .../configuration/postgres-exporter.yml | 9 + apps/sonda/monitoring/prometheus-config.yml | 14 + apps/sonda/register_rln.sh | 31 + apps/sonda/run_node.sh | 99 + apps/sonda/traffic.py | 62 + 16 files changed, 5977 insertions(+) create mode 100644 apps/sonda/.gitignore create mode 100644 apps/sonda/Dockerfile.rest-traffic create mode 100644 apps/sonda/docker-compose.yml create mode 100644 apps/sonda/monitoring/configuration/customizations/custom-logo.png create mode 100644 apps/sonda/monitoring/configuration/customizations/custom-logo.svg create mode 100644 apps/sonda/monitoring/configuration/dashboards.yaml create mode 100644 apps/sonda/monitoring/configuration/dashboards/nwaku-monitoring.json create mode 100644 apps/sonda/monitoring/configuration/datasources.yaml create mode 100644 apps/sonda/monitoring/configuration/grafana-plugins.env create mode 100644 apps/sonda/monitoring/configuration/grafana.ini create mode 100644 apps/sonda/monitoring/configuration/pg-exporter-queries.yml create mode 100644 apps/sonda/monitoring/configuration/postgres-exporter.yml create mode 100644 apps/sonda/monitoring/prometheus-config.yml create mode 100755 apps/sonda/register_rln.sh create mode 100644 apps/sonda/run_node.sh create mode 100644 apps/sonda/traffic.py diff --git a/apps/sonda/.gitignore 
b/apps/sonda/.gitignore new file mode 100644 index 0000000000..f366b9465a --- /dev/null +++ b/apps/sonda/.gitignore @@ -0,0 +1,4 @@ +.env +keystore +rln_tree +.env diff --git a/apps/sonda/Dockerfile.rest-traffic b/apps/sonda/Dockerfile.rest-traffic new file mode 100644 index 0000000000..f09b1e3450 --- /dev/null +++ b/apps/sonda/Dockerfile.rest-traffic @@ -0,0 +1,3 @@ +FROM python:3.9.18-alpine3.18 + +RUN pip install requests argparse \ No newline at end of file diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml new file mode 100644 index 0000000000..e5d022ab99 --- /dev/null +++ b/apps/sonda/docker-compose.yml @@ -0,0 +1,92 @@ + +version: "3.7" +x-logging: &logging + logging: + driver: json-file + options: + max-size: 1000m + +# Environment variable definitions +x-rln-relay-eth-client-address: &rln_relay_eth_client_address ${RLN_RELAY_ETH_CLIENT_ADDRESS:-} # Add your RLN_RELAY_ETH_CLIENT_ADDRESS after the "-" + +x-rln-environment: &rln_env + RLN_RELAY_CONTRACT_ADDRESS: ${RLN_RELAY_CONTRACT_ADDRESS:-0xCB33Aa5B38d79E3D9Fa8B10afF38AA201399a7e3} + RLN_RELAY_CRED_PATH: ${RLN_RELAY_CRED_PATH:-} # Optional: Add your RLN_RELAY_CRED_PATH after the "-" + RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-" + +# Services definitions +services: + nwaku: + image: ${NWAKU_IMAGE:-harbor.status.im/wakuorg/nwaku:v0.30.1} + restart: on-failure + ports: + - 30304:30304/tcp + - 30304:30304/udp + - 9005:9005/udp + - 127.0.0.1:8003:8003 + - 80:80 #Let's Encrypt + - 8000:8000/tcp #WSS + - 127.0.0.1:8645:8645 + <<: + - *logging + environment: + DOMAIN: ${DOMAIN} + NODEKEY: ${NODEKEY} + RLN_RELAY_CRED_PASSWORD: "${RLN_RELAY_CRED_PASSWORD}" + RLN_RELAY_ETH_CLIENT_ADDRESS: *rln_relay_eth_client_address + EXTRA_ARGS: ${EXTRA_ARGS} + STORAGE_SIZE: ${STORAGE_SIZE} + <<: + - *rln_env + volumes: + - ./run_node.sh:/opt/run_node.sh:Z + - ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z + - ./rln_tree:/etc/rln_tree/:Z + - 
./keystore:/keystore:Z + entrypoint: sh + command: + - /opt/run_node.sh + + rest-traffic: + build: + context: . + dockerfile: Dockerfile.rest-traffic + command: > + python /opt/traffic.py + --msg-size-kbytes=${MSG_SIZE_KBYTES:-10} + --delay-seconds=${TRAFFIC_DELAY_SECONDS:-15} + volumes: + - ./traffic.py:/opt/traffic.py:Z + depends_on: + - nwaku + + prometheus: + image: docker.io/prom/prometheus:latest + volumes: + - ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z + command: + - --config.file=/etc/prometheus/prometheus.yml + # ports: + # - 127.0.0.1:9090:9090 + restart: on-failure:5 + depends_on: + - nwaku + + grafana: + image: docker.io/grafana/grafana:latest + env_file: + - ./monitoring/configuration/grafana-plugins.env + volumes: + - ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z + - ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z + - ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z + - ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z + - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z + - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z + - ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z + ports: + - 0.0.0.0:3000:3000 + restart: on-failure:5 + depends_on: + - prometheus + diff --git a/apps/sonda/monitoring/configuration/customizations/custom-logo.png b/apps/sonda/monitoring/configuration/customizations/custom-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..dcf13b931be20127443409243f18a69329dc3d7e GIT binary patch literal 10938 zcmeHt_cxqf)aWy65JZavA(7}UYKSf*L>ayJ&gjHo^h6|j5H(u#-ZIL_AQA-8%V>j` z=)>rv`@Hx5{`~{@K5LzIo>O)^XYIYtE+4hEUQv=Wkplogsivx|3jm;zn>KojR(Ih`2YaSMu^~0#7=y6e&9tV^DxX00IE#r{`e!R{prLK 
zuf=xf!4Ja!U9AGC`?gEa&)KUpoj89g1s@{qd=}1QfLRG3?!Ql-Z^aUB@F<*M<=^dm zF4BB%0`9uoQn38bgh?zQdhSxDlLtKJ)(%$#AZFj1u7iJlDaFLaucSye({U}w@GuX# zloUdVNLFr%LNlig3ex#=vxCLxRm0Ym49M+oKBBugq*dPRMd`JMoLDpBTq7g?HlQx7 z{jt=ti_FYDe)bX6g8Ae=%&W7XMxj0kZT|>z z+dECnF_Gbyj}|*GdhQ@HD}Oo$+Jn0!Dopo~Yb9Lo>t>VAtiih!|5+6B zdN1~r2}EzUC*)MBxHu$v?702G$LBOPcyQ|2rF8YUl29TXh|ydR66?4Jt^6V@ksizN ztE5Ei+HbE{OKCf|IyA+@w|tUuATiYuFQVwL!Zfc#mpuuyiF)3JBoO zO3Ae{^;z7?!CJ5n`l9=edl>8whY~#4*_7i@*#f_R?_36X{RdQ)N&Rz3G)HY}Ik38=GR5_Cz&J}GMw zta0!e7OE34yx-A>Yz()v-zX^8EHq7iVD|GzmL+$L?QQyE%3|7(T@77-Twe+oo5l3( z2rU=7|K#$Y_$Sa7Zwh{BB5e!iFh5v(dNEimZ7J@RUa$!2MD%Himtdo=$~=2b8d*K_ z9~gQ{ss&B1=WsB;#U?N$bK*N{&DlY{1IZ;M9ycfS6(Js88qrpSjXJZO8SX(}?D?dh zR>m!+m-VNdvD@IVC^#6ts^^b8m|)quW&zwxd_{uY51%9-lQT3)h?!-|&ooBnA&BBo znQj!qgHPM}+wgrST;bb##;)QsrkZdU%#3cU2aN67(8&KD6RKw>o3 z(qz-p`L}5|KeH^P#VG^(uePa7VG1KQe4y1-bY2^?zy-)?8w-gYeLQjR?2`}VHOh?T zK1}d^cGxwqOx|`&Ee8P~>xQo`ZZQD;$!H4AtSr5NGhr8Al%!>Uy&7MqN+O)M%35iE znZBX7Hf2bVB;4QzpWhm-?sR~2f4<}AX?3Zo^D#+7KPmYr1`X95;kq=Lz|Vx?y{i5~ zVuA4dRjI}4wG}bAk3;E0J;RN19~O1dB9Ar|kUeFE-It(bN!3F#9 zobnrwo)ZDtU3XX4%2t?s=GDs09Trs2`}bTq$e$yS5UoRB64qz`mN`v^9{`4$H>{m< zPP=lQ%=bw+>7V1N2BBe3Yg=vtoCEB!Dl<>dqPCLFCTt}+Q=?APg$}|)2LEK|4ug(9mIcyDv z@#q`RMc0RUhF*-$%Tdh_=6&ytGc!#|yz<%zkqEV-ychxx)f4D9$ja5$$;~CAp+ta} zny7%wM#45s;StZD{5^Tf(#LN(50%>!M%|631^H&CEP1Q&W>P?>dzOuwI&QkaMZ*_$ z7ZcbZ&^@yIu8i7fhfU>>D z`%Uy;V_HKDv0FQF@rMVVZ5N3%4vUYnt2ap#=15wOvPq=hz)trn-+YWQeE zKd4jJMbaQJ!KBlQYXVcsN1W@lsJ;pJKB;ksQrv%^EC|3wNOcFl4bH?jy))cx^|8wE6VGYD+TN#Qf3v$C;I7C;YoeU$m3s@e)f3^6A-%xBdlS0{R~A>8mzkZ zOW33?;#~oFgqT0cnn}*t6YjQG2KJikD%rL@_xX&%yLc%Fbyr^bTMwT}xtcDeI<~9> zb?k#=%dI8V95Q*%$;7%dTu9p#>mYX~(K+}8(btigF5mA~hI2kUQb7kA-s^yJ5pady zGgWwKFr770TJtmt%)5{8Gf@uJ*iCih*0&B0?VAeawOEq*;3p7HpZYYZ=SzltsYa|| z?>e0Co32iiWkVP5>m!Ctnt8}#<@~mebn902tuH{@>kWbpn?E04Q!E6V{KXXX5vF$? 
zAAi>mT5S-Qs`mLL7GgURwm_%ja7%&gHJvK*Ede;+bR=se*8c&62aZ-JyCP1f3W&nO z^9tX#&WdeAipjo^GEBEq0_}x<5r@M_l+f;l?1$IGWbS%^&yC-c(dahNK+^R;TNPjf4a5RB;NlLfa zC1feb0s07dT5j;7?kK_1j zA}s(?A_n&#cAWuzJERhTmvnF*5Y>KABI~(q>;N+j-JQqpr2FV59q!6L@**&nhl7@? z$Kp~<{c8Z?!+rXmQl2mt$)QAYutWQf?9k=|r)Y%c zWn&mKMmmir_q``MB)luNBgpD>G}Wm2B>8foh&*dS@rIK`HQ%ut)o^KR*X^E*4hnb! z%@aoUHX9|-Fool4WTvvJv$vIA1j?c0dY*w4Tzb`psrtX0EB-{TQzOsgXFk9dVe@jk z!qLpzjZKjQBA-yh2A zGgCgHr0VEHk7~c2wzc`2i@$gMeNPf%E}~yu$17);pEzr7b{bg{CU?D1pUH(VF;*yYK}#^>C{!nbkOylspw?ZXWe7;1RE&tq;MQ3!nF_d$kZLVhcv zeo#&}mHD=TAF~ud5`|Ma&6Brvx7U3EB^dCw#k-L z%d)aY#QjGLtnmHJZ>~c8uJJFZ>5XZ-85tMbD7DY-=bI+&RT z!mFIw+zFk)Qxsxzg$p64#;rWD3Ias*qz&4q0*+>{%fv%ugjZ#mMG)x)b+%-q8_?qH z7BPbrLyhsPAw$_r>S-}ezL#}SK8`kHh3a6t0MvIc8nOBFY%%}p@8vt%l@G4B7CJIG z46G_Sq=2n_9O~Ws3iudnBz6Kbz7yKsoE7B0QCvn}?2&#EYwAe5$Es<-+yUNy{#tKt zE3WlKGA31^PAevFr4`Ee7#4?X;7y`7luG-hvE%S&`#JX>RzE(^?e1aFq&stoGF@ew z(qDBw!O5?6vUQ;`;H@dh^S^zTTLEIM*}NHoP3gscnEY23GY~y8F2h01J><|NIGMmp zaqq|h@4;a|5lksnzfs`Cl)Y+!Ri>G->wbYlqi8=gFT9vxe5%rvp8a~yF6XN4RW02# zulx6BywVOx-^b(r0 zOLv+oCURJNf^mpBUrmMM%OnitMfmBB$3o9$huVFvvxF|(M{FYsZO^@J<_MJ-*Ydkiykb-G_*4V(I8}ew?0^9Z$NMT4Z*!v-bkIW z$cp(t_NDYtDz!3;GhzEi^=h&`UTB|9-8R;z zBS?C`?7F#w*09IPbV)k1Gv&sK&Yf4C#L8EYfmGF-8JDP~TuV}Guqj-!aUbo|aoG+R z+TJ{Kj~B@R6PF90bW_ndj=qKa_G?c+V)urn3J zCc6Y&G&);Y!(Gk`d&;|&9|6TY9&sD_kuD;_R9T6f; z+iyACPbQX(-2>zF@b@o@G-PGth2j+Y?{@k)UldV>(W+ar8MreIH+Lt7g(ijAqNS8S z$zBC+`CgBN$X)ryw1?}=Q}M#fxQVP?=+np0ZDbQAI~RQ?`Fn6p*&y~$^hVOBS-wI? 
zkek<_zlMOBZWEBEiP1y6x{oA)BPrSVt=@1ch3&JL4IgINV*JbsTYM4=+Q}+5iRHQ- zq+7HJ>Zn}DVMiZQzI1p_$~NV_m*uW;x*rpx{K;O3?)6|8by;_(L^^jW>qWXQ~AiE*x+$QJcS{f9E zC)%Z2uQu;gZ=nT_4lc2?6e8CL6Z2QQ*B){Q!&Is_o1HDf51XCP!(oS47Og8s8W(+s zcF^!rlW}jrDTG9CZ!ldxM+EMy|TQ zk9*xASN~#m%5omn%o|3-_J`*~@E?9_QyUZLv+Hd_tQF(+H3g$xA|1%j?r_^}Dl|#@ z`kF21x+APoaHm;fjWwzx#LeS+wW#T~g~HLy#~j$L3Nv#pAJ(7pM^}p*%TT9g!PSdP z`2(~CDZx6qpW^87U9`nTg-@c9yz$kKmMMgvT`3rS1}ks9m<=w)Q^mdnELuKfR++87 z{Wo0mpm3Qy#14_aNpXQ=`EHekJC%;^KFj_&cIpHD{B{joCr=~uY^(Tjz%hr=n~&et zBLu(9K6v5p0UNSYS>qt7H9v`p4Eu8}qUR%h0}Fd5XpKex%=+u(QHZG+gnscojNJRn z|7BK|fv;KX@?l`^KMZO)Bt0@-)gUixPM+-R)|BSc{XE$E7~?fEa3*-?x_!_5vRHq3 zzImW(s%LoTvhAlWjRK>Vd-ar&N9K?y#W3=>9v)YzI5w68TxNis!T zZa)8g{BxewHH@d5Bnx%X1Fw~$7rkr5zWjC9;$Y@#`R-Sj4y#a%US;!?KHhW4!YWbr zUm>JpIPXEY*ah28{2c8#IHerjUS-&V{Q0#Uo#63D!Y_M4U~P7>kuaQo!sClM-~58j zr>x=C!4Jcuq$X+JKqozoVyb2XV~^Ez+{uM&eKZ$PRN$YoEu77Be(oUxM@rEAX!HTX zzh$Lt&CzgSz(k{g;<)R!PkBZw01zm~HPc4JwUU*xFO!A8y!*S6?83)F1}#s&6Bdw5BKuywwd(WB_Ze$C2x!kNBcWJ;-v6 zWseEF@18tw;fm(CRc!#fp?&IX{)w1(&c4K(=Lks%q&^b~NEYE&#-!C^9{~9*@qA+7 zU49J=x#i99VaMvbHocPsR-)ld=XsIGuZ0K{w3wdt^kl64c}b%4`V^!TXJ4K222DIUp@pmq&%7TcAssAj1haCd7 zRY__9g360Ghk5LUOegOU^oErb(7g|Jmm5dU-GkGW)U=ZZN2A^X2!cVjxq;Q_#d_O6 z{M~x^gIHJ@)xk>tc5Q4}UY7jQh^xqqAJ;6kAb&uphsX86N2dobE$;>5vWAZtn*n63 z@$~@|G|eGVhvSDy_g zHb+3i7Ux4vltx~)e0Z33#9j^=K0SU5yOp$dy=ER@rnh(hb0Sr&3Sc1#Cmw-%v<5Go zVOa^PIxhcS-vpe({2!NF=(8dGeT8M|;!wl_Vcz$idxZGVxSUxcCfL@>VYd=)R7keB zIB@d&)FR8>SODP_DiLMYtOq^El0-tF$pgDs&uv$;j!zn@ipd7^=Vwlqj%{Zfx*`EZP7sM2inimX&q7z^ZMaiM@ny|5@5N#mz5X2HE5_a0RfF5^y)HPaVcrzuAPn4ec5qfk5MRnp8x1j}8VVkvh3s>-cV3vd8G1zSZ!l<@LI(g4+mPO6I zE>?%{sWBCc^lDoZ6sQv$?AwU%IU^2f>r*n`!%FZ}Db6Qf%T+DR;EbMj6YK{!c8E#-t^u}a7LI#W zDB$a+-_GL-J8NdKc*I^zVJ#qbRk4WIkFh*@);o9KJ%%+*Mg1oxUj=sJ`gWZ(UN1IP zFq{^VGNFU{BeL@Z796)X{Lwz~?T)$4pfFA>p`!(c0Uk2KWfAoQuY&Jn7n6UQ?F>=h zPI|YO)8Z?tJ-^$fc=!@jRMhfOsavm!(Y+?_SokVal`+7BM}jJ-*rJ}tg7Y3IL(9Fk zaZ0jLh`~AGy5IRRSD&(dEoly%s-dUU4>=>k1xP4hU^!M`84r-yJl{TV^b>phu~Ub! 
z&OARl3r7~NGm*d+!W7uB$20dYipJLE#QwDJ5fA3lQn^(b{(^=yz8MNwkDkORE$C1n zbkk);Q_q?|x&-tbd?hz5)5IlSCZN>M5(tg&bPIs@xC*+fi&JGed#U(8isLraF|t+@ z$pt&-auNf}+Yf**JT<7+>8!5}LPQxX$r;!Ra4f%RK?~W-Bi)X3fLtegQ^xhXiUvL6 z4e(l!!#Y$PLr&Vn=6vqPE_2=sgpn?J>&%%= zL%y3*WO4h&V^{bJgOt#)6}_%k=$+zo>u)Cyc^`L{Bkz@&Kbb+cs&u@HnOppJ7pb%4 ztv!#>a^cuowjXW~>s?0Re_L6zFnFZJNEg^!86Tcf5QL3t$IHAofCcP9JQ=R>^-!Qm z&3p1)5yRp0b1|7TGisTgFIf+y?*=zb(-)VXQ1))^gpg$s*?$kK%($#NSrLS1O!$Hl@LP2Vi0 zVro)*py6NpH)@P@usugzfjVz41aE>x6Ez6&+fKN?b$R2>&u8%6mv1cOE@?MXJ~ftT zN_G35rACsmJEisvGKxjmvj( za$XdmOl|(7byPrGBKWD5L1y@CWZ5z~$*<-5V)>78$b7lwz^?|v&4TSd%ub+{+y~lc&s>fE@NWY(@ z3&l!$ekh#vMzYniKvgvg-n)sLh<`8elN~8BV(Pg6rfk;z-Lk~CqbSsx$3AbO;}kRb znoE3ru0o+4%~vOu_Z}8osFjcc5F>@Qx1gWKB2f1?bZ)QEVXR+hWG42P*(^oDclU1U+b-S*t;VvjcK2W z&8-y3$rw{Ck_X8+_NSLz?!4O)`DD@Z*XLB-58Qyyjgs`o4@+E0?T1u+UeKAyB;L*7 zw#qtAbHivhz*5>4LZUf8AD*7;6ZQrdKb32t9!%AgxN^}6u8+6_r;UdYb}p~mNVr#! zajuzk8$RtrOld_BGl(-}DN`S~}bor%j2_Tn*n@B|r+ zfw9G&xL<7gr<9zXtAFib?s6(t5JX{y7m@WxddqhHcev3^+1_vr!^}66zXQ4+?Uv6X zmYXKCisQau>9==!Fu8o18=a;^y8kCG>g!joIHOqj`|F!{zU8oT!>kI`&pk(HPBT%j z{F(68dnfvf&HwH7RSjU5=Cvh@#2&iWb1+%JY;~vLv!p46?A_)p0(-ZMZC}m81MpixQ3techJo*rg*tz;hGRwqC-*x1CvjnJsR_9fM{ z8`26v^q=E_`L8WU??Pq>oND8tFPUQ|q1*5cZnJ8o2n%nHtJQV58KaAIdeRut*njse z$RiivNd4vUtg&C<`H{bZ>^|L8PuD{qza)?2HiLZabF`EQ9W-*Ym(=D?-P#UQ;hd8&+@X4;fe zT{NBT&1?x=_P$;KcVGU595_;ZK^_Y@ZkH~@_&9>}Zp2e^THi4THK5+zg{aqR$|bQf zlsNaP0u;qfcwMV64})EyO`S6kGxsmQWB#xkoD>y6Uc2qc>MJbiLGzO8=WJ7A#K!Hv zhWpE=Um~-Mx#h!&b>|)j6nr4SwK}i%aU%U?#>Hk}d`t_>hc9rUkL(syf{aXmuSt){ z3EPMZBJkoh_Sv;^eA%n$6L_T&sC`KThxC!xn7)-11R8$a<21K=m+yDh7iotPc_ z1D>fW9(4gf5gMQ>0n2Le@wGVI4a%5U`2v^Sg6xgJuyV;9^Fx5~uK&=u{$3at-um|_ zsv0k29ynhrj0aCI6wt4mVd@LIA2qlWEx<9(Eb{06UO=5|R_l$VQ?I8GV~@wYmIg%} z`J~^t4cSf-^t^B|;^Z~dy9XGm`QwDU{o>0Oyej8syKFitv< z0)JF7ov&6;eZn@9HQ<5|Q!gdK6qdP}LC#E#ZqxXcd>8mYcPG4oVb>$~Jjaa`BUP2z z)(3Ua>9VZt>U{wfB#heTZuAAhJzYx93d!f_;Az4n+|tBff{pG{f?Jr-c6v9-?`n-k zTL*k(th)s%P?=x%(6IFveY-tJ*`3Juy?e=4fOEjA=fpl;)a1Zm!#5Rv7f`sjnBMRU 
z8+F6TbZuwJdtB7dU$zwzHg)y9XpDOm%OL>Vc_}Jjp<`SNy#b>v)lab`BKq#BDC$L; zJD0_f?ck!`xuzsU56BGG`Kj2H&y9kVh*6RKT2QTuj>`M#O~0uYu)SnU-g?G3(ZCAr zOx;wbT!w7FyY+F)go2NV(B~SVU-gZLd3n*ik1yA0MItql z7izlFnx24HV!Fryuk5a|GBsEpIrChl9bqK4i==NVl6Q|bEdpa)zFgG1^#EUUC=9Y@ z5PR{LD5}__kfL-zOO0!eaUwuK&FO;I%|(4ozlOUC08o89A)FR9`E?W9MV)kBV_-uK zMPk$!*LMA#ACjC%Z6Gn0oKZ_VVAFqp{}x74O?Qmc07_%J}WK0MLvl=d+SjI0WA9M`1?NJ?Kf;4xND&+_Y8;m zjjCst%gv*Xl=w&iv(iW;IBM`v6DfC)bPjh3pmd%ZEENay;+kedhK33DSb$)>zNpn(7K`JjP+p z?Lo_`Clz<4Na%H?x`MmsFD8^s@$-c!d5h4=PMnrtBNsR~*d_GBZGJF}gh#J(%IT;Z zL`bh4e@59=JQXomd{$hqxQEU%`EcMd)k-VD}&q*V#YX9OR(1D!+-ke@-_oLSB-TrxaSRe#0a>4Um~R+Xsf&1{Mt!J0xmy;;NeK4&{R(~k8G4s8BCUwD=hT1Q5tU1fbBob;5%K8w=UgtzjIW$j4Tv)`?N^!hR*EtF-aS7oo! z7T|0gV;Uv!^)j7MB43K91Ja`uvMVX%ASK>xg`hiSc~3Ai0U!?PcZ@>E3=c9-y3X3Y z-i67BdR{Gd+*H6db7MktdT2A%RG5HwkE1A3pTS8@DK8QPkl{+V@)cN#M65CY^@Gg@}D##$ob&6M|>qI2wfr%FMzfnO)iSGgY?@v zyxcq1;U+B+C|$d{CsbQ4FnOEVg)ni3j`U%+pmF<~?SE_?dVQYg1P-2D q{LyJ(_S!`{;r~|P_y6SD>yt2q?MdW2>6pg< + + diff --git a/apps/sonda/monitoring/configuration/dashboards.yaml b/apps/sonda/monitoring/configuration/dashboards.yaml new file mode 100644 index 0000000000..e59ac96b38 --- /dev/null +++ b/apps/sonda/monitoring/configuration/dashboards.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +providers: +- name: 'Prometheus' + orgId: 1 + folder: '' + type: file + options: + path: /var/lib/grafana/dashboards \ No newline at end of file diff --git a/apps/sonda/monitoring/configuration/dashboards/nwaku-monitoring.json b/apps/sonda/monitoring/configuration/dashboards/nwaku-monitoring.json new file mode 100644 index 0000000000..2b024e32fc --- /dev/null +++ b/apps/sonda/monitoring/configuration/dashboards/nwaku-monitoring.json @@ -0,0 +1,5303 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": 
[], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12485, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 45, + "panels": [], + "title": "Waku Node", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 0, + "y": 1 + }, + "id": 41, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(waku_histogram_message_size_bucket[1h])/scalar(rate(waku_histogram_message_size_count[1h]))*100", + "format": "heatmap", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Message distrubution %/kBytes (Last Hour)", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "deckbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 9, + "y": 1 + }, + "id": 38, + "options": 
{ + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(waku_histogram_message_size_sum[1h])/rate(waku_histogram_message_size_count[1h])", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average Msg Size (Last Hour)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "deckbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 5, + "x": 9, + "y": 5 + }, + "id": 42, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.75, rate(waku_histogram_message_size_bucket[1h]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "75% Percentile (Last hour)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "deckbytes" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 14, + "y": 5 + }, + "id": 39, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(waku_histogram_message_size_bucket[1h]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "99% Percentile (Last Hour)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 9, + "x": 0, + "y": 9 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "(increase(waku_node_messages_total[1m]))/60", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Messages/second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "deckbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 9, + "x": 9, + "y": 9 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "waku_histogram_message_size_sum/waku_histogram_message_size_count", + "format": "heatmap", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Average msg size (kBytes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 16 + }, + "id": 2, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "waku_version{instance=\"nwaku:8003\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Version", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "version" + ] + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 16 + }, + "id": 22, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "libp2p_autonat_reachability_confidence", + "legendFormat": "{{reachability}}", + "range": true, + "refId": "A" + 
} + ], + "title": "Reachability", + "transformations": [ + { + "id": "reduce", + "options": { + "includeTimeField": false, + "mode": "reduceFields", + "reducers": [ + "max" + ] + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 16 + }, + "id": 32, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "routing_table_nodes{state=\"seen\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Discv5 (Seen Nodes)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 16 + }, + "id": 33, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "routing_table_nodes", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Discv5 (Nodes)", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Time", + "{__name__=\"routing_table_nodes\", instance=\"nwaku:8003\", job=\"nwaku\"}" + ] + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 16 + }, + "id": 25, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "expr": "libp2p_peers", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Connected Peers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 15, + "y": 16 + }, + "id": 28, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + 
"values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "libp2p_pubsub_topics", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Number Pubsub Topics", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeAsIso" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 21 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "process_start_time_seconds{job=\"nwaku\"}*1000", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Start Times (UTC)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 15, + "x": 3, + "y": 21 + }, + "id": 44, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "waku_connected_peers", + "legendFormat": "{{direction}}_{{protocol}}", + "range": true, + "refId": "A" + } + ], + "title": "Connected Peers (Direction/Protocol)", + "transformations": [], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 26 + }, + "id": 36, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "waku_peer_store_size", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Peer Store Size", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 31 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "expr": "libp2p_peers", + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A" + } + ], + "title": "Connected Peers", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 31 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(libp2p_network_bytes_total{direction=\"in\"}[$__rate_interval])", + "legendFormat": "traffic_{{direction}}", + "range": true, + "refId": "A" + } + ], + "title": "libp2p traffic (in)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 31 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(libp2p_network_bytes_total{direction=\"out\"}[$__rate_interval])", + "legendFormat": "traffic_{{direction}}", + "range": true, + "refId": "A" + } + ], + "title": "libp2p traffic 
(out)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 40 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nim_gc_heap_instance_occupied_bytes{}", + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A" + } + ], + "title": "Heap allocation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 40 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nim_gc_mem_bytes{}", + "hide": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A" + } + ], + "title": "Nim Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + 
"value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 40 + }, + "id": 128, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_number_registered_memberships", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Registered Memberships", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 48 + }, + "id": 127, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_proof_generation_duration_seconds", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Proof Generation (seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 48 + }, + "id": 126, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_proof_verification_duration_seconds", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } 
+ ], + "title": "RLN Proof Verification (seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 48 + }, + "id": 135, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_membership_insertion_duration_seconds", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Membership Insertion (seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": 
"text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 54 + }, + "id": 134, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_membership_credentials_import_duration_seconds", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Credentials Import (seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 54 + }, + "id": 137, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_messages_total_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Messages Total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, 
+ "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 54 + }, + "id": 136, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_proof_verification_total_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Proof Verification Total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 60 + }, + "id": 133, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + 
"uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_invalid_messages_total_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Invalid Messages Total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 60 + }, + "id": 130, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_spam_messages_total_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{__name__}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Spam Messages Total", 
+ "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 60 + }, + "id": 138, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "waku_rln_invalid_messages_total_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RLN Invalid Messages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Number of messages currently stored in the database", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", 
+ "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 0, + "y": 66 + }, + "id": 141, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "pg_tb_stats_messages{}", + "instant": false, + "legendFormat": "{{ pubsubtopic }}", + "range": true, + "refId": "A" + } + ], + "title": "# messages per shard", + "type": "timeseries" + }, + { + "datasource": { + "type": "postgres", + "uid": "e5d2e0c2-371d-4178-ac71-edc122fb459c" + }, + "description": "Messages in local database per app name, as extracted from the content topic.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [ + { + "options": { + "/waku/2/rs/1/0": { + "index": 0, + "text": "0" + }, + "/waku/2/rs/1/1": { + "index": 1, + "text": "1" + }, + "/waku/2/rs/1/2": { + "index": 2, + "text": "2" + }, + "/waku/2/rs/1/3": { + "index": 3, + "text": "3" + }, + "/waku/2/rs/1/4": { + "index": 4, + "text": "4" + }, + "/waku/2/rs/1/5": { + 
"index": 5, + "text": "5" + }, + "/waku/2/rs/1/6": { + "index": 6, + "text": "6" + }, + "/waku/2/rs/1/7": { + "index": 7, + "text": "7" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Number of Messages (sum)" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Payload Size (sum)" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 9, + "y": 66 + }, + "id": 144, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "postgres", + "uid": "e5d2e0c2-371d-4178-ac71-edc122fb459c" + }, + "editorMode": "code", + "format": "table", + "hide": false, + "rawQuery": true, + "rawSql": "SELECT REGEXP_REPLACE(contenttopic,'^\\/(.+)\\/(\\d+)\\/(.+)\\/(.+)$','\\1') as \"App name\", COUNT(id), pg_column_size(payload)\nFROM messages\nGROUP BY contenttopic, payload", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "pubsubtopic", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "name": "pubsubtopic", + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "messages" + } + ], + "title": "Stored Message by Content Topic App Name", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "contenttopic": "App name", + "count": "Number of Messages", + "pg_column_size": "Total Payload Size" 
+ } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "App name": { + "aggregations": [ + "uniqueValues" + ], + "operation": "groupby" + }, + "Number of Messages": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Total Payload Size": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "pg_column_size": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Number of Messages (sum)" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Number of messages currently stored in the database", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 9, + "x": 0, + "y": 77 + }, + "id": 146, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": 
false, + "expr": "pg_tb_messages_count{}", + "instant": false, + "interval": "", + "legendFormat": "messages", + "range": true, + "refId": "A" + } + ], + "title": "Unique stored messages (Postgres)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 84 + }, + "id": 46, + "panels": [], + "title": "Postgres", + "type": "row" + }, + { + "colorBackground": false, + "colorValue": false, + "datasource": "Prometheus", + "description": "Source: server_version_num", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 85 + }, + "id": 11, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(50, 168, 82)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(pg_settings_server_version_num)", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "PostgreSQL Version", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": 
"N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "description": "Transactions committed + roolback per minute\n\nSource: pg_stat_database,xact_commit + xact_rollback", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 85 + }, + "id": 14, + "interval": "", + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((rate(pg_stat_database_xact_commit{instance=\"$Instance\"}[$Interval])))+sum((rate(pg_stat_database_xact_rollback{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Transaction rate (Postgres)", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ 
+ { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "description": "Statements executed per Minute.\n\nSource: pg_stat_statements.calls", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 85 + }, + "id": 93, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((rate(pg_stat_statements_calls{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Query rate (Postgres)", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "description": "Source: pg_stat_statements.total_time / pg_stat_statements.calls", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 85 + }, + "id": 102, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((delta(pg_stat_statements_total_time_seconds{instance=\"$Instance\"}[$Interval])))/sum((delta(pg_stat_statements_calls{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Average query runtime (Postgres)", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "decimals": 2, + "description": "Size of all databases in $Instance.\n\nSource: pg_database_size()", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 85 + }, + "id": 37, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(pg_database_size_bytes{instance=\"$Instance\"})", + "refId": "A" + } + ], + "thresholds": "", + "title": "Total database size (Postgres)", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + 
"description": "Max Replication lag behind master in seconds\n\nOnly available on a standby system.\n\nSource: pg_last_xact_replay_timestamp\n\nUse: pg_stat_replication for Details.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 85 + }, + "id": 84, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(pg_replication_lag{instance=\"$Instance\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Max Replication Lag (Postgres)", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "max" + }, + { + "datasource": "Prometheus", + "description": "Shared buffer hits vs reads from disc", + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [ + { + "id": 0, + "op": "=", + "text": 
"N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-red" + }, + { + "color": "semi-dark-yellow", + "value": 80 + }, + { + "color": "semi-dark-green", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 0, + "y": 88 + }, + "id": 16, + "links": [], + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "expr": "sum(pg_stat_database_blks_hit{instance=~\"$Instance\"})/(sum(pg_stat_database_blks_hit{instance=~\"$Instance\"})+sum(pg_stat_database_blks_read{instance=~\"$Instance\"}))*100", + "refId": "A" + } + ], + "title": "Shared Buffer Hits (Postgres)", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "description": "Percentage of max_connections used", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-green", + "value": null + }, + { + "color": "semi-dark-yellow", + "value": 0.75 + }, + { + "color": "semi-dark-red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 3, + "y": 88 + }, + "id": 9, + "links": [], + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "expr": 
"sum(pg_stat_database_numbackends)/max(pg_settings_max_connections)", + "refId": "A" + } + ], + "title": "Connections used (Postgres)", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "description": "Transaction committed vs rollbacked", + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-red", + "value": null + }, + { + "color": "#EAB839", + "value": 0.75 + }, + { + "color": "semi-dark-green", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 6, + "y": 88 + }, + "id": 15, + "links": [], + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "expr": "sum(pg_stat_database_xact_commit{instance=\"$Instance\"})/(sum(pg_stat_database_xact_commit{instance=\"$Instance\"}) + sum(pg_stat_database_xact_rollback{instance=\"$Instance\"}))", + "refId": "A" + } + ], + "title": "Commit Ratio (Postgres)", + "type": "gauge" + }, + { + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "description": "Clients executing Statements.\n\nSource: pg_stat_activity", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": 
{ + "h": 3, + "w": 4, + "x": 12, + "y": 88 + }, + "id": 23, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(pg_stat_activity_count{state=\"active\",instance=\"$Instance\"})", + "refId": "A" + } + ], + "thresholds": "", + "title": "Active clients (Postgres)", + "type": "stat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeAsIso" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 88 + }, + "id": 125, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + 
}, + "editorMode": "code", + "expr": "pg_postmaster_start_time_seconds*1000", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Postgres start time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 51, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 6, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 91 + }, + "id": 142, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pg_stat_user_tables_n_live_tup{datname=\"postgres\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Live", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"pg_stat_user_tables_n_dead_tup", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Dead", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Estimated number of rows (Postgres)", + "type": "timeseries" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "description": "View: pg_stat_activity", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 95 + }, + "hiddenSeries": false, + "id": 24, + "interval": "$Interval", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + { + "targetBlank": true, + "title": "PostgreSQL Documentation", + "url": "https://www.postgresql.org/docs/current/monitoring-stats.html" + } + ], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (state) (pg_stat_activity_count{instance=\"$Instance\"})", + "legendFormat": "{{state}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Connections by state (stacked) (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": 
{ + "align": false + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "description": "View: pg_stat_activity", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 99 + }, + "hiddenSeries": false, + "id": 121, + "interval": "$Interval", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + { + "targetBlank": true, + "title": "PostgreSQL Documentation", + "url": "https://www.postgresql.org/docs/current/monitoring-stats.html" + } + ], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (datname) (pg_stat_activity_count{instance=\"$Instance\"})", + "legendFormat": "{{datname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Connections by database (stacked) (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "1 Minute rate of transactions committed or rollback.", + "fieldConfig": { + 
"defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 104 + }, + "hiddenSeries": false, + "id": 122, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum ((rate(pg_stat_database_xact_commit[$Interval])))", + "interval": "", + "legendFormat": "committed", + "refId": "A" + }, + { + "expr": "sum ((rate(pg_stat_database_xact_rollback[$Interval])))", + "hide": false, + "interval": "", + "legendFormat": "rollback", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Transactions (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Source: pg_stat_database", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 108 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": false, + "show": true, + "total": 
false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((rate(pg_stat_database_tup_inserted{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "Inserts", + "refId": "A" + }, + { + "expr": "sum((rate(pg_stat_database_tup_updated{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "Updates", + "refId": "B" + }, + { + "expr": "sum((rate(pg_stat_database_tup_deleted{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "Deletes", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Tuples inserts/updates/deletes (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "* blk_read_time: Time spent reading data file blocks by backends in this database, in milliseconds\n* blk_write_time: Time spent writing data file blocks by backends in this database, in milliseconds\n\ntrack_io_timing needs to be enabled", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 113 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, +
"show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum ((rate(pg_stat_database_blk_read_time{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "blk_read_time", + "refId": "A" + }, + { + "expr": "sum ((rate(pg_stat_database_blk_write_time{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "blk_write_time", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "I/O Read/Write time (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Source: pg_stat_database\n\n* tup_fetched: rows needed to satisfy queries\n* tup_returned: rows read/scanned", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 117 + }, + "hiddenSeries": false, + "id": 111, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((rate(pg_stat_database_tup_fetched{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "Fetched", + "refId": "A" + }, + { + "expr": "sum((rate(pg_stat_database_tup_returned{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "Returned", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Tuples fetched/returned (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Source: pg_locks", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 122 + }, + "hiddenSeries": false, + "id": 123, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "PostgreSQL Lock Modes", + "url": "https://www.postgresql.org/docs/12/explicit-locking.html#LOCKING-TABLES" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (pg_locks_count{instance=\"$Instance\"})", + "legendFormat": 
"{{mode}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Locks by state (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Should be 0 \n\nSource: pg_stat_database\n\nWith log_lock_waits turned on, deadlocks will be logged to the PostgreSQL Logfiles.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 126 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "PostgreSQL Locking", + "url": "https://www.postgresql.org/docs/12/explicit-locking.html" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (datname) ((rate(pg_stat_database_deadlocks{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "{{datname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Deadlocks by database (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Should be 0. If temporary files are created, it can indicate insufficient work_mem. With log_temp_files the creation of temporary files are logged to the PostgreSQL Logfiles.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 131 + }, + "hiddenSeries": false, + "id": 31, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "PostgreSQL Ressources", + "url": "https://www.postgresql.org/docs/current/runtime-config-resource.html" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (datname) ((rate(pg_stat_database_temp_files{instance=\"$Instance\"}[$Interval])))", + "interval": "", + "legendFormat": "{{datname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Temporary files by database (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": 
true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Lag behind master in seconds.\n\nOnly available on a standby System.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 132 + }, + "hiddenSeries": false, + "id": 120, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(pg_replication_lag{instance=\"$Instance\"})", + "instant": false, + "intervalFactor": 1, + "legendFormat": "lag ", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Replication lag (Postgres)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "1m", + "revision": 1, + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "postgres-exporter:9187", + "value": "postgres-exporter:9187" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values({job=\"postgres-exporter\"}, instance)", + "hide": 0, + "includeAll": false, + 
"multi": false, + "name": "Instance", + "options": [], + "query": "label_values({job=\"postgres-exporter\"}, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(datname)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "Database", + "options": [], + "query": "label_values(datname)", + "refresh": 1, + "regex": "/^(?!template*|postgres).*$/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "10m", + "value": "10m" + }, + "hide": 0, + "name": "Interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_Interval" + }, + { + "selected": false, + "text": "30sec", + "value": "30sec" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "30sec,1m,10m,30m,1h,6h,12h,1d", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "browser", + "title": "nwaku-monitoring", + "uid": "yns_4vFVk", + "version": 1, + 
"weekStart": "" +} \ No newline at end of file diff --git a/apps/sonda/monitoring/configuration/datasources.yaml b/apps/sonda/monitoring/configuration/datasources.yaml new file mode 100644 index 0000000000..9f4f51f86a --- /dev/null +++ b/apps/sonda/monitoring/configuration/datasources.yaml @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + org_id: 1 + url: http://prometheus:9090 + is_default: true + version: 1 + editable: true \ No newline at end of file diff --git a/apps/sonda/monitoring/configuration/grafana-plugins.env b/apps/sonda/monitoring/configuration/grafana-plugins.env new file mode 100644 index 0000000000..27808098c7 --- /dev/null +++ b/apps/sonda/monitoring/configuration/grafana-plugins.env @@ -0,0 +1,2 @@ +#GF_INSTALL_PLUGINS=grafana-worldmap-panel,grafana-piechart-panel,digrich-bubblechart-panel,yesoreyeram-boomtheme-panel,briangann-gauge-panel,jdbranham-diagram-panel,agenty-flowcharting-panel,citilogics-geoloop-panel,savantly-heatmap-panel,mtanda-histogram-panel,pierosavi-imageit-panel,michaeldmoore-multistat-panel,zuburqan-parity-report-panel,natel-plotly-panel,bessler-pictureit-panel,grafana-polystat-panel,corpglory-progresslist-panel,snuids-radar-panel,fzakaria-simple-config.config.annotations-datasource,vonage-status-panel,snuids-trafficlights-panel,pr0ps-trackmap-panel,alexandra-trackmap-panel,btplc-trend-box-panel +GF_INSTALL_PLUGINS=grafana-worldmap-panel,grafana-piechart-panel,yesoreyeram-boomtheme-panel,briangann-gauge-panel,pierosavi-imageit-panel,bessler-pictureit-panel,vonage-status-panel diff --git a/apps/sonda/monitoring/configuration/grafana.ini b/apps/sonda/monitoring/configuration/grafana.ini new file mode 100644 index 0000000000..f237726b3e --- /dev/null +++ b/apps/sonda/monitoring/configuration/grafana.ini @@ -0,0 +1,51 @@ +instance_name = nwaku dashboard + +;[dashboards.json] +;enabled = true +;path = /home/git/grafana/grafana-dashboards/dashboards + + 
+#################################### Auth ########################## +[auth] +disable_login_form = false + +#################################### Anonymous Auth ########################## +[auth.anonymous] +# enable anonymous access +enabled = true + +# specify organization name that should be used for unauthenticated users +;org_name = Public + +# specify role for unauthenticated users +org_role = Admin +; org_role = Viewer + +;[security] +;admin_user = ocr +;admin_password = ocr + +;[users] +# disable user signup / registration +;allow_sign_up = false + +# Set to true to automatically assign new users to the default organization (id 1) +;auto_assign_org = true + +# Default role new users will be automatically assigned (if disabled above is set to true) +;auto_assign_org_role = Viewer + +#################################### SMTP / Emailing ########################## +;[smtp] +;enabled = false +;host = localhost:25 +;user = +;password = +;cert_file = +;key_file = +;skip_verify = false +;from_address = admin@grafana.localhost + +;[emails] +;welcome_email_on_sign_up = false + diff --git a/apps/sonda/monitoring/configuration/pg-exporter-queries.yml b/apps/sonda/monitoring/configuration/pg-exporter-queries.yml new file mode 100644 index 0000000000..bb1d7320a1 --- /dev/null +++ b/apps/sonda/monitoring/configuration/pg-exporter-queries.yml @@ -0,0 +1,284 @@ +pg_replication: + query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag" + master: true + metrics: + - lag: + usage: "GAUGE" + description: "Replication lag behind master in seconds" + +pg_postmaster: + query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" + master: true + metrics: + - start_time_seconds: + usage: "GAUGE" + description: "Time at which postmaster started" + +pg_stat_user_tables: + query: | + SELECT + current_database() datname, + schemaname, + relname, + seq_scan, + 
seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + n_mod_since_analyze, + COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, + COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, + COALESCE(last_analyze, '1970-01-01Z') as last_analyze, + COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count + FROM + pg_stat_user_tables + metrics: + - datname: + usage: "LABEL" + description: "Name of current database" + - schemaname: + usage: "LABEL" + description: "Name of the schema that this table is in" + - relname: + usage: "LABEL" + description: "Name of this table" + - seq_scan: + usage: "COUNTER" + description: "Number of sequential scans initiated on this table" + - seq_tup_read: + usage: "COUNTER" + description: "Number of live rows fetched by sequential scans" + - idx_scan: + usage: "COUNTER" + description: "Number of index scans initiated on this table" + - idx_tup_fetch: + usage: "COUNTER" + description: "Number of live rows fetched by index scans" + - n_tup_ins: + usage: "COUNTER" + description: "Number of rows inserted" + - n_tup_upd: + usage: "COUNTER" + description: "Number of rows updated" + - n_tup_del: + usage: "COUNTER" + description: "Number of rows deleted" + - n_tup_hot_upd: + usage: "COUNTER" + description: "Number of rows HOT updated (i.e., with no separate index update required)" + - n_live_tup: + usage: "GAUGE" + description: "Estimated number of live rows" + - n_dead_tup: + usage: "GAUGE" + description: "Estimated number of dead rows" + - n_mod_since_analyze: + usage: "GAUGE" + description: "Estimated number of rows changed since last analyze" + - last_vacuum: + usage: "GAUGE" + description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" + - last_autovacuum: + usage: "GAUGE" + description: "Last time at which this table was vacuumed by the autovacuum 
daemon" + - last_analyze: + usage: "GAUGE" + description: "Last time at which this table was manually analyzed" + - last_autoanalyze: + usage: "GAUGE" + description: "Last time at which this table was analyzed by the autovacuum daemon" + - vacuum_count: + usage: "COUNTER" + description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" + - autovacuum_count: + usage: "COUNTER" + description: "Number of times this table has been vacuumed by the autovacuum daemon" + - analyze_count: + usage: "COUNTER" + description: "Number of times this table has been manually analyzed" + - autoanalyze_count: + usage: "COUNTER" + description: "Number of times this table has been analyzed by the autovacuum daemon" + +pg_statio_user_tables: + query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" + metrics: + - datname: + usage: "LABEL" + description: "Name of current database" + - schemaname: + usage: "LABEL" + description: "Name of the schema that this table is in" + - relname: + usage: "LABEL" + description: "Name of this table" + - heap_blks_read: + usage: "COUNTER" + description: "Number of disk blocks read from this table" + - heap_blks_hit: + usage: "COUNTER" + description: "Number of buffer hits in this table" + - idx_blks_read: + usage: "COUNTER" + description: "Number of disk blocks read from all indexes on this table" + - idx_blks_hit: + usage: "COUNTER" + description: "Number of buffer hits in all indexes on this table" + - toast_blks_read: + usage: "COUNTER" + description: "Number of disk blocks read from this table's TOAST table (if any)" + - toast_blks_hit: + usage: "COUNTER" + description: "Number of buffer hits in this table's TOAST table (if any)" + - tidx_blks_read: + usage: "COUNTER" + description: "Number of disk blocks read from this table's TOAST table indexes (if any)" + - 
tidx_blks_hit: + usage: "COUNTER" + description: "Number of buffer hits in this table's TOAST table indexes (if any)" + +# WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series +pg_stat_statements: + query: "SELECT t2.rolname, t3.datname, queryid, calls, ( total_plan_time + total_exec_time ) / 1000 as total_time_seconds, ( min_plan_time + min_exec_time ) / 1000 as min_time_seconds, ( max_plan_time + max_exec_time ) / 1000 as max_time_seconds, ( mean_plan_time + mean_exec_time ) / 1000 as mean_time_seconds, ( stddev_plan_time + stddev_exec_time ) / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin' AND queryid IS NOT NULL" + master: true + metrics: + - rolname: + usage: "LABEL" + description: "Name of user" + - datname: + usage: "LABEL" + description: "Name of database" + - queryid: + usage: "LABEL" + description: "Query ID" + - calls: + usage: "COUNTER" + description: "Number of times executed" + - total_time_seconds: + usage: "COUNTER" + description: "Total time spent in the statement, in seconds" + - min_time_seconds: + usage: "GAUGE" + description: "Minimum time spent in the statement, in seconds" + - max_time_seconds: + usage: "GAUGE" + description: "Maximum time spent in the statement, in seconds" + - mean_time_seconds: + usage: "GAUGE" + description: "Mean time spent in the statement, in seconds" + - stddev_time_seconds: + usage: "GAUGE" + description: "Population standard deviation of time spent in the statement, in seconds" + - rows: + usage: "COUNTER" + 
description: "Total number of rows retrieved or affected by the statement" + - shared_blks_hit: + usage: "COUNTER" + description: "Total number of shared block cache hits by the statement" + - shared_blks_read: + usage: "COUNTER" + description: "Total number of shared blocks read by the statement" + - shared_blks_dirtied: + usage: "COUNTER" + description: "Total number of shared blocks dirtied by the statement" + - shared_blks_written: + usage: "COUNTER" + description: "Total number of shared blocks written by the statement" + - local_blks_hit: + usage: "COUNTER" + description: "Total number of local block cache hits by the statement" + - local_blks_read: + usage: "COUNTER" + description: "Total number of local blocks read by the statement" + - local_blks_dirtied: + usage: "COUNTER" + description: "Total number of local blocks dirtied by the statement" + - local_blks_written: + usage: "COUNTER" + description: "Total number of local blocks written by the statement" + - temp_blks_read: + usage: "COUNTER" + description: "Total number of temp blocks read by the statement" + - temp_blks_written: + usage: "COUNTER" + description: "Total number of temp blocks written by the statement" + - blk_read_time_seconds: + usage: "COUNTER" + description: "Total time the statement spent reading blocks, in seconds (if track_io_timing is enabled, otherwise zero)" + - blk_write_time_seconds: + usage: "COUNTER" + description: "Total time the statement spent writing blocks, in seconds (if track_io_timing is enabled, otherwise zero)" + +pg_process_idle: + query: | + WITH + metrics AS ( + SELECT + application_name, + SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum, + COUNT(*) AS process_idle_seconds_count + FROM pg_stat_activity + WHERE state = 'idle' + GROUP BY application_name + ), + buckets AS ( + SELECT + application_name, + le, + SUM( + CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le + THEN 1 + 
ELSE 0 + END + )::bigint AS bucket + FROM + pg_stat_activity, + UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le + GROUP BY application_name, le + ORDER BY application_name, le + ) + SELECT + application_name, + process_idle_seconds_sum as seconds_sum, + process_idle_seconds_count as seconds_count, + ARRAY_AGG(le) AS seconds, + ARRAY_AGG(bucket) AS seconds_bucket + FROM metrics JOIN buckets USING (application_name) + GROUP BY 1, 2, 3 + metrics: + - application_name: + usage: "LABEL" + description: "Application Name" + - seconds: + usage: "HISTOGRAM" + description: "Idle time of server processes" + +pg_tb_stats: + query: | + select pubsubtopic, count(*) AS messages FROM (SELECT id, array_agg(pubsubtopic ORDER BY pubsubtopic) AS pubsubtopic FROM messages GROUP BY id) sub GROUP BY pubsubtopic ORDER BY pubsubtopic; + metrics: + - pubsubtopic: + usage: "LABEL" + description: "pubsubtopic" + - messages: + usage: "GAUGE" + description: "Number of messages for the given pubsub topic" + +pg_tb_messages: + query: | + SELECT + COUNT(ID) + FROM messages + metrics: + - count: + usage: "GAUGE" + description: "Row count in `messages` table" diff --git a/apps/sonda/monitoring/configuration/postgres-exporter.yml b/apps/sonda/monitoring/configuration/postgres-exporter.yml new file mode 100644 index 0000000000..a8380dd72e --- /dev/null +++ b/apps/sonda/monitoring/configuration/postgres-exporter.yml @@ -0,0 +1,9 @@ +auth_modules: + mypostgres: + type: userpass + userpass: + username: postgres + password: ${POSTGRES_PASSWORD} + options: + # options become key=value parameters of the DSN + sslmode: disable diff --git a/apps/sonda/monitoring/prometheus-config.yml b/apps/sonda/monitoring/prometheus-config.yml new file mode 100644 index 0000000000..ddbba3bd6d --- /dev/null +++ b/apps/sonda/monitoring/prometheus-config.yml @@ -0,0 +1,14 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + monitor: "Monitoring" + +scrape_configs: + - job_name: "nwaku" + 
static_configs: + - targets: ["nwaku:8003"] + + - job_name: postgres-exporter + static_configs: + - targets: ['postgres-exporter:9187'] diff --git a/apps/sonda/register_rln.sh b/apps/sonda/register_rln.sh new file mode 100755 index 0000000000..ab660f1d83 --- /dev/null +++ b/apps/sonda/register_rln.sh @@ -0,0 +1,31 @@ +#!/bin/sh + + +if test -f ./keystore/keystore.json; then + echo "keystore/keystore.json already exists. Use it instead of creating a new one." + echo "Exiting" + exit 1 +fi + + +if test -f .env; then + echo "Using .env file" + . $(pwd)/.env +fi + +# TODO: Set nwaku release when ready instead of quay + +if test -n "${ETH_CLIENT_ADDRESS}"; then + echo "ETH_CLIENT_ADDRESS variable was renamed to RLN_RELAY_ETH_CLIENT_ADDRESS" + echo "Please update your .env file" + exit 1 +fi + +docker run -v $(pwd)/keystore:/keystore/:Z harbor.status.im/wakuorg/nwaku:v0.30.1 generateRlnKeystore \ +--rln-relay-eth-client-address=${RLN_RELAY_ETH_CLIENT_ADDRESS} \ +--rln-relay-eth-private-key=${ETH_TESTNET_KEY} \ +--rln-relay-eth-contract-address=0xCB33Aa5B38d79E3D9Fa8B10afF38AA201399a7e3 \ +--rln-relay-cred-path=/keystore/keystore.json \ +--rln-relay-cred-password="${RLN_RELAY_CRED_PASSWORD}" \ +--rln-relay-user-message-limit=20 \ +--execute diff --git a/apps/sonda/run_node.sh b/apps/sonda/run_node.sh new file mode 100644 index 0000000000..e715ff26a3 --- /dev/null +++ b/apps/sonda/run_node.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +echo "I am a nwaku node" + +if test -n "${ETH_CLIENT_ADDRESS}"; then # legacy variable name: refuse to start so the user renames it + echo "ETH_CLIENT_ADDRESS variable was renamed to RLN_RELAY_ETH_CLIENT_ADDRESS" + echo "Please update your .env file" + exit 1 +fi + +if [ -z "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ]; then + echo "Missing Eth client address, please refer to README.md for detailed instructions" + exit 1 +fi + +MY_EXT_IP=$(wget -qO- https://api4.ipify.org) +DNS_WSS_CMD= + +if [ -n "${DOMAIN}" ]; then + + LETSENCRYPT_PATH=/etc/letsencrypt/live/${DOMAIN} + + if ! 
[ -d "${LETSENCRYPT_PATH}" ]; then + apk add --no-cache certbot + + certbot certonly\ + --non-interactive\ + --agree-tos\ + --no-eff-email\ + --no-redirect\ + --email admin@${DOMAIN}\ + -d ${DOMAIN}\ + --standalone + fi + + if ! [ -e "${LETSENCRYPT_PATH}/privkey.pem" ]; then + echo "The certificate does not exist" + sleep 60 + exit 1 + fi + + WS_SUPPORT="--websocket-support=true" + WSS_SUPPORT="--websocket-secure-support=true" + WSS_KEY="--websocket-secure-key-path=${LETSENCRYPT_PATH}/privkey.pem" + WSS_CERT="--websocket-secure-cert-path=${LETSENCRYPT_PATH}/cert.pem" + DNS4_DOMAIN="--dns4-domain-name=${DOMAIN}" + + DNS_WSS_CMD="${WS_SUPPORT} ${WSS_SUPPORT} ${WSS_CERT} ${WSS_KEY} ${DNS4_DOMAIN}" +fi + +if [ -n "${NODEKEY}" ]; then + NODEKEY=--nodekey=${NODEKEY} +fi + +RLN_RELAY_CRED_PATH=--rln-relay-cred-path=${RLN_RELAY_CRED_PATH:-/keystore/keystore.json} + + +if [ -n "${RLN_RELAY_CRED_PASSWORD}" ]; then + RLN_RELAY_CRED_PASSWORD=--rln-relay-cred-password="${RLN_RELAY_CRED_PASSWORD}" +fi + +STORE_RETENTION_POLICY=--store-message-retention-policy=size:1GB + +if [ -n "${STORAGE_SIZE}" ]; then + STORE_RETENTION_POLICY=--store-message-retention-policy=size:"${STORAGE_SIZE}" +fi + +exec /usr/bin/wakunode\ + --relay=false\ + --filter=false\ + --lightpush=false\ + --keep-alive=true\ + --max-connections=150\ + --cluster-id=2\ + --discv5-discovery=true\ + --discv5-udp-port=9005\ + --discv5-enr-auto-update=True\ + --log-level=DEBUG\ + --tcp-port=30304\ + --metrics-server=True\ + --metrics-server-port=8003\ + --metrics-server-address=0.0.0.0\ + --rest=true\ + --rest-admin=true\ + --rest-address=0.0.0.0\ + --rest-port=8645\ + --rest-allow-origin="waku-org.github.io"\ + --rest-allow-origin="localhost:*"\ + --nat=extip:"${MY_EXT_IP}"\ + --store=false\ + --rln-relay-eth-client-address="${RLN_RELAY_ETH_CLIENT_ADDRESS}"\ + --rln-relay-tree-path="/etc/rln_tree"\ + ${RLN_RELAY_CRED_PATH}\ + ${RLN_RELAY_CRED_PASSWORD}\ + ${DNS_WSS_CMD}\ + ${NODEKEY}\ + ${STORE_RETENTION_POLICY}\ + 
${EXTRA_ARGS} + diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py new file mode 100644 index 0000000000..312132930b --- /dev/null +++ b/apps/sonda/traffic.py @@ -0,0 +1,62 @@ +import requests +import time +import json +import os +import base64 +import sys +import urllib.parse +import requests +import argparse + +def send_waku_msg(node_address, kbytes, pubsub_topic, content_topic): + # TODO dirty trick .replace("=", "") + base64_payload = (base64.b64encode(os.urandom(kbytes*1000)).decode('ascii')).replace("=", "") + print("size message kBytes", len(base64_payload) *(3/4)/1000, "KBytes") + body = { + "payload": base64_payload, + "contentTopic": content_topic, + "version": 1, # You can adjust the version as needed + #"timestamp": int(time.time()) + } + + encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='') + + url = f"{node_address}/relay/v1/messages/{encoded_pubsub_topic}" + headers = {'content-type': 'application/json'} + + print('Waku REST API: %s PubSubTopic: %s, ContentTopic: %s' % (url, pubsub_topic, content_topic)) + s_time = time.time() + + response = None + + try: + print("Sending request") + response = requests.post(url, json=body, headers=headers) + except Exception as e: + print(f"Error sending request: {e}") + + if(response != None): + elapsed_ms = (time.time() - s_time) * 1000 + print('Response from %s: status:%s content:%s [%.4f ms.]' % (node_address, \ + response.status_code, response.text, elapsed_ms)) + +parser = argparse.ArgumentParser(description='') + + + +parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default="/waku/2/rs/2/0") +parser.add_argument('-s', '--msg-size-kbytes', type=int, help='message size in kBytes', default=10) +parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) +args = parser.parse_args() + +print(args) + + +while True: + # calls are blocking + # limited by the time it takes the REST API to reply + + 
send_waku_msg('http://nwaku:8645', args.msg_size_kbytes, args.pubsub_topic, "random_content_topic") + + print("sleeping: ", args.delay_seconds, " seconds") + time.sleep(args.delay_seconds) \ No newline at end of file From 878a3e39afc8913d3d870d3e26f7479ae8c36584 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 11:19:51 +0300 Subject: [PATCH 07/34] fixing env variables --- apps/sonda/docker-compose.yml | 7 +++++++ apps/sonda/run_node.sh | 30 ++++++++++++++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index e5d022ab99..dfb4086b69 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -14,6 +14,9 @@ x-rln-environment: &rln_env RLN_RELAY_CRED_PATH: ${RLN_RELAY_CRED_PATH:-} # Optional: Add your RLN_RELAY_CRED_PATH after the "-" RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-" +x-sonda-env: &sonda_env + CLUSTER_ID: ${CLUSTER_ID:-2} # Add your CLUSTER_ID after the "-" + # Services definitions services: nwaku: @@ -38,6 +41,7 @@ services: STORAGE_SIZE: ${STORAGE_SIZE} <<: - *rln_env + - *sonda_env volumes: - ./run_node.sh:/opt/run_node.sh:Z - ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z @@ -51,6 +55,9 @@ services: build: context: . 
dockerfile: Dockerfile.rest-traffic + environment: + <<: + - *sonda_env command: > python /opt/traffic.py --msg-size-kbytes=${MSG_SIZE_KBYTES:-10} diff --git a/apps/sonda/run_node.sh b/apps/sonda/run_node.sh index e715ff26a3..4a3fd8ae99 100644 --- a/apps/sonda/run_node.sh +++ b/apps/sonda/run_node.sh @@ -8,11 +8,18 @@ if test -n "${ETH_CLIENT_ADDRESS}" -o ; then exit 1 fi -if [ -z "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ]; then +if [ -z "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ] && [ "${CLUSTER_ID}" -eq 1 ]; then echo "Missing Eth client address, please refer to README.md for detailed instructions" exit 1 fi +if [ "${CLUSTER_ID}" -ne 1 ]; then + echo "CLUSTER_ID is not equal to 1, clearing RLN configurations" + RLN_RELAY_CRED_PATH="" + RLN_RELAY_ETH_CLIENT_ADDRESS="" + RLN_RELAY_CRED_PASSWORD="" +fi + MY_EXT_IP=$(wget -qO- https://api4.ipify.org) DNS_WSS_CMD= @@ -52,26 +59,26 @@ if [ -n "${NODEKEY}" ]; then NODEKEY=--nodekey=${NODEKEY} fi -RLN_RELAY_CRED_PATH=--rln-relay-cred-path=${RLN_RELAY_CRED_PATH:-/keystore/keystore.json} - +if [ "${CLUSTER_ID}" -eq 1 ]; then + RLN_RELAY_CRED_PATH=--rln-relay-cred-path=${RLN_RELAY_CRED_PATH:-/keystore/keystore.json} + RLN_TREE_PATH=--rln-relay-tree-path="/etc/rln_tree" +fi if [ -n "${RLN_RELAY_CRED_PASSWORD}" ]; then RLN_RELAY_CRED_PASSWORD=--rln-relay-cred-password="${RLN_RELAY_CRED_PASSWORD}" fi -STORE_RETENTION_POLICY=--store-message-retention-policy=size:1GB - -if [ -n "${STORAGE_SIZE}" ]; then - STORE_RETENTION_POLICY=--store-message-retention-policy=size:"${STORAGE_SIZE}" +if [ -n "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ]; then + RLN_RELAY_ETH_CLIENT_ADDRESS=--rln-relay-eth-client-address="${RLN_RELAY_ETH_CLIENT_ADDRESS}" fi exec /usr/bin/wakunode\ - --relay=false\ + --relay=true\ --filter=false\ --lightpush=false\ --keep-alive=true\ --max-connections=150\ - --cluster-id=2\ + --cluster-id="${CLUSTER_ID}"\ --discv5-discovery=true\ --discv5-udp-port=9005\ --discv5-enr-auto-update=True\ @@ -88,12 +95,11 @@ exec /usr/bin/wakunode\ 
--rest-allow-origin="localhost:*"\ --nat=extip:"${MY_EXT_IP}"\ --store=false\ - --rln-relay-eth-client-address="${RLN_RELAY_ETH_CLIENT_ADDRESS}"\ - --rln-relay-tree-path="/etc/rln_tree"\ ${RLN_RELAY_CRED_PATH}\ ${RLN_RELAY_CRED_PASSWORD}\ + ${RLN_RELAY_TREE_PATH}\ + ${RLN_RELAY_ETH_CLIENT_ADDRESS}\ ${DNS_WSS_CMD}\ ${NODEKEY}\ - ${STORE_RETENTION_POLICY}\ ${EXTRA_ARGS} From 637d032291f61d248b1147f803eca139cdb2a96e Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 11:51:05 +0300 Subject: [PATCH 08/34] configuring bootstrap nodes and shard --- apps/sonda/docker-compose.yml | 4 +++- apps/sonda/run_node.sh | 6 ++++++ apps/sonda/traffic.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index dfb4086b69..ac32ae2b89 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -15,7 +15,8 @@ x-rln-environment: &rln_env RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-" x-sonda-env: &sonda_env - CLUSTER_ID: ${CLUSTER_ID:-2} # Add your CLUSTER_ID after the "-" + CLUSTER_ID: ${CLUSTER_ID:-1} # Add your CLUSTER_ID after the "-" + SHARD: ${SHARD:-0} # Add your SHARD after the "-" # Services definitions services: @@ -62,6 +63,7 @@ services: python /opt/traffic.py --msg-size-kbytes=${MSG_SIZE_KBYTES:-10} --delay-seconds=${TRAFFIC_DELAY_SECONDS:-15} + --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} volumes: - ./traffic.py:/opt/traffic.py:Z depends_on: diff --git a/apps/sonda/run_node.sh b/apps/sonda/run_node.sh index 4a3fd8ae99..619603ddfc 100644 --- a/apps/sonda/run_node.sh +++ b/apps/sonda/run_node.sh @@ -72,6 +72,8 @@ if [ -n "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ]; then RLN_RELAY_ETH_CLIENT_ADDRESS=--rln-relay-eth-client-address="${RLN_RELAY_ETH_CLIENT_ADDRESS}" fi +# TO DO: configure bootstrap nodes in env + exec /usr/bin/wakunode\ --relay=true\ --filter=false\ @@ -95,6 +97,10 @@ exec 
/usr/bin/wakunode\ --rest-allow-origin="localhost:*"\ --nat=extip:"${MY_EXT_IP}"\ --store=false\ + --pubsub-topic="/waku/2/rs/${CLUSTER_ID}/${SHARD}"\ + --discv5-bootstrap-node="enr:-QEKuECA0zhRJej2eaOoOPddNcYr7-5NdRwuoLCe2EE4wfEYkAZhFotg6Kkr8K15pMAGyUyt0smHkZCjLeld0BUzogNtAYJpZIJ2NIJpcISnYxMvim11bHRpYWRkcnO4WgAqNiVib290LTAxLmRvLWFtczMuc2hhcmRzLnRlc3Quc3RhdHVzLmltBnZfACw2JWJvb3QtMDEuZG8tYW1zMy5zaGFyZHMudGVzdC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaEC3rRtFQSgc24uWewzXaxTY8hDAHB8sgnxr9k8Rjb5GeSDdGNwgnZfg3VkcIIjKIV3YWt1Mg0"\ + --discv5-bootstrap-node="enr:-QEcuEAgXDqrYd_TrpUWtn3zmxZ9XPm7O3GS6lV7aMJJOTsbOAAeQwSd_eoHcCXqVzTUtwTyB4855qtbd8DARnExyqHPAYJpZIJ2NIJpcIQihw1Xim11bHRpYWRkcnO4bAAzNi5ib290LTAxLmdjLXVzLWNlbnRyYWwxLWEuc2hhcmRzLnRlc3Quc3RhdHVzLmltBnZfADU2LmJvb3QtMDEuZ2MtdXMtY2VudHJhbDEtYS5zaGFyZHMudGVzdC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaECxjqgDQ0WyRSOilYU32DA5k_XNlDis3m1VdXkK9xM6kODdGNwgnZfg3VkcIIjKIV3YWt1Mg0"\ + --discv5-bootstrap-node="enr:-QEcuEAX6Qk-vVAoJLxR4A_4UVogGhvQrqKW4DFKlf8MA1PmCjgowL-LBtSC9BLjXbb8gf42FdDHGtSjEvvWKD10erxqAYJpZIJ2NIJpcIQI2hdMim11bHRpYWRkcnO4bAAzNi5ib290LTAxLmFjLWNuLWhvbmdrb25nLWMuc2hhcmRzLnRlc3Quc3RhdHVzLmltBnZfADU2LmJvb3QtMDEuYWMtY24taG9uZ2tvbmctYy5zaGFyZHMudGVzdC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaEDP7CbRk-YKJwOFFM4Z9ney0GPc7WPJaCwGkpNRyla7mCDdGNwgnZfg3VkcIIjKIV3YWt1Mg0"\ ${RLN_RELAY_CRED_PATH}\ ${RLN_RELAY_CRED_PASSWORD}\ ${RLN_RELAY_TREE_PATH}\ diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py index 312132930b..70d11ccd2c 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/traffic.py @@ -44,7 +44,7 @@ def send_waku_msg(node_address, kbytes, pubsub_topic, content_topic): -parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default="/waku/2/rs/2/0") +parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default="/waku/2/rs/1/0") parser.add_argument('-s', '--msg-size-kbytes', type=int, help='message size in kBytes', default=10) 
parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) args = parser.parse_args() From bdc418ea1f76f54dc5e8f6bd11344b1f6b4874df Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 12:16:28 +0300 Subject: [PATCH 09/34] adding store nodes env and fixing script logs --- apps/sonda/docker-compose.yml | 4 +++- apps/sonda/traffic.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index ac32ae2b89..533b28ce07 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -17,6 +17,7 @@ x-rln-environment: &rln_env x-sonda-env: &sonda_env CLUSTER_ID: ${CLUSTER_ID:-1} # Add your CLUSTER_ID after the "-" SHARD: ${SHARD:-0} # Add your SHARD after the "-" + STORE_NODES: ${STORE_NODES:-} # Services definitions services: @@ -60,10 +61,11 @@ services: <<: - *sonda_env command: > - python /opt/traffic.py + python -u /opt/traffic.py --msg-size-kbytes=${MSG_SIZE_KBYTES:-10} --delay-seconds=${TRAFFIC_DELAY_SECONDS:-15} --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} + --store-nodes=${STORE_NODES} volumes: - ./traffic.py:/opt/traffic.py:Z depends_on: diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py index 70d11ccd2c..83bdcce5f9 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/traffic.py @@ -47,10 +47,15 @@ def send_waku_msg(node_address, kbytes, pubsub_topic, content_topic): parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default="/waku/2/rs/1/0") parser.add_argument('-s', '--msg-size-kbytes', type=int, help='message size in kBytes', default=10) parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) +parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) args = parser.parse_args() print(args) +nodes = [] +if args.store_nodes is not None: + nodes = 
[s.strip() for s in args.store_nodes.split(",")] +print(nodes) while True: # calls are blocking From 35ddf9957b5aabc7c70a13bcc7cf081e4d596f1a Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 12:34:32 +0300 Subject: [PATCH 10/34] adding query period and sending sonda message --- apps/sonda/docker-compose.yml | 4 ++-- apps/sonda/traffic.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index 533b28ce07..c8e7029155 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -18,6 +18,7 @@ x-sonda-env: &sonda_env CLUSTER_ID: ${CLUSTER_ID:-1} # Add your CLUSTER_ID after the "-" SHARD: ${SHARD:-0} # Add your SHARD after the "-" STORE_NODES: ${STORE_NODES:-} + QUERY_PERIOD: ${QUERY_PERIOD-60} # Services definitions services: @@ -62,8 +63,7 @@ services: - *sonda_env command: > python -u /opt/traffic.py - --msg-size-kbytes=${MSG_SIZE_KBYTES:-10} - --delay-seconds=${TRAFFIC_DELAY_SECONDS:-15} + --delay-seconds=${QUERY_PERIOD} --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} --store-nodes=${STORE_NODES} volumes: diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py index 83bdcce5f9..90854c48e0 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/traffic.py @@ -8,12 +8,12 @@ import requests import argparse -def send_waku_msg(node_address, kbytes, pubsub_topic, content_topic): - # TODO dirty trick .replace("=", "") - base64_payload = (base64.b64encode(os.urandom(kbytes*1000)).decode('ascii')).replace("=", "") - print("size message kBytes", len(base64_payload) *(3/4)/1000, "KBytes") +def send_sonda_msg(node_address, pubsub_topic, content_topic): + + message = "Hi, I'm Sonda" + base64_message = base64.b64encode(message.encode('utf-8')).decode('ascii') body = { - "payload": base64_payload, + "payload": base64_message, "contentTopic": content_topic, "version": 1, # You can adjust the version as needed #"timestamp": int(time.time()) @@ 
-61,7 +61,7 @@ def send_waku_msg(node_address, kbytes, pubsub_topic, content_topic): # calls are blocking # limited by the time it takes the REST API to reply - send_waku_msg('http://nwaku:8645', args.msg_size_kbytes, args.pubsub_topic, "random_content_topic") + send_sonda_msg('http://nwaku:8645', args.pubsub_topic, "random_content_topic") print("sleeping: ", args.delay_seconds, " seconds") time.sleep(args.delay_seconds) \ No newline at end of file From a330aa09379962329ea692d1b291e7a9254afd3b Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 13:30:04 +0300 Subject: [PATCH 11/34] adding message querying --- apps/sonda/traffic.py | 78 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py index 90854c48e0..880037ff56 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/traffic.py @@ -8,20 +8,20 @@ import requests import argparse -def send_sonda_msg(node_address, pubsub_topic, content_topic): +def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): message = "Hi, I'm Sonda" base64_message = base64.b64encode(message.encode('utf-8')).decode('ascii') body = { - "payload": base64_message, - "contentTopic": content_topic, - "version": 1, # You can adjust the version as needed - #"timestamp": int(time.time()) + 'payload': base64_message, + 'contentTopic': content_topic, + 'version': 1, # You can adjust the version as needed + 'timestamp': timestamp } encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='') - url = f"{node_address}/relay/v1/messages/{encoded_pubsub_topic}" + url = f'{rest_address}/relay/v1/messages/{encoded_pubsub_topic}' headers = {'content-type': 'application/json'} print('Waku REST API: %s PubSubTopic: %s, ContentTopic: %s' % (url, pubsub_topic, content_topic)) @@ -30,21 +30,59 @@ def send_sonda_msg(node_address, pubsub_topic, content_topic): response = None try: - print("Sending request") + print('Sending 
request') response = requests.post(url, json=body, headers=headers) except Exception as e: - print(f"Error sending request: {e}") + print(f'Error sending request: {e}') if(response != None): elapsed_ms = (time.time() - s_time) * 1000 - print('Response from %s: status:%s content:%s [%.4f ms.]' % (node_address, \ + print('Response from %s: status:%s content:%s [%.4f ms.]' % (rest_address, \ response.status_code, response.text, elapsed_ms)) + + if(response.status_code == 200): + return True + + return False parser = argparse.ArgumentParser(description='') +def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_content_topic, timestamp): + url = f'{rest_address}/store/v3/messages' + params = {'peerAddr': urllib.parse.quote(store_node, safe=''), 'pubsubTopic': encoded_pubsub_topic, \ + 'contentTopics': encoded_content_topic, 'includeData': 'true'} + + s_time = time.time() + response = None + + try: + print(f'Sending store request to {store_node}') + response = requests.get(url, params=params) + except Exception as e: + print(f'Error sending request: {e}') + + if(response != None): + elapsed_ms = (time.time() - s_time) * 1000 + print('Response from %s: status:%s content:%s [%.4f ms.]' % (rest_address, \ + response.status_code, response.text, elapsed_ms)) + + if(response.status_code == 200): + return True + + return False + + + +def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, timestamp): + print(f'Sending store queries. 
nodes = {store_nodes}') + encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='') + encoded_content_topic = urllib.parse.quote(content_topic, safe='') + + for node in store_nodes: + send_store_query(rest_address, node, encoded_pubsub_topic, encoded_content_topic, timestamp) -parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default="/waku/2/rs/1/0") +parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0') parser.add_argument('-s', '--msg-size-kbytes', type=int, help='message size in kBytes', default=10) parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) @@ -52,16 +90,24 @@ def send_sonda_msg(node_address, pubsub_topic, content_topic): print(args) -nodes = [] +store_nodes = [] if args.store_nodes is not None: - nodes = [s.strip() for s in args.store_nodes.split(",")] -print(nodes) + store_nodes = [s.strip() for s in args.store_nodes.split(",")] +print(store_nodes) +sonda_content_topic = '/sonda/2/polls/proto' +node_rest_address = 'http://nwaku:8645' while True: # calls are blocking # limited by the time it takes the REST API to reply - send_sonda_msg('http://nwaku:8645', args.pubsub_topic, "random_content_topic") + timestamp = int(time.time()) + + res = send_sonda_msg(node_rest_address, args.pubsub_topic, sonda_content_topic, timestamp) + + print(f'sleeping: {args.delay_seconds} seconds') + time.sleep(args.delay_seconds) - print("sleeping: ", args.delay_seconds, " seconds") - time.sleep(args.delay_seconds) \ No newline at end of file + # Only send store query if message was successfully published + if(res): + send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, sonda_content_topic, timestamp) From 111482880ca404a004c2711a4748ec3a564d4ba2 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: 
Tue, 9 Jul 2024 13:45:16 +0300 Subject: [PATCH 12/34] adding timestamp to messages and queries --- apps/sonda/traffic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py index 880037ff56..008a2518fc 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/traffic.py @@ -50,7 +50,7 @@ def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_content_topic, timestamp): url = f'{rest_address}/store/v3/messages' params = {'peerAddr': urllib.parse.quote(store_node, safe=''), 'pubsubTopic': encoded_pubsub_topic, \ - 'contentTopics': encoded_content_topic, 'includeData': 'true'} + 'contentTopics': encoded_content_topic, 'includeData': 'true', 'startTime': timestamp} s_time = time.time() response = None @@ -101,7 +101,7 @@ def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, t # calls are blocking # limited by the time it takes the REST API to reply - timestamp = int(time.time()) + timestamp = time.time_ns() res = send_sonda_msg(node_rest_address, args.pubsub_topic, sonda_content_topic, timestamp) From 096a6d6957a3873cbb7e75094615f902108d6b67 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 15:01:28 +0300 Subject: [PATCH 13/34] initial prometheus integration --- apps/sonda/Dockerfile.rest-traffic | 3 --- apps/sonda/Dockerfile.sonda | 3 +++ apps/sonda/docker-compose.yml | 6 ++++-- apps/sonda/monitoring/prometheus-config.yml | 6 +----- apps/sonda/traffic.py | 10 ++++++++++ 5 files changed, 18 insertions(+), 10 deletions(-) delete mode 100644 apps/sonda/Dockerfile.rest-traffic create mode 100644 apps/sonda/Dockerfile.sonda diff --git a/apps/sonda/Dockerfile.rest-traffic b/apps/sonda/Dockerfile.rest-traffic deleted file mode 100644 index f09b1e3450..0000000000 --- a/apps/sonda/Dockerfile.rest-traffic +++ /dev/null @@ -1,3 +0,0 @@ -FROM python:3.9.18-alpine3.18 - -RUN pip 
install requests argparse \ No newline at end of file diff --git a/apps/sonda/Dockerfile.sonda b/apps/sonda/Dockerfile.sonda new file mode 100644 index 0000000000..536be13d41 --- /dev/null +++ b/apps/sonda/Dockerfile.sonda @@ -0,0 +1,3 @@ +FROM python:3.9.18-alpine3.18 + +RUN pip install requests argparse prometheus_client \ No newline at end of file diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index c8e7029155..297b10944d 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -54,10 +54,12 @@ services: command: - /opt/run_node.sh - rest-traffic: + sonda: build: context: . - dockerfile: Dockerfile.rest-traffic + dockerfile: Dockerfile.sonda + ports: + - 127.0.0.1:8004:8004 environment: <<: - *sonda_env diff --git a/apps/sonda/monitoring/prometheus-config.yml b/apps/sonda/monitoring/prometheus-config.yml index ddbba3bd6d..51e2b50b13 100644 --- a/apps/sonda/monitoring/prometheus-config.yml +++ b/apps/sonda/monitoring/prometheus-config.yml @@ -7,8 +7,4 @@ global: scrape_configs: - job_name: "nwaku" static_configs: - - targets: ["nwaku:8003"] - - - job_name: postgres-exporter - static_configs: - - targets: ['postgres-exporter:9187'] + - targets: ["nwaku:8003", "sonda:8004"] diff --git a/apps/sonda/traffic.py b/apps/sonda/traffic.py index 008a2518fc..8a8e0e696e 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/traffic.py @@ -7,6 +7,11 @@ import urllib.parse import requests import argparse +from prometheus_client import Counter, start_http_server + +# Initialize Prometheus metrics +successful_sonda_msgs = Counter('successful_sonda_msgs', 'Number of successful Sonda messages sent') +successful_store_queries = Counter('successful_store_queries', 'Number of successful store queries') def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): @@ -41,6 +46,7 @@ def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): response.status_code, response.text, elapsed_ms)) 
if(response.status_code == 200): + successful_sonda_msgs.inc() # Increment the counter return True return False @@ -67,6 +73,7 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con response.status_code, response.text, elapsed_ms)) if(response.status_code == 200): + successful_store_queries.inc() # Increment the counter return True return False @@ -95,6 +102,9 @@ def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, t store_nodes = [s.strip() for s in args.store_nodes.split(",")] print(store_nodes) +# Start Prometheus HTTP server at port 8004 +start_http_server(8004) + sonda_content_topic = '/sonda/2/polls/proto' node_rest_address = 'http://nwaku:8645' while True: From c0e5f6db6214abcd330846e4e398213aca18c482 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 15:07:27 +0300 Subject: [PATCH 14/34] changing name and removin unnecessary flag --- apps/sonda/docker-compose.yml | 4 ++-- apps/sonda/{traffic.py => sonda.py} | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) rename apps/sonda/{traffic.py => sonda.py} (97%) diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index 297b10944d..7b4f0bd974 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -64,12 +64,12 @@ services: <<: - *sonda_env command: > - python -u /opt/traffic.py + python -u /opt/sonda.py --delay-seconds=${QUERY_PERIOD} --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} --store-nodes=${STORE_NODES} volumes: - - ./traffic.py:/opt/traffic.py:Z + - ./sonda.py:/opt/sonda.py:Z depends_on: - nwaku diff --git a/apps/sonda/traffic.py b/apps/sonda/sonda.py similarity index 97% rename from apps/sonda/traffic.py rename to apps/sonda/sonda.py index 8a8e0e696e..d77217bd03 100644 --- a/apps/sonda/traffic.py +++ b/apps/sonda/sonda.py @@ -90,7 +90,6 @@ def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, t parser.add_argument('-p', '--pubsub-topic', type=str, 
help='pubsub topic', default='/waku/2/rs/1/0') -parser.add_argument('-s', '--msg-size-kbytes', type=int, help='message size in kBytes', default=10) parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) args = parser.parse_args() From 9e08daa8d0dcf73181665c8cabd0974b9619be4f Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 15:10:12 +0300 Subject: [PATCH 15/34] adding env example --- apps/sonda/.env.example | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 apps/sonda/.env.example diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example new file mode 100644 index 0000000000..e29b1f90bf --- /dev/null +++ b/apps/sonda/.env.example @@ -0,0 +1,30 @@ +# RPC URL for accessing testnet via HTTP. +# e.g. https://sepolia.infura.io/v3/123aa110320f4aec179150fba1e1b1b1 +ETH_CLIENT_ADDRESS= + +# Private key of testnet where you have sepolia ETH that would be staked into RLN contract. +# Note: make sure you don't use the '0x' prefix. +# e.g. 0116196e9a8abed42dd1a22eb63fa2a5a17b0c27d716b87ded2c54f1bf192a0b +ETH_TESTNET_KEY= + +# Password you would like to use to protect your RLN membership. +RLN_RELAY_CRED_PASSWORD= + +# Advanced. Can be left empty in normal use cases. 
+NWAKU_IMAGE= +NODEKEY= +DOMAIN= +EXTRA_ARGS= +RLN_RELAY_CONTRACT_ADDRESS= + +# -------------------- SONDA CONFIG ------------------ +CLUSTER_ID=16 +SHARD=32 +STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT,\ +/dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R,\ +/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ,\ +/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM,\ +/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\ +/dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P" +# Wait time in seconds between two consecutive queries +QUERY_PERIOD=30 \ No newline at end of file From 9deb6769ede6ba1d0486b38df4f3669cb336fbaa Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 15:34:44 +0300 Subject: [PATCH 16/34] adding labels --- apps/sonda/sonda.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index d77217bd03..42684e4a1a 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -7,11 +7,12 @@ import urllib.parse import requests import argparse -from prometheus_client import Counter, start_http_server +from prometheus_client import Counter, Gauge, start_http_server # Initialize Prometheus metrics successful_sonda_msgs = Counter('successful_sonda_msgs', 'Number of successful Sonda messages sent') -successful_store_queries = Counter('successful_store_queries', 'Number of successful store queries') +successful_store_queries = Counter('successful_store_queries', 'Number of successful store queries', ['node']) +store_query_latency = Gauge('store_query_latency', 
'Latency of store queries in milliseconds', ['node']) def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): @@ -73,7 +74,8 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con response.status_code, response.text, elapsed_ms)) if(response.status_code == 200): - successful_store_queries.inc() # Increment the counter + successful_store_queries.labels(node=store_node).inc() # Increment the counter with node label + store_query_latency.labels(node=store_node).set(elapsed_ms) # Set the latency gauge with node label return True return False From cbd9752419e9c6a078b4c8da78eb92862d8e2dad Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 16:07:00 +0300 Subject: [PATCH 17/34] refactor --- apps/sonda/sonda.py | 84 +++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index 42684e4a1a..2006181f36 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -7,15 +7,25 @@ import urllib.parse import requests import argparse -from prometheus_client import Counter, Gauge, start_http_server +from prometheus_client import Counter, Gauge, Histogram, start_http_server -# Initialize Prometheus metrics +# Prometheus metrics successful_sonda_msgs = Counter('successful_sonda_msgs', 'Number of successful Sonda messages sent') +failed_sonda_msgs = Counter('failed_sonda_msgs', 'Number of failed Sonda messages attempts') successful_store_queries = Counter('successful_store_queries', 'Number of successful store queries', ['node']) -store_query_latency = Gauge('store_query_latency', 'Latency of store queries in milliseconds', ['node']) +failed_store_queries = Counter('failed_store_queries', 'Number of failed store queries', ['node', 'error']) +empty_store_responses = Counter('empty_store_responses', "Number of store responses without the latest Sonda message", ['node']) +store_query_latency = Histogram('store_query_latency', 
'Latency of store queries in seconds', ['node'], + buckets=(0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float('inf'))) + +# Argparser configuration +parser = argparse.ArgumentParser(description='') +parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0') +parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) +parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) +args = parser.parse_args() def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): - message = "Hi, I'm Sonda" base64_message = base64.b64encode(message.encode('utf-8')).decode('ascii') body = { @@ -42,17 +52,17 @@ def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): print(f'Error sending request: {e}') if(response != None): - elapsed_ms = (time.time() - s_time) * 1000 - print('Response from %s: status:%s content:%s [%.4f ms.]' % (rest_address, \ - response.status_code, response.text, elapsed_ms)) + elapsed_seconds = (time.time() - s_time) + print('Response from %s: status:%s content:%s [%.4f s.]' % (rest_address, \ + response.status_code, response.text, elapsed_seconds)) if(response.status_code == 200): - successful_sonda_msgs.inc() # Increment the counter + successful_sonda_msgs.inc() return True + failed_sonda_msgs.inc() return False -parser = argparse.ArgumentParser(description='') def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_content_topic, timestamp): url = f'{rest_address}/store/v3/messages' @@ -69,17 +79,16 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con print(f'Error sending request: {e}') if(response != None): - elapsed_ms = (time.time() - s_time) * 1000 - print('Response from %s: status:%s content:%s [%.4f ms.]' % (rest_address, \ - response.status_code, response.text, elapsed_ms)) + elapsed_seconds = (time.time() - s_time) 
+ print('Response from %s: status:%s content:%s [%.4f s.]' % (rest_address, \ + response.status_code, response.text, elapsed_seconds)) if(response.status_code == 200): successful_store_queries.labels(node=store_node).inc() # Increment the counter with node label - store_query_latency.labels(node=store_node).set(elapsed_ms) # Set the latency gauge with node label + store_query_latency.labels(node=store_node).observe(elapsed_seconds) # Observe the latency return True return False - def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, timestamp): @@ -91,34 +100,33 @@ def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, t send_store_query(rest_address, node, encoded_pubsub_topic, encoded_content_topic, timestamp) -parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0') -parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) -parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) -args = parser.parse_args() +def main(): + print(f'Running Sonda with args={args}') + + store_nodes = [] + if args.store_nodes is not None: + store_nodes = [s.strip() for s in args.store_nodes.split(",")] + print(f'Store nodes to query: {store_nodes}') -print(args) + # Start Prometheus HTTP server at port 8004 + start_http_server(8004) -store_nodes = [] -if args.store_nodes is not None: - store_nodes = [s.strip() for s in args.store_nodes.split(",")] -print(store_nodes) + sonda_content_topic = '/sonda/2/polls/proto' + node_rest_address = 'http://nwaku:8645' + while True: + # calls are blocking + # limited by the time it takes the REST API to reply -# Start Prometheus HTTP server at port 8004 -start_http_server(8004) + timestamp = time.time_ns() + + res = send_sonda_msg(node_rest_address, args.pubsub_topic, sonda_content_topic, timestamp) -sonda_content_topic = '/sonda/2/polls/proto' 
-node_rest_address = 'http://nwaku:8645' -while True: - # calls are blocking - # limited by the time it takes the REST API to reply + print(f'sleeping: {args.delay_seconds} seconds') + time.sleep(args.delay_seconds) - timestamp = time.time_ns() - - res = send_sonda_msg(node_rest_address, args.pubsub_topic, sonda_content_topic, timestamp) + # Only send store query if message was successfully published + if(res): + send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, sonda_content_topic, timestamp) - print(f'sleeping: {args.delay_seconds} seconds') - time.sleep(args.delay_seconds) - # Only send store query if message was successfully published - if(res): - send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, sonda_content_topic, timestamp) +main() From 619615d1805e964d09fbf1625cac05fa500989b4 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Tue, 9 Jul 2024 19:04:56 +0300 Subject: [PATCH 18/34] debugging --- apps/sonda/sonda.py | 82 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 17 deletions(-) diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index 2006181f36..5eac2c7f6c 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -64,32 +64,80 @@ def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): return False +# We return true if both our node and the queried Store node returned a 200 +# If our message isn't found but we did get a store 200 response, this function still returns true +def check_store_response(json_response, store_node, timestamp): + # Check for the store node status code + if json_response.get('statusCode') != 200: + error = f"{json_response.get('statusCode')} {json_response.get('statusDesc')}" + print(f'Failed performing store query {error}') + failed_store_queries.labels(node=store_node, error=error).inc() + return False + + messages = json_response.get('messages') + # If there's no message in the response, increase counters and return + if not 
messages: + print("No messages in store response") + empty_store_responses.labels(node=store_node).inc() + return True + + # Search for the Sonda message in the returned messages + for message in messages: + # If message field is missing in current message, continue + if not message.get("message"): + print("Could not retrieve message") + continue + + # If a message is found with the same timestamp as sonda message, increase counters and return + if timestamp == message.get('message').get('timestamp'): + print(f'Found Sonda message in store response node={store_node}') + successful_store_queries.labels(node=store_node).inc() + return True + + # If our message wasn't found in the returned messages, increase counter and return + empty_store_responses.labels(node=store_node).inc() + return True + + def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_content_topic, timestamp): url = f'{rest_address}/store/v3/messages' - params = {'peerAddr': urllib.parse.quote(store_node, safe=''), 'pubsubTopic': encoded_pubsub_topic, \ - 'contentTopics': encoded_content_topic, 'includeData': 'true', 'startTime': timestamp} + params = { + 'peerAddr': urllib.parse.quote(store_node, safe=''), + 'pubsubTopic': encoded_pubsub_topic, + 'contentTopics': encoded_content_topic, + 'includeData': 'true', + 'startTime': timestamp + } s_time = time.time() - response = None try: - print(f'Sending store request to {store_node}') - response = requests.get(url, params=params) + print(f'Sending store request to {store_node}') + response = requests.get(url, params=params) except Exception as e: print(f'Error sending request: {e}') - - if(response != None): - elapsed_seconds = (time.time() - s_time) - print('Response from %s: status:%s content:%s [%.4f s.]' % (rest_address, \ - response.status_code, response.text, elapsed_seconds)) - - if(response.status_code == 200): - successful_store_queries.labels(node=store_node).inc() # Increment the counter with node label - 
store_query_latency.labels(node=store_node).observe(elapsed_seconds) # Observe the latency - return True - - return False + failed_store_queries.labels(node=store_node, error=str(e)).inc() + return False + + elapsed_seconds = time.time() - s_time + print(f'Response from {rest_address}: status:{response.status_code} [{elapsed_seconds:.4f} s.]') + + if response.status_code != 200: + failed_store_queries.labels(node=store_node, error=f'{response.status_code} {response.content}').inc() + return False + + try: + json_response = response.json() + except Exception as e: + print(f'Error parsing response JSON: {e}') + failed_store_queries.labels(node=store_node, error="JSON parse error").inc() + return False + + if not check_store_response(json_response, store_node, timestamp): + return False + store_query_latency.labels(node=store_node).observe(elapsed_seconds) + return True def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, timestamp): print(f'Sending store queries. 
nodes = {store_nodes}') From b4e1c68c5fc2db51c6ee1ced8da0ddd83f9c290b Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 10:53:33 +0300 Subject: [PATCH 19/34] creating sonda dashboard and fixing bug --- .../dashboards/sonda-monitoring.json | 141 ++++++++++++++++++ apps/sonda/sonda.py | 2 +- 2 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json diff --git a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json new file mode 100644 index 0000000000..f5c4cd6e2c --- /dev/null +++ b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json @@ -0,0 +1,141 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "successful_sonda_msgs_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Sent Sonda Messages", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Sonda", + "uid": "cbd1b6c8-63d2-41f3-b57b-a776ec8fa23e", + "version": 1, + "weekStart": "" + } \ No newline at end of file diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index 5eac2c7f6c..0f00acbf07 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -72,7 +72,7 @@ def check_store_response(json_response, store_node, timestamp): error = f"{json_response.get('statusCode')} {json_response.get('statusDesc')}" print(f'Failed performing store query {error}') failed_store_queries.labels(node=store_node, error=error).inc() - return False + return False messages = json_response.get('messages') # If there's no message in the response, increase counters and return From c561cc95e9a6ab2e693c233e68c73076cf9f3917 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 11:28:18 +0300 Subject: [PATCH 20/34] adding visualizations --- .../dashboards/sonda-monitoring.json | 684 +++++++++++++++++- 1 file changed, 680 insertions(+), 4 deletions(-) diff --git 
a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json index f5c4cd6e2c..78049af969 100644 --- a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json +++ b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json @@ -22,11 +22,214 @@ "links": [], "liveNow": false, "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "title": "Panel Title", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Sonda messages that failed to be sent to the network", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"failed_sonda_msgs_total\", instance=\"sonda:8004\", job=\"nwaku\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "failed_sonda_msgs_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Failed Sonda Messages", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "Sonda messages successfully sent to the network", "fieldConfig": { "defaults": { "color": { @@ -85,7 +288,7 @@ "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 8 }, "id": 1, "options": { @@ -120,21 +323,494 @@ ], "title": "Sent Sonda Messages", 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "failed_store_queries_total {__name__=\"failed_store_queries_total\", error=\"504 PEER_DIAL_FAILURE: 16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": 
false, + "editorMode": "builder", + "expr": "failed_store_queries_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Failed Store Queries", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Store responses including the latest Sonda message ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"successful_store_queries_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "successful_store_queries_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Successful Store Responses", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"store_query_latency_sum\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"store_query_latency_sum", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Store Query Latency (seconds)", + "type": "histogram" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Store responses that didn't include our latest Sonda message", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"empty_store_responses_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "empty_store_responses_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Empty Store Responses", + "type": "timeseries" } ], - "refresh": "", + "refresh": false, "schemaVersion": 39, "tags": [], "templating": { "list": [] }, "time": { - "from": "now-6h", + "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", - "title": "Sonda", + "title": "sonda-monitoring", "uid": "cbd1b6c8-63d2-41f3-b57b-a776ec8fa23e", "version": 1, "weekStart": "" From c58f7376a644a96af06d8a1abe2faf0f79bc4551 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 12:41:14 +0300 Subject: [PATCH 21/34] fixing dashboard --- .../dashboards/sonda-monitoring.json | 162 ++++++++++++------ apps/sonda/sonda.py | 7 +- 2 files changed, 109 insertions(+), 60 deletions(-) diff --git a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json index 78049af969..6cb2d5acf8 100644 --- a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json +++ b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json @@ -23,6 +23,11 @@ "liveNow": false, "panels": [ { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Sonda messages successfully sent to the network", "fieldConfig": { "defaults": { "color": { @@ -83,7 +88,7 @@ "x": 0, "y": 0 }, - "id": 7, + "id": 1, "options": { "legend": { "calcs": [], @@ -96,7 +101,25 @@ "sort": "none" } }, - "title": "Panel Title", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + 
}, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "successful_sonda_msgs_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Sent Sonda Messages", "type": "timeseries" }, { @@ -229,7 +252,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "Sonda messages successfully sent to the network", + "description": "Store responses including the latest Sonda message ", "fieldConfig": { "defaults": { "color": { @@ -282,7 +305,32 @@ ] } }, - "overrides": [] + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"successful_store_queries_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -290,7 +338,7 @@ "x": 0, "y": 8 }, - "id": 1, + "id": 3, "options": { "legend": { "calcs": [], @@ -311,7 +359,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "successful_sonda_msgs_total", + "expr": "successful_store_queries_total", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -321,7 +369,7 @@ "useBackend": false } ], - "title": "Sent Sonda Messages", + "title": "Successful Store Responses", "type": "timeseries" }, { @@ -329,6 +377,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "Store queries with a non-200 response", "fieldConfig": { "defaults": { "color": { @@ -389,7 +438,7 @@ "options": { "mode": "exclude", "names": [ - "failed_store_queries_total {__name__=\"failed_store_queries_total\", 
error=\"504 PEER_DIAL_FAILURE: 16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + "failed_store_queries_total {__name__=\"failed_store_queries_total\", error=\"300 BAD_RESPONSE: Future operation cancelled!\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" ], "prefix": "All except:", "readOnly": true @@ -453,7 +502,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "Store responses including the latest Sonda message ", + "description": "Store responses that didn't include our latest Sonda message", "fieldConfig": { "defaults": { "color": { @@ -514,7 +563,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"successful_store_queries_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" + "{__name__=\"empty_store_responses_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" ], "prefix": "All except:", "readOnly": true @@ -539,7 +588,7 @@ "x": 0, "y": 16 }, - "id": 3, + "id": 5, "options": { "legend": { "calcs": [], @@ -560,7 +609,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "successful_store_queries_total", + "expr": "empty_store_responses_total", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -570,7 +619,7 @@ "useBackend": false } ], - "title": "Successful Store Responses", + "title": "Empty Store Responses", "type": "timeseries" }, { @@ -578,20 +627,43 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": 
"Latency of each store query", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "fillOpacity": 80, + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineWidth": 1 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -616,7 +688,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"store_query_latency_sum\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT\"}" + "{__name__=\"store_query_latency\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" ], "prefix": "All except:", "readOnly": true @@ -643,12 +715,15 @@ }, "id": 6, "options": { - "bucketOffset": 0, "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, "targets": [ @@ -659,7 +734,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "store_query_latency_sum", + "expr": "store_query_latency", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -670,50 +745,28 @@ } ], "title": "Store Query Latency (seconds)", - "type": "histogram" + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "Store responses that didn't include our 
latest Sonda message", + "description": "Latency of store queries", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "thresholds" }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 80, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineWidth": 1 }, "mappings": [], "thresholds": { @@ -738,7 +791,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"empty_store_responses_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" + "{__name__=\"store_query_latency\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" ], "prefix": "All except:", "readOnly": true @@ -763,17 +816,14 @@ "x": 0, "y": 24 }, - "id": 5, + "id": 7, "options": { + "bucketOffset": 0, "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" } }, "targets": [ @@ -784,7 +834,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "empty_store_responses_total", + "expr": "store_query_latency", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -794,11 +844,11 @@ "useBackend": false } ], - "title": "Empty Store Responses", - "type": "timeseries" + "title": "Store Query Latency (seconds)", + "type": 
"histogram" } ], - "refresh": false, + "refresh": "", "schemaVersion": 39, "tags": [], "templating": { diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index 0f00acbf07..46e42f9d8d 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -7,7 +7,7 @@ import urllib.parse import requests import argparse -from prometheus_client import Counter, Gauge, Histogram, start_http_server +from prometheus_client import Counter, Gauge, start_http_server # Prometheus metrics successful_sonda_msgs = Counter('successful_sonda_msgs', 'Number of successful Sonda messages sent') @@ -15,8 +15,7 @@ successful_store_queries = Counter('successful_store_queries', 'Number of successful store queries', ['node']) failed_store_queries = Counter('failed_store_queries', 'Number of failed store queries', ['node', 'error']) empty_store_responses = Counter('empty_store_responses', "Number of store responses without the latest Sonda message", ['node']) -store_query_latency = Histogram('store_query_latency', 'Latency of store queries in seconds', ['node'], - buckets=(0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, float('inf'))) +store_query_latency = Gauge('store_query_latency', 'Latency of the last store query in seconds', ['node']) # Argparser configuration parser = argparse.ArgumentParser(description='') @@ -136,7 +135,7 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con if not check_store_response(json_response, store_node, timestamp): return False - store_query_latency.labels(node=store_node).observe(elapsed_seconds) + store_query_latency.labels(node=store_node).set(elapsed_seconds) return True def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, timestamp): From 6590684887a7e9d0164b3bae3032a8926facc44a Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 12:55:40 +0300 Subject: [PATCH 22/34] updating env variable name --- apps/sonda/.env.example | 2 +- apps/sonda/docker-compose.yml | 4 ++-- 2 files 
changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example index e29b1f90bf..6e3288562b 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -27,4 +27,4 @@ STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HA /dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\ /dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P" # Wait time in seconds between two consecutive queries -QUERY_PERIOD=30 \ No newline at end of file +QUERY_FREQUENCY=30 \ No newline at end of file diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index 7b4f0bd974..e6db1afa74 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -18,7 +18,7 @@ x-sonda-env: &sonda_env CLUSTER_ID: ${CLUSTER_ID:-1} # Add your CLUSTER_ID after the "-" SHARD: ${SHARD:-0} # Add your SHARD after the "-" STORE_NODES: ${STORE_NODES:-} - QUERY_PERIOD: ${QUERY_PERIOD-60} + QUERY_FREQUENCY: ${QUERY_FREQUENCY-60} # Services definitions services: @@ -65,7 +65,7 @@ services: - *sonda_env command: > python -u /opt/sonda.py - --delay-seconds=${QUERY_PERIOD} + --delay-seconds=${QUERY_FREQUENCY} --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} --store-nodes=${STORE_NODES} volumes: From 44f67bab0ff1273752db922bdeb5172dc41d81a7 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 13:53:26 +0300 Subject: [PATCH 23/34] rename again env variables --- apps/sonda/.env.example | 2 +- apps/sonda/docker-compose.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example index 6e3288562b..daea0bc87b 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -27,4 +27,4 @@ STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HA 
/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\ /dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P" # Wait time in seconds between two consecutive queries -QUERY_FREQUENCY=30 \ No newline at end of file +QUERY_DELAY=30 \ No newline at end of file diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index e6db1afa74..bb3a5763c3 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -18,7 +18,7 @@ x-sonda-env: &sonda_env CLUSTER_ID: ${CLUSTER_ID:-1} # Add your CLUSTER_ID after the "-" SHARD: ${SHARD:-0} # Add your SHARD after the "-" STORE_NODES: ${STORE_NODES:-} - QUERY_FREQUENCY: ${QUERY_FREQUENCY-60} + QUERY_DELAY: ${QUERY_DELAY-60} # Services definitions services: @@ -65,7 +65,7 @@ services: - *sonda_env command: > python -u /opt/sonda.py - --delay-seconds=${QUERY_FREQUENCY} + --delay-seconds=${QUERY_DELAY} --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} --store-nodes=${STORE_NODES} volumes: From 0de03b3f33113527f7b89132eb2237299704b8db Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 14:05:49 +0300 Subject: [PATCH 24/34] improving grafana legends --- .../dashboards/sonda-monitoring.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json index 6cb2d5acf8..e91f048eb6 100644 --- a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json +++ b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json @@ -313,7 +313,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"successful_store_queries_total\", instance=\"sonda:8004\", job=\"nwaku\", 
node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + "{__name__=\"successful_store_queries_total\" , node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" ], "prefix": "All except:", "readOnly": true @@ -363,7 +363,7 @@ "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}", "range": true, "refId": "A", "useBackend": false @@ -438,7 +438,7 @@ "options": { "mode": "exclude", "names": [ - "failed_store_queries_total {__name__=\"failed_store_queries_total\", error=\"300 BAD_RESPONSE: Future operation cancelled!\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + "{__name__=\"failed_store_queries_total\" , error=\"300 BAD_RESPONSE: Future operation cancelled!\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" ], "prefix": "All except:", "readOnly": true @@ -488,7 +488,7 @@ "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "{{label_name}}", + "legendFormat": "{__name__=\"{{__name__}}\" , error=\"{{error}}\", node=\"{{node}}\"}", "range": true, "refId": "A", "useBackend": false @@ -563,7 +563,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"empty_store_responses_total\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}" + "{__name__=\"empty_store_responses_total\" , node=\"/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT\"}" ], "prefix": "All except:", 
"readOnly": true @@ -613,7 +613,7 @@ "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}", "range": true, "refId": "A", "useBackend": false @@ -738,7 +738,7 @@ "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}", "range": true, "refId": "A", "useBackend": false @@ -791,7 +791,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"store_query_latency\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + "{__name__=\"store_query_latency\" , node=\"/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT\"}" ], "prefix": "All except:", "readOnly": true @@ -838,7 +838,7 @@ "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}", "range": true, "refId": "A", "useBackend": false From 22c1106ec5f865192fe7df86a53a04d6319cc196 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 14:12:49 +0300 Subject: [PATCH 25/34] file cleanup --- .../configuration/pg-exporter-queries.yml | 284 ------------------ .../configuration/postgres-exporter.yml | 9 - apps/sonda/sonda.nim | 133 -------- apps/sonda/sonda_config.nim | 50 --- 4 files changed, 476 deletions(-) delete mode 100644 apps/sonda/monitoring/configuration/pg-exporter-queries.yml delete mode 100644 apps/sonda/monitoring/configuration/postgres-exporter.yml delete mode 100644 apps/sonda/sonda.nim delete mode 100644 apps/sonda/sonda_config.nim diff --git a/apps/sonda/monitoring/configuration/pg-exporter-queries.yml b/apps/sonda/monitoring/configuration/pg-exporter-queries.yml deleted file 
mode 100644 index bb1d7320a1..0000000000 --- a/apps/sonda/monitoring/configuration/pg-exporter-queries.yml +++ /dev/null @@ -1,284 +0,0 @@ -pg_replication: - query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag" - master: true - metrics: - - lag: - usage: "GAUGE" - description: "Replication lag behind master in seconds" - -pg_postmaster: - query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" - master: true - metrics: - - start_time_seconds: - usage: "GAUGE" - description: "Time at which postmaster started" - -pg_stat_user_tables: - query: | - SELECT - current_database() datname, - schemaname, - relname, - seq_scan, - seq_tup_read, - idx_scan, - idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - n_mod_since_analyze, - COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, - COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, - COALESCE(last_analyze, '1970-01-01Z') as last_analyze, - COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count - FROM - pg_stat_user_tables - metrics: - - datname: - usage: "LABEL" - description: "Name of current database" - - schemaname: - usage: "LABEL" - description: "Name of the schema that this table is in" - - relname: - usage: "LABEL" - description: "Name of this table" - - seq_scan: - usage: "COUNTER" - description: "Number of sequential scans initiated on this table" - - seq_tup_read: - usage: "COUNTER" - description: "Number of live rows fetched by sequential scans" - - idx_scan: - usage: "COUNTER" - description: "Number of index scans initiated on this table" - - idx_tup_fetch: - usage: "COUNTER" - description: "Number of live rows fetched by index scans" - - n_tup_ins: - usage: "COUNTER" - description: "Number of rows inserted" - - n_tup_upd: - usage: "COUNTER" 
- description: "Number of rows updated" - - n_tup_del: - usage: "COUNTER" - description: "Number of rows deleted" - - n_tup_hot_upd: - usage: "COUNTER" - description: "Number of rows HOT updated (i.e., with no separate index update required)" - - n_live_tup: - usage: "GAUGE" - description: "Estimated number of live rows" - - n_dead_tup: - usage: "GAUGE" - description: "Estimated number of dead rows" - - n_mod_since_analyze: - usage: "GAUGE" - description: "Estimated number of rows changed since last analyze" - - last_vacuum: - usage: "GAUGE" - description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" - - last_autovacuum: - usage: "GAUGE" - description: "Last time at which this table was vacuumed by the autovacuum daemon" - - last_analyze: - usage: "GAUGE" - description: "Last time at which this table was manually analyzed" - - last_autoanalyze: - usage: "GAUGE" - description: "Last time at which this table was analyzed by the autovacuum daemon" - - vacuum_count: - usage: "COUNTER" - description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" - - autovacuum_count: - usage: "COUNTER" - description: "Number of times this table has been vacuumed by the autovacuum daemon" - - analyze_count: - usage: "COUNTER" - description: "Number of times this table has been manually analyzed" - - autoanalyze_count: - usage: "COUNTER" - description: "Number of times this table has been analyzed by the autovacuum daemon" - -pg_statio_user_tables: - query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" - metrics: - - datname: - usage: "LABEL" - description: "Name of current database" - - schemaname: - usage: "LABEL" - description: "Name of the schema that this table is in" - - relname: - usage: "LABEL" - description: "Name of this table" - - heap_blks_read: 
- usage: "COUNTER" - description: "Number of disk blocks read from this table" - - heap_blks_hit: - usage: "COUNTER" - description: "Number of buffer hits in this table" - - idx_blks_read: - usage: "COUNTER" - description: "Number of disk blocks read from all indexes on this table" - - idx_blks_hit: - usage: "COUNTER" - description: "Number of buffer hits in all indexes on this table" - - toast_blks_read: - usage: "COUNTER" - description: "Number of disk blocks read from this table's TOAST table (if any)" - - toast_blks_hit: - usage: "COUNTER" - description: "Number of buffer hits in this table's TOAST table (if any)" - - tidx_blks_read: - usage: "COUNTER" - description: "Number of disk blocks read from this table's TOAST table indexes (if any)" - - tidx_blks_hit: - usage: "COUNTER" - description: "Number of buffer hits in this table's TOAST table indexes (if any)" - -# WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series -pg_stat_statements: - query: "SELECT t2.rolname, t3.datname, queryid, calls, ( total_plan_time + total_exec_time ) / 1000 as total_time_seconds, ( min_plan_time + min_exec_time ) / 1000 as min_time_seconds, ( max_plan_time + max_exec_time ) / 1000 as max_time_seconds, ( mean_plan_time + mean_exec_time ) / 1000 as mean_time_seconds, ( stddev_plan_time + stddev_exec_time ) / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin' AND queryid IS NOT NULL" - master: true - metrics: - - rolname: - usage: "LABEL" - description: "Name of user" - - datname: - usage: "LABEL" - 
description: "Name of database" - - queryid: - usage: "LABEL" - description: "Query ID" - - calls: - usage: "COUNTER" - description: "Number of times executed" - - total_time_seconds: - usage: "COUNTER" - description: "Total time spent in the statement, in milliseconds" - - min_time_seconds: - usage: "GAUGE" - description: "Minimum time spent in the statement, in milliseconds" - - max_time_seconds: - usage: "GAUGE" - description: "Maximum time spent in the statement, in milliseconds" - - mean_time_seconds: - usage: "GAUGE" - description: "Mean time spent in the statement, in milliseconds" - - stddev_time_seconds: - usage: "GAUGE" - description: "Population standard deviation of time spent in the statement, in milliseconds" - - rows: - usage: "COUNTER" - description: "Total number of rows retrieved or affected by the statement" - - shared_blks_hit: - usage: "COUNTER" - description: "Total number of shared block cache hits by the statement" - - shared_blks_read: - usage: "COUNTER" - description: "Total number of shared blocks read by the statement" - - shared_blks_dirtied: - usage: "COUNTER" - description: "Total number of shared blocks dirtied by the statement" - - shared_blks_written: - usage: "COUNTER" - description: "Total number of shared blocks written by the statement" - - local_blks_hit: - usage: "COUNTER" - description: "Total number of local block cache hits by the statement" - - local_blks_read: - usage: "COUNTER" - description: "Total number of local blocks read by the statement" - - local_blks_dirtied: - usage: "COUNTER" - description: "Total number of local blocks dirtied by the statement" - - local_blks_written: - usage: "COUNTER" - description: "Total number of local blocks written by the statement" - - temp_blks_read: - usage: "COUNTER" - description: "Total number of temp blocks read by the statement" - - temp_blks_written: - usage: "COUNTER" - description: "Total number of temp blocks written by the statement" - - blk_read_time_seconds: - usage: 
"COUNTER" - description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" - - blk_write_time_seconds: - usage: "COUNTER" - description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" - -pg_process_idle: - query: | - WITH - metrics AS ( - SELECT - application_name, - SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum, - COUNT(*) AS process_idle_seconds_count - FROM pg_stat_activity - WHERE state = 'idle' - GROUP BY application_name - ), - buckets AS ( - SELECT - application_name, - le, - SUM( - CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le - THEN 1 - ELSE 0 - END - )::bigint AS bucket - FROM - pg_stat_activity, - UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le - GROUP BY application_name, le - ORDER BY application_name, le - ) - SELECT - application_name, - process_idle_seconds_sum as seconds_sum, - process_idle_seconds_count as seconds_count, - ARRAY_AGG(le) AS seconds, - ARRAY_AGG(bucket) AS seconds_bucket - FROM metrics JOIN buckets USING (application_name) - GROUP BY 1, 2, 3 - metrics: - - application_name: - usage: "LABEL" - description: "Application Name" - - seconds: - usage: "HISTOGRAM" - description: "Idle time of server processes" - -pg_tb_stats: - query: | - select pubsubtopic, count(*) AS messages FROM (SELECT id, array_agg(pubsubtopic ORDER BY pubsubtopic) AS pubsubtopic FROM messages GROUP BY id) sub GROUP BY pubsubtopic ORDER BY pubsubtopic; - metrics: - - pubsubtopic: - usage: "LABEL" - description: "pubsubtopic" - - messages: - usage: "GAUGE" - description: "Number of messages for the given pubsub topic" - -pg_tb_messages: - query: | - SELECT - COUNT(ID) - FROM messages - metrics: - - count: - usage: "GAUGE" - description: "Row count in `messages` table" diff --git a/apps/sonda/monitoring/configuration/postgres-exporter.yml 
b/apps/sonda/monitoring/configuration/postgres-exporter.yml deleted file mode 100644 index a8380dd72e..0000000000 --- a/apps/sonda/monitoring/configuration/postgres-exporter.yml +++ /dev/null @@ -1,9 +0,0 @@ -auth_modules: - mypostgres: - type: userpass - userpass: - username: postgres - password: ${POSTGRES_PASSWORD} - options: - # options become key=value parameters of the DSN - sslmode: disable diff --git a/apps/sonda/sonda.nim b/apps/sonda/sonda.nim deleted file mode 100644 index e93c56cf65..0000000000 --- a/apps/sonda/sonda.nim +++ /dev/null @@ -1,133 +0,0 @@ -{.push raises: [].} - -import - std/[options, strutils, os, sequtils, net], - chronicles, - chronos, - metrics, - libbacktrace, - system/ansi_c, - libp2p/crypto/crypto, - confutils, - results - -import - ./sonda_config, - ../../waku/common/logging, - ../../waku/factory/waku, - ../../waku/factory/external_config, - ../../waku/node/health_monitor, - ../../waku/waku_api/rest/builder as rest_server_builder, - ../../waku/node/waku_metrics - -logScope: - topics = "sonda main" - -proc logConfig(conf: SondaConf) = - info "Configuration: Sonda", conf = $conf - -{.pop.} -when isMainModule: - const versionString = "version / git commit hash: " & waku.git_version - - let confRes = SondaConf.loadConfig(version = versionString) - if confRes.isErr(): - error "failure while loading the configuration", error = confRes.error - quit(QuitFailure) - - var conf = confRes.get() - - ## Logging setup - logging.setupLog(conf.logLevel, conf.logFormat) - - info "Running Sonda", version = waku.git_version - logConfig(conf) - - var wakuConf = defaultWakuNodeConf().valueOr: - error "failed retrieving default node configuration", error = confRes.error - quit(QuitFailure) - - wakuConf.logLevel = conf.logLevel - wakuConf.logFormat = conf.logFormat - wakuConf.clusterId = conf.clusterId - wakuConf.shards = @[conf.shard] - wakuConf.staticnodes = conf.storenodes # connect directly to store nodes to query - - var nodeHealthMonitor 
{.threadvar.}: WakuNodeHealthMonitor - nodeHealthMonitor = WakuNodeHealthMonitor() - nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING) - - let restServer = rest_server_builder.startRestServerEsentials( - nodeHealthMonitor, wakuConf - ).valueOr: - error "Starting esential REST server failed.", error = $error - quit(QuitFailure) - - var wakuApp = Waku.init(wakuConf).valueOr: - error "Waku initialization failed", error = error - quit(QuitFailure) - - wakuApp.restServer = restServer - - nodeHealthMonitor.setNode(wakuApp.node) - - (waitFor startWaku(addr wakuApp)).isOkOr: - error "Starting waku failed", error = error - quit(QuitFailure) - - rest_server_builder.startRestServerProtocolSupport( - restServer, wakuApp.node, wakuApp.wakuDiscv5, wakuConf - ).isOkOr: - error "Starting protocols support REST server failed.", error = $error - quit(QuitFailure) - - wakuApp.metricsServer = waku_metrics.startMetricsServerAndLogging(wakuConf).valueOr: - error "Starting monitoring and external interfaces failed", error = error - quit(QuitFailure) - - nodeHealthMonitor.setOverallHealth(HealthStatus.READY) - - debug "Setting up shutdown hooks" - ## Setup shutdown hooks for this process. - ## Stop node gracefully on shutdown. 
- - proc asyncStopper(wakuApp: Waku) {.async: (raises: [Exception]).} = - nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN) - await wakuApp.stop() - quit(QuitSuccess) - - # Handle Ctrl-C SIGINT - proc handleCtrlC() {.noconv.} = - when defined(windows): - # workaround for https://github.com/nim-lang/Nim/issues/4057 - setupForeignThreadGc() - notice "Shutting down after receiving SIGINT" - asyncSpawn asyncStopper(wakuApp) - - setControlCHook(handleCtrlC) - - # Handle SIGTERM - when defined(posix): - proc handleSigterm(signal: cint) {.noconv.} = - notice "Shutting down after receiving SIGTERM" - asyncSpawn asyncStopper(wakuApp) - - c_signal(ansi_c.SIGTERM, handleSigterm) - - # Handle SIGSEGV - when defined(posix): - proc handleSigsegv(signal: cint) {.noconv.} = - # Require --debugger:native - fatal "Shutting down after receiving SIGSEGV", stacktrace = getBacktrace() - - # Not available in -d:release mode - writeStackTrace() - - waitFor wakuApp.stop() - quit(QuitFailure) - - c_signal(ansi_c.SIGSEGV, handleSigsegv) - - info "Node setup complete" - - runForever() diff --git a/apps/sonda/sonda_config.nim b/apps/sonda/sonda_config.nim deleted file mode 100644 index ded1381f5f..0000000000 --- a/apps/sonda/sonda_config.nim +++ /dev/null @@ -1,50 +0,0 @@ -import confutils/defs, results - -import ../../waku/common/logging - -type SondaConf* = object - logLevel* {. - desc: - "Sets the log level for process. Supported levels: TRACE, DEBUG, INFO, NOTICE, WARN, ERROR or FATAL", - defaultValue: logging.LogLevel.DEBUG, - name: "log-level" - .}: logging.LogLevel - - logFormat* {. - desc: - "Specifies what kind of logs should be written to stdout. Suported formats: TEXT, JSON", - defaultValue: logging.LogFormat.TEXT, - name: "log-format" - .}: logging.LogFormat - - clusterId* {. - desc: - "Cluster id that the node is running in. Node in a different cluster id is disconnected.", - defaultValue: 0, - name: "cluster-id" - .}: uint16 - - shard* {. 
- desc: "Shard where sonda messages are going to be published", - defaultValue: 0, - name: "shard" - .}: uint16 - - period* {. - desc: "Time in seconds between consecutive sonda messages", - defaultValue: 60, - name: "period" - .}: uint32 - - storenodes* {. - desc: "Multiaddresses of store nodes to query", - defaultValue: @[], - name: "storenodes" - .}: seq[string] - -proc loadConfig*(T: type SondaConf, version = ""): Result[T, string] = - try: - let conf = SondaConf.load(version = version) - return ok(conf) - except CatchableError: - err(getCurrentExceptionMsg()) From 71744f9ffdca2529bc41d7adf1e3bacc084cc71c Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 14:40:12 +0300 Subject: [PATCH 26/34] refactors --- apps/sonda/.env.example | 5 +++- apps/sonda/docker-compose.yml | 2 ++ apps/sonda/sonda.py | 53 ++++++++++++++++++----------------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example index daea0bc87b..614e6a321f 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -20,6 +20,7 @@ RLN_RELAY_CONTRACT_ADDRESS= # -------------------- SONDA CONFIG ------------------ CLUSTER_ID=16 SHARD=32 +# Comma separated list of store nodes to poll STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT,\ /dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R,\ /dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ,\ @@ -27,4 +28,6 @@ STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HA /dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\ /dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P" # Wait time in seconds between 
two consecutive queries -QUERY_DELAY=30 \ No newline at end of file +QUERY_DELAY=30 +# Consecutive successful store requests to consider a store node healthy +HEALTH_THREASHOLD=5 \ No newline at end of file diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index bb3a5763c3..65088b0e68 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -19,6 +19,7 @@ x-sonda-env: &sonda_env SHARD: ${SHARD:-0} # Add your SHARD after the "-" STORE_NODES: ${STORE_NODES:-} QUERY_DELAY: ${QUERY_DELAY-60} + HEALTH_THRESHOLD: ${HEALTH_THRESHOLD-5} # Services definitions services: @@ -68,6 +69,7 @@ services: --delay-seconds=${QUERY_DELAY} --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} --store-nodes=${STORE_NODES} + --health-threshold=${HEALTH_THRESHOLD} volumes: - ./sonda.py:/opt/sonda.py:Z depends_on: diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index 46e42f9d8d..7f23b42120 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -9,6 +9,9 @@ import argparse from prometheus_client import Counter, Gauge, start_http_server +# Content topic where Sona messages are going to be sent +SONDA_CONTENT_TOPIC = '/sonda/2/polls/proto' + # Prometheus metrics successful_sonda_msgs = Counter('successful_sonda_msgs', 'Number of successful Sonda messages sent') failed_sonda_msgs = Counter('failed_sonda_msgs', 'Number of failed Sonda messages attempts') @@ -16,48 +19,50 @@ failed_store_queries = Counter('failed_store_queries', 'Number of failed store queries', ['node', 'error']) empty_store_responses = Counter('empty_store_responses', "Number of store responses without the latest Sonda message", ['node']) store_query_latency = Gauge('store_query_latency', 'Latency of the last store query in seconds', ['node']) +consecutive_successful_responses = Gauge('consecutive_successful_responses', 'Consecutive successful store responses', ['node']) +node_health = Gauge('node_health', "Binary indicator of a node's health. 
1 is healthy, 0 is not", ['node']) + # Argparser configuration parser = argparse.ArgumentParser(description='') parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0') parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) +parser.add_argument('-t', '--health-threshold', type=int, help='consecutive successful store requests to consider a store node healthy', default=5) args = parser.parse_args() + +# Sends Sonda message. Returns True if successful, False otherwise def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp): message = "Hi, I'm Sonda" base64_message = base64.b64encode(message.encode('utf-8')).decode('ascii') body = { 'payload': base64_message, 'contentTopic': content_topic, - 'version': 1, # You can adjust the version as needed + 'version': 1, 'timestamp': timestamp } encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='') - url = f'{rest_address}/relay/v1/messages/{encoded_pubsub_topic}' headers = {'content-type': 'application/json'} - print('Waku REST API: %s PubSubTopic: %s, ContentTopic: %s' % (url, pubsub_topic, content_topic)) - s_time = time.time() + print(f'Waku REST API: {url} PubSubTopic: {pubsub_topic}, ContentTopic: {content_topic}') - response = None - try: - print('Sending request') - response = requests.post(url, json=body, headers=headers) - except Exception as e: - print(f'Error sending request: {e}') - - if(response != None): - elapsed_seconds = (time.time() - s_time) - print('Response from %s: status:%s content:%s [%.4f s.]' % (rest_address, \ - response.status_code, response.text, elapsed_seconds)) - - if(response.status_code == 200): - successful_sonda_msgs.inc() - return True + start_time = time.time() + response = requests.post(url, json=body, headers=headers, timeout=10) + elapsed_seconds = 
time.time() - start_time + + print(f'Response from {rest_address}: status:{response.status_code} content:{response.text} [{elapsed_seconds:.4f} s.]') + + if response.status_code == 200: + successful_sonda_msgs.inc() + return True + else: + response.raise_for_status() + except requests.RequestException as e: + print(f'Error sending request: {e}') failed_sonda_msgs.inc() return False @@ -138,6 +143,7 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con store_query_latency.labels(node=store_node).set(elapsed_seconds) return True + def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, timestamp): print(f'Sending store queries. nodes = {store_nodes}') encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='') @@ -158,22 +164,19 @@ def main(): # Start Prometheus HTTP server at port 8004 start_http_server(8004) - sonda_content_topic = '/sonda/2/polls/proto' node_rest_address = 'http://nwaku:8645' while True: - # calls are blocking - # limited by the time it takes the REST API to reply - timestamp = time.time_ns() - res = send_sonda_msg(node_rest_address, args.pubsub_topic, sonda_content_topic, timestamp) + # Send Sonda message + res = send_sonda_msg(node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) print(f'sleeping: {args.delay_seconds} seconds') time.sleep(args.delay_seconds) # Only send store query if message was successfully published if(res): - send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, sonda_content_topic, timestamp) + send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) main() From 3527083ea924fb012073ea0ce3f57172fe0026a5 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 15:29:22 +0300 Subject: [PATCH 27/34] adding health metric --- apps/sonda/.env.example | 2 +- .../dashboards/sonda-monitoring.json | 127 +++++++++++++++++- apps/sonda/sonda.py | 36 +++-- 3 files changed, 155 
insertions(+), 10 deletions(-) diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example index 614e6a321f..7cc1a46df8 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -30,4 +30,4 @@ STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HA # Wait time in seconds between two consecutive queries QUERY_DELAY=30 # Consecutive successful store requests to consider a store node healthy -HEALTH_THREASHOLD=5 \ No newline at end of file +HEALTH_THRESHOLD=5 \ No newline at end of file diff --git a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json index e91f048eb6..f7d623cd3c 100644 --- a/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json +++ b/apps/sonda/monitoring/configuration/dashboards/sonda-monitoring.json @@ -688,7 +688,7 @@ "options": { "mode": "exclude", "names": [ - "{__name__=\"store_query_latency\", instance=\"sonda:8004\", job=\"nwaku\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" + "{__name__=\"store_query_latency\" , node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}" ], "prefix": "All except:", "readOnly": true @@ -846,6 +846,131 @@ ], "title": "Store Query Latency (seconds)", "type": "histogram" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Node health according to the configured health threshold. 
1 means healthy, 0 not.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_health\" , node=\"/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_health", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}", + "range": true, + "refId": "A", + 
"useBackend": false + } + ], + "title": "Node health", + "type": "timeseries" } ], "refresh": "", diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index 7f23b42120..a1f83202a5 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -76,6 +76,8 @@ def check_store_response(json_response, store_node, timestamp): error = f"{json_response.get('statusCode')} {json_response.get('statusDesc')}" print(f'Failed performing store query {error}') failed_store_queries.labels(node=store_node, error=error).inc() + consecutive_successful_responses.labels(node=store_node).set(0) + return False messages = json_response.get('messages') @@ -83,6 +85,7 @@ def check_store_response(json_response, store_node, timestamp): if not messages: print("No messages in store response") empty_store_responses.labels(node=store_node).inc() + consecutive_successful_responses.labels(node=store_node).set(0) return True # Search for the Sonda message in the returned messages @@ -96,10 +99,12 @@ def check_store_response(json_response, store_node, timestamp): if timestamp == message.get('message').get('timestamp'): print(f'Found Sonda message in store response node={store_node}') successful_store_queries.labels(node=store_node).inc() + consecutive_successful_responses.labels(node=store_node).inc() return True # If our message wasn't found in the returned messages, increase counter and return empty_store_responses.labels(node=store_node).inc() + consecutive_successful_responses.labels(node=store_node).set(0) return True @@ -121,6 +126,7 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con except Exception as e: print(f'Error sending request: {e}') failed_store_queries.labels(node=store_node, error=str(e)).inc() + consecutive_successful_responses.labels(node=store_node).set(0) return False elapsed_seconds = time.time() - s_time @@ -128,15 +134,19 @@ def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_con if response.status_code != 200: 
failed_store_queries.labels(node=store_node, error=f'{response.status_code} {response.content}').inc() + consecutive_successful_responses.labels(node=store_node).set(0) return False + # Parse REST response into JSON try: json_response = response.json() except Exception as e: print(f'Error parsing response JSON: {e}') failed_store_queries.labels(node=store_node, error="JSON parse error").inc() + consecutive_successful_responses.labels(node=store_node).set(0) return False + # Analyze Store response. Return false if response is incorrect or has an error status if not check_store_response(json_response, store_node, timestamp): return False @@ -166,17 +176,27 @@ def main(): node_rest_address = 'http://nwaku:8645' while True: - timestamp = time.time_ns() + timestamp = time.time_ns() - # Send Sonda message - res = send_sonda_msg(node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + # Send Sonda message + res = send_sonda_msg(node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + + print(f'sleeping: {args.delay_seconds} seconds') + time.sleep(args.delay_seconds) - print(f'sleeping: {args.delay_seconds} seconds') - time.sleep(args.delay_seconds) + # Only send store query if message was successfully published + if(res): + send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + + for store_node in store_nodes: + print("-------------------") + print(f"node: {store_node}, consecutive: {consecutive_successful_responses.labels(node=store_node)._value.get()} threshold: {args.health_threshold}") + if consecutive_successful_responses.labels(node=store_node)._value.get() >= args.health_threshold: + node_health.labels(node=store_node).set(1) + else: + node_health.labels(node=store_node).set(0) - # Only send store query if message was successfully published - if(res): - send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + main() From 
ba84d3c42368007a9651b2b5050ceb5ba4a0770e Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 15:47:26 +0300 Subject: [PATCH 28/34] updating .env example --- apps/sonda/.env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example index 7cc1a46df8..673deb70a6 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -1,6 +1,6 @@ # RPC URL for accessing testnet via HTTP. # e.g. https://sepolia.infura.io/v3/123aa110320f4aec179150fba1e1b1b1 -ETH_CLIENT_ADDRESS= +RLN_RELAY_ETH_CLIENT_ADDRESS= # Private key of testnet where you have sepolia ETH that would be staked into RLN contract. # Note: make sure you don't use the '0x' prefix. From 106eb345fc066093eeb6ef2aecbfd9f97557228f Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 16:12:28 +0300 Subject: [PATCH 29/34] removing sonda from make configs --- .github/workflows/pre-release.yml | 2 +- Makefile | 6 +----- waku.nimble | 4 ---- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index 5524645e94..c93e0513bb 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -73,7 +73,7 @@ jobs: tools tar -cvzf ${{steps.vars.outputs.nwaku}} ./build/wakunode2 ./build/chat2 - tar -cvzf ${{steps.vars.outputs.nwakutools}} ./build/wakucanary ./build/networkmonitor ./build/sonda + tar -cvzf ${{steps.vars.outputs.nwakutools}} ./build/wakucanary ./build/networkmonitor - name: upload artifacts uses: actions/upload-artifact@v3 diff --git a/Makefile b/Makefile index 24a5e2d31c..d0c21a6ef4 100644 --- a/Makefile +++ b/Makefile @@ -222,7 +222,7 @@ liteprotocoltester: | build deps librln ################ ## Waku tools ## ################ -.PHONY: tools wakucanary networkmonitor sonda +.PHONY: tools wakucanary networkmonitor tools: networkmonitor wakucanary @@ -234,10 +234,6 @@ networkmonitor: | build deps 
librln echo -e $(BUILD_MSG) "build/$@" && \ $(ENV_SCRIPT) nim networkmonitor $(NIM_PARAMS) waku.nims -sonda: | build deps librln - echo -e $(BUILD_MSG) "build/$@" && \ - $(ENV_SCRIPT) nim sonda $(NIM_PARAMS) waku.nims - ################### ## Documentation ## diff --git a/waku.nimble b/waku.nimble index 556cb9bfa5..a6db1f2b7f 100644 --- a/waku.nimble +++ b/waku.nimble @@ -88,10 +88,6 @@ task networkmonitor, "Build network monitor tool": let name = "networkmonitor" buildBinary name, "apps/networkmonitor/" -task sonda, "Build sonda tool": - let name = "sonda" - buildBinary name, "apps/sonda/" - task rln_db_inspector, "Build the rln db inspector": let name = "rln_db_inspector" buildBinary name, "tools/rln_db_inspector/" From 23eca9e97aa80d82c6797c0cea8cb887fa394882 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 16:24:23 +0300 Subject: [PATCH 30/34] removing debug logs --- apps/sonda/sonda.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index a1f83202a5..479cb70444 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -188,15 +188,12 @@ def main(): if(res): send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + # Update node health metrics for store_node in store_nodes: - print("-------------------") - print(f"node: {store_node}, consecutive: {consecutive_successful_responses.labels(node=store_node)._value.get()} threshold: {args.health_threshold}") if consecutive_successful_responses.labels(node=store_node)._value.get() >= args.health_threshold: node_health.labels(node=store_node).set(1) else: node_health.labels(node=store_node).set(0) - - main() From 86ac05972dba0de4be548777ea90b7d9afd828f7 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 16:26:35 +0300 Subject: [PATCH 31/34] changing delay in example --- apps/sonda/.env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/apps/sonda/.env.example b/apps/sonda/.env.example index 673deb70a6..accc2732c7 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -28,6 +28,6 @@ STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HA /dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\ /dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P" # Wait time in seconds between two consecutive queries -QUERY_DELAY=30 +QUERY_DELAY=60 # Consecutive successful store requests to consider a store node healthy HEALTH_THRESHOLD=5 \ No newline at end of file From 07bee43c8f7b6c846edb80b3da07fb45f3b83df6 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 16:27:48 +0300 Subject: [PATCH 32/34] removing comments --- apps/sonda/docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index 65088b0e68..bfad693da0 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -15,8 +15,8 @@ x-rln-environment: &rln_env RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-" x-sonda-env: &sonda_env - CLUSTER_ID: ${CLUSTER_ID:-1} # Add your CLUSTER_ID after the "-" - SHARD: ${SHARD:-0} # Add your SHARD after the "-" + CLUSTER_ID: ${CLUSTER_ID:-1} + SHARD: ${SHARD:-0} STORE_NODES: ${STORE_NODES:-} QUERY_DELAY: ${QUERY_DELAY-60} HEALTH_THRESHOLD: ${HEALTH_THRESHOLD-5} From d72f130dc2b833fdec0cab264d4f64e14ab9fc78 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 19:06:12 +0300 Subject: [PATCH 33/34] added README --- apps/sonda/README.md | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 apps/sonda/README.md diff --git a/apps/sonda/README.md b/apps/sonda/README.md new file mode 
100644 index 0000000000..f8ed47f15e --- /dev/null +++ b/apps/sonda/README.md @@ -0,0 +1,52 @@ +# Sonda + +Sonda is a tool to monitor store nodes and measure their performance. + +It works by running a `nwaku` node, publishing a message from it at a fixed interval and performing a store query to all the store nodes we want to monitor to check they respond with the last message we published. + +## Instructions + +1. Create an `.env` file which will contain the configuration parameters. + You can start by copying `.env.example` and adapting it for your use case + + ``` + cp .env.example .env + ${EDITOR} .env + ``` + + The variables that have to be filled for Sonda are + + ``` + CLUSTER_ID= + SHARD= + # Comma separated list of store nodes to poll + STORE_NODES= + # Wait time in seconds between two consecutive queries + QUERY_DELAY= + # Consecutive successful store requests to consider a store node healthy + HEALTH_THRESHOLD= + ``` + +2. If you want to query nodes in `cluster-id` 1, then you have to follow the steps of registering an RLN membership. Otherwise, you can skip this step. + + For that, you need: + * Ethereum Sepolia HTTP endpoint. Get one free from [Infura](https://www.infura.io/). + * Ethereum Sepolia account with some balance <0.01 Eth. Get some [here](https://www.infura.io/faucet/sepolia). + * A password to protect your rln membership. + + Fill the `RLN_RELAY_ETH_CLIENT_ADDRESS`, `ETH_TESTNET_KEY` and `RLN_RELAY_CRED_PASSWORD` env variables and run + + ``` + ./register_rln.sh + ``` + +3. Start Sonda by running + + ``` + docker-compose up -d + ``` + +4. Browse to http://localhost:3000/dashboards and monitor the performance + + There's two Grafana dashboards, `nwaku-monitoring` to track the stats of your node that is publishing messages and performing queries, and `sonda-monitoring` to monitor the responses of the store nodes.
+ From fe9a875865633bde027bf1b3c05c5b5523966d74 Mon Sep 17 00:00:00 2001 From: Gabriel mermelstein Date: Wed, 10 Jul 2024 19:15:48 +0300 Subject: [PATCH 34/34] fixed punctuation --- apps/sonda/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sonda/README.md b/apps/sonda/README.md index f8ed47f15e..75342c3cb9 100644 --- a/apps/sonda/README.md +++ b/apps/sonda/README.md @@ -48,5 +48,5 @@ It works by running a `nwaku` node, publishing a message from it every fixed int 4. Browse to http://localhost:3000/dashboards and monitor the performance - There's two Grafana dashboards, `nwaku-monitoring` to track the stats of your node that is publishing messages and performing queries, and `sonda-monitoring` to monitor the responses of the store nodes. + There are two Grafana dashboards: `nwaku-monitoring` to track the stats of your node that is publishing messages and performing queries, and `sonda-monitoring` to monitor the responses of the store nodes.