diff --git a/docker-compose.production.yml b/docker-compose.production.yml index 611243a7..bb8bcecc 100644 --- a/docker-compose.production.yml +++ b/docker-compose.production.yml @@ -91,6 +91,8 @@ services: - "traefik.enable=true" - "traefik.http.routers.nchan.priority=20" - "traefik.http.routers.nchan.rule=PathPrefix(`/sse`)" + - "prometheus-job=nchan" + - "prometheus-port=81" service-js-polyfill: image: gamingplatform/service-js-polyfill restart: on-failure diff --git a/docker-compose.yml b/docker-compose.yml index 5cbd735b..50a50d45 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -115,6 +115,8 @@ services: - "traefik.enable=true" - "traefik.http.routers.nchan.priority=20" - "traefik.http.routers.nchan.rule=PathPrefix(`/sse`)" + - "prometheus-job=nchan" + - "prometheus-port=81" service-js-polyfill: image: gamingplatform/service-js-polyfill restart: on-failure diff --git a/docker/grafana/dashboards/nchan.json b/docker/grafana/dashboards/nchan.json new file mode 100644 index 00000000..8306e2f2 --- /dev/null +++ b/docker/grafana/dashboards/nchan.json @@ -0,0 +1,361 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "iteration": 1665434791783, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + 
"justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(nchan_total_published_messages{job=\"nchan\", instance=\"$instance\"}[$__rate_interval])", + "refId": "A" + } + ], + "title": "Incoming Messages / s", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "mbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "nchan_shared_memory_used{job=\"nchan\", instance=\"$instance\"} / 1024", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Shared Memory", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 6 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": 
"8.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "nchan_stored_messages{job=\"nchan\", instance=\"$instance\"}", + "refId": "A" + } + ], + "title": "Stored Messages", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 6 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "nchan_subscribers{job=\"nchan\", instance=\"$instance\"}", + "refId": "A" + } + ], + "title": "Subscribers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 6 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "nchan_channels{job=\"nchan\", instance=\"$instance\"}", + "refId": "A" + } + ], + "title": "Channels", + "type": "stat" + } + ], + "refresh": "5s", + "schemaVersion": 36, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + 
"current": { + "selected": false, + "text": "php-gaming-website_nchan_1", + "value": "php-gaming-website_nchan_1" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(nchan_channels{job=\"nchan\"}, instance)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Nchan Overview", + "description": "Visualizes Nchan metrics.", + "uid": "EaeSxGIVk", + "version": 1, + "weekStart": "" +} diff --git a/docker/nchan/default.conf b/docker/nchan/default.conf index 324b67f6..5e96a645 100644 --- a/docker/nchan/default.conf +++ b/docker/nchan/default.conf @@ -11,11 +11,15 @@ server { access_log /dev/stdout; } +map $nchan_stub_status_shared_memory_used $raw_nchan_stub_status_shared_memory_used { + default "0"; + "~^(\d+).*$" $1; +} + +# These endpoints aren't protected (for simplicity). +# The port 81 is never exposed. server { listen 81; - - # This endpoint is not protected with nchan_authorize_request (for simplicity). - # The port 81 is never exposed. 
location = /pub { nchan_publisher; nchan_channel_id $arg_id; @@ -25,6 +29,75 @@ server { nchan_store_messages on; } + location /metrics { + nchan_stub_status; + + set $CONTENT ''; + set $CONTENT '${CONTENT}# HELP nchan_total_published_messages Number of messages published to all channels through this Nchan server.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_total_published_messages counter\n'; + set $CONTENT '${CONTENT}nchan_total_published_messages{} ${nchan_stub_status_total_published_messages}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_stored_messages Number of messages currently buffered in memory.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_stored_messages gauge\n'; + set $CONTENT '${CONTENT}nchan_stored_messages{} ${nchan_stub_status_stored_messages}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_shared_memory_used Total shared memory used for buffering messages, storing channel information, and other purposes. This value should be comfortably below nchan_shared_memory_size.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_shared_memory_used gauge\n'; + set $CONTENT '${CONTENT}nchan_shared_memory_used{} ${raw_nchan_stub_status_shared_memory_used}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_channels Number of channels present on this Nchan server.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_channels gauge\n'; + set $CONTENT '${CONTENT}nchan_channels{} ${nchan_stub_status_channels}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_subscribers Number of subscribers to all channels on this Nchan server.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_subscribers gauge\n'; + set $CONTENT '${CONTENT}nchan_subscribers{} ${nchan_stub_status_subscribers}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_redis_pending_commands Number of commands sent to Redis that are awaiting a reply. May spike during high load, especially if the Redis server is overloaded. 
Should tend towards 0.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_redis_pending_commands gauge\n'; + set $CONTENT '${CONTENT}nchan_redis_pending_commands{} ${nchan_stub_status_redis_pending_commands}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_redis_connected_servers Number of redis servers to which Nchan is currently connected.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_redis_connected_servers gauge\n'; + set $CONTENT '${CONTENT}nchan_redis_connected_servers{} ${nchan_stub_status_redis_connected_servers}\n'; + +# The following variable isn't yet available in the current Nchan version. +# Follow-up issue: https://github.com/marein/php-gaming-website/issues/150. +# set $CONTENT '${CONTENT}# HELP nchan_redis_unhealthy_upstreams Number of redis upstreams (individual server or cluster mode) that are currently not usable for publishing and subscribing.\n'; +# set $CONTENT '${CONTENT}# TYPE nchan_redis_unhealthy_upstreams gauge\n'; +# set $CONTENT '${CONTENT}nchan_redis_unhealthy_upstreams{} ${nchan_stub_status_redis_unhealthy_upstreams}\n'; + +# The following variable isn't yet available in the current Nchan version. +# Follow-up issue: https://github.com/marein/php-gaming-website/issues/150. +# set $CONTENT '${CONTENT}# HELP nchan_total_redis_commands_send Total number of commands this Nchan instance sent to Redis.\n'; +# set $CONTENT '${CONTENT}# TYPE nchan_total_redis_commands_send counter\n'; +# set $CONTENT '${CONTENT}nchan_total_redis_commands_send{} ${nchan_stub_status_total_redis_commands_send}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_total_ipc_alerts_received Number of interprocess communication packets transmitted between Nginx workers processes for Nchan. 
Can grow at 100-10000 per second at high load.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_total_ipc_alerts_received counter\n'; + set $CONTENT '${CONTENT}nchan_total_ipc_alerts_received{} ${nchan_stub_status_total_ipc_alerts_received}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_ipc_alerts_in_transit Number of interprocess communication packets in transit between Nginx workers. May be nonzero during high load, but should always tend toward 0 over time.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_ipc_alerts_in_transit gauge\n'; + set $CONTENT '${CONTENT}nchan_ipc_alerts_in_transit{} ${nchan_stub_status_ipc_alerts_in_transit}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_ipc_queued_alerts Number of interprocess communication packets waiting to be sent. May be nonzero during high load, but should always tend toward 0 over time.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_ipc_queued_alerts gauge\n'; + set $CONTENT '${CONTENT}nchan_ipc_queued_alerts{} ${nchan_stub_status_ipc_queued_alerts}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_total_ipc_send_delay Total amount of time interprocess communication packets spend being queued if delayed. May increase during high load.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_total_ipc_send_delay counter\n'; + set $CONTENT '${CONTENT}nchan_total_ipc_send_delay{} ${nchan_stub_status_total_ipc_send_delay}\n'; + + set $CONTENT '${CONTENT}# HELP nchan_total_ipc_receive_delay Total amount of time interprocess communication packets spend in transit if delayed. May increase during high load.\n'; + set $CONTENT '${CONTENT}# TYPE nchan_total_ipc_receive_delay counter\n'; + set $CONTENT '${CONTENT}nchan_total_ipc_receive_delay{} ${nchan_stub_status_total_ipc_receive_delay}\n'; + + default_type text/plain; + + return 200 $CONTENT; + } + error_log /dev/stdout; access_log /dev/stdout; }