From f55ebca3e243fe92c52959e296b54dc5ec7f14ed Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Mon, 9 Sep 2024 15:55:39 +0200 Subject: [PATCH 1/7] add timeout command --- src-docs/event_timer.md | 8 ++------ src/charm.py | 5 ++++- src/event_timer.py | 11 +++-------- templates/dispatch-event.service.j2 | 2 +- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src-docs/event_timer.md b/src-docs/event_timer.md index 57055e575..7d10d807c 100644 --- a/src-docs/event_timer.md +++ b/src-docs/event_timer.md @@ -109,7 +109,7 @@ Construct the timer manager. --- - + ### method `disable_event_timer` @@ -138,11 +138,7 @@ Disable the systemd timer for the given event. ### method `ensure_event_timer` ```python -ensure_event_timer( - event_name: str, - interval: int, - timeout: Optional[int] = None -) → None +ensure_event_timer(event_name: str, interval: int, timeout: int) → None ``` Ensure that a systemd service and timer are registered to dispatch the given event. diff --git a/src/charm.py b/src/charm.py index 0b17dbf52..cba747124 100755 --- a/src/charm.py +++ b/src/charm.py @@ -555,7 +555,10 @@ def _set_reconcile_timer(self) -> None: self._event_timer.ensure_event_timer( event_name="reconcile-runners", interval=int(self.config[RECONCILE_INTERVAL_CONFIG_NAME]), - timeout=int(self.config[RECONCILE_INTERVAL_CONFIG_NAME]) - 1, + # We assume a stuck reconcile event when it takes longer + # than 10 times a normal interval. Currently, we are only aware of + # https://bugs.launchpad.net/juju/+bug/2055184 causing a stuck reconcile event. + timeout=10 * int(self.config[RECONCILE_INTERVAL_CONFIG_NAME]), ) def _ensure_reconcile_timer_is_active(self) -> None: diff --git a/src/event_timer.py b/src/event_timer.py index da03150ff..5cc284fa4 100644 --- a/src/event_timer.py +++ b/src/event_timer.py @@ -46,7 +46,7 @@ class EventConfig(TypedDict): event: str interval: int random_delay: int - timeout: int + timeout: Optional[int] unit: str @@ -107,9 +107,7 @@ def is_active(self, event_name: str) -> bool: return ret_code == 0 - def ensure_event_timer( - self, event_name: str, interval: int, timeout: Optional[int] = None - ) -> None: + def ensure_event_timer(self, event_name: str, interval: int, timeout: int) -> None: """Ensure that a systemd service and timer are registered to dispatch the given event. The interval is how frequently, in minutes, the event should be dispatched. @@ -125,10 +123,7 @@ def ensure_event_timer( Raises: TimerEnableError: Timer cannot be started. Events will be not emitted. """ - if timeout is not None: - timeout_in_secs = timeout * 60 - else: - timeout_in_secs = interval * 30 + timeout_in_secs = timeout * 60 if timeout is not None else None context: EventConfig = { "event": event_name, diff --git a/templates/dispatch-event.service.j2 b/templates/dispatch-event.service.j2 index 7bef08f96..b4482795f 100644 --- a/templates/dispatch-event.service.j2 +++ b/templates/dispatch-event.service.j2 @@ -4,7 +4,7 @@ Description=Dispatch the {{event}} event on {{unit}} [Service] Type=oneshot # For juju 3 and juju 2 compatibility. The juju-run binary was renamed to juju-exec for juju 3. -ExecStart=/usr/bin/run-one /usr/bin/bash -c '/usr/bin/juju-exec "{{unit}}" "JUJU_DISPATCH_PATH={{event}} ./dispatch" || /usr/bin/juju-run "{{unit}}" "JUJU_DISPATCH_PATH={{event}} ./dispatch"' +ExecStart=/usr/bin/timeout "{{timeout}}" /usr/bin/run-one /usr/bin/bash -c '/usr/bin/juju-exec "{{unit}}" "JUJU_DISPATCH_PATH={{event}} ./dispatch" || /usr/bin/juju-run "{{unit}}" "JUJU_DISPATCH_PATH={{event}} ./dispatch"' [Install] WantedBy=multi-user.target From 663ae2d1555c1df3aa3df7a49c97bfba97a934a5 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Mon, 9 Sep 2024 20:32:15 +0200 Subject: [PATCH 2/7] Check-in panel with average actual reconciliation interval --- src/grafana_dashboards/metrics.json | 172 +++++++++++++++++++++------- 1 file changed, 133 insertions(+), 39 deletions(-) diff --git a/src/grafana_dashboards/metrics.json b/src/grafana_dashboards/metrics.json index 8582b77ef..346f971be 100644 --- a/src/grafana_dashboards/metrics.json +++ b/src/grafana_dashboards/metrics.json @@ -24,10 +24,23 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 537, + "id": 557, "links": [], "liveNow": false, "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "General", + "type": "row" + }, { "datasource": { "type": "loki", @@ -91,7 +104,7 @@ "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 3, "options": { @@ -212,7 +225,7 @@ "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 22, "options": { @@ -253,19 +266,6 @@ "title": "Available Runners", "type": "barchart" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 10, - "panels": [], - "title": "General", - "type": "row" - }, { "datasource": { "type": "loki", @@ -485,7 +485,7 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", duration=\"queue_duration\", flavor=\"flavor\" | __error__=`` | event = `runner_start` | flavor =~ `$flavor` | unwrap duration [1h]) by (flavor)", + "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"queue_duration\",flavor=\"flavor\" | __error__=\"\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(flavor)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "{{flavor}}", @@ -789,10 +789,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, + "x": 0, "y": 25 }, - "id": 4, + "id": 24, "options": { "legend": { "calcs": [], @@ -812,7 +812,7 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "quantile_over_time(0.5,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", + "expr": "quantile_over_time(0.5,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "50%", "queryType": "range", @@ -824,12 +824,12 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "quantile_over_time(0.95,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", + "expr": "quantile_over_time(0.95,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "95%", "queryType": "range", - "refId": "C" + "refId": "D" }, { "datasource": { @@ -837,30 +837,124 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "quantile_over_time(0.99,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", + "expr": "quantile_over_time(0.99,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "99%", "queryType": "range", - "refId": "D" + "refId": "E" }, { "datasource": { "type": "loki", "uid": "${lokids}" }, - "editorMode": "code", - "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", + "editorMode": "builder", + "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "Max", "queryType": "range", - "refId": "E" + "refId": "C" } ], - "title": "Runner Idle Duration (Percentile)", + "title": "Reconciliation Duration (Percentile)", "type": "timeseries" }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "The average actual reconciliation interval.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 6, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "orientation": "horizontal", + "showValue": "never", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "editorMode": "code", + "expr": "1 / sum by (flavor) (rate({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", flavor=\"flavor\" | event=\"reconciliation\" [$__range]))", + "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", + "legendFormat": "50%", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Actual Average Reconciliation Interval", + "type": "barchart" + }, { "datasource": { "type": "loki", @@ -926,7 +1020,7 @@ "x": 12, "y": 33 }, - "id": 6, + "id": 4, "options": { "legend": { "calcs": [], @@ -946,7 +1040,7 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "quantile_over_time(0.5,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", + "expr": "quantile_over_time(0.5,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "50%", "queryType": "range", @@ -958,12 +1052,12 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "quantile_over_time(0.95,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", + "expr": "quantile_over_time(0.95,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "95%", "queryType": "range", - "refId": "D" + "refId": "C" }, { "datasource": { @@ -971,28 +1065,28 @@ "uid": "${lokids}" }, "editorMode": "builder", - "expr": "quantile_over_time(0.99,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", + "expr": "quantile_over_time(0.99,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "99%", "queryType": "range", - "refId": "E" + "refId": "D" }, { "datasource": { "type": "loki", "uid": "${lokids}" }, - "editorMode": "builder", - "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",duration=\"duration\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap duration[1h]) by(filename)", + "editorMode": "code", + "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle=\"idle\",flavor=\"flavor\" | event=\"runner_start\" | flavor=~\"$flavor\" | unwrap idle[1h]) by(filename)", "hide": false, "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "Max", "queryType": "range", - "refId": "C" + "refId": "E" } ], - "title": "Reconciliation Duration (Percentile)", + "title": "Runner Idle Duration (Percentile)", "type": "timeseries" }, { @@ -1924,6 +2018,6 @@ "timepicker": {}, "timezone": "", "title": "GitHub Self-Hosted Runner Metrics", - "version": 14, + "version": 15, "weekStart": "" } From ee9a9f74a7bd56c29e145cf3d3b9254b6a897f70 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Tue, 10 Sep 2024 11:39:33 +0200 Subject: [PATCH 3/7] introduce constant --- src-docs/charm.md | 11 ++++++----- src/charm.py | 10 ++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src-docs/charm.md b/src-docs/charm.md index fa38de542..5b1540fee 100644 --- a/src-docs/charm.md +++ b/src-docs/charm.md @@ -15,12 +15,13 @@ Charm for creating and managing GitHub self-hosted runner instances. - **RECONCILE_INTERVAL_CONFIG_NAME** - **TEST_MODE_CONFIG_NAME** - **TOKEN_CONFIG_NAME** +- **RECONCILIATION_INTERVAL_TIMEOUT_FACTOR** - **RECONCILE_RUNNERS_EVENT** - **REACTIVE_MQ_DB_NAME** --- - + ## function `catch_charm_errors` @@ -46,7 +47,7 @@ Catch common errors in charm. --- - + ## function `catch_action_errors` @@ -72,7 +73,7 @@ Catch common errors in actions. --- - + ## class `ReconcileRunnersEvent` Event representing a periodic check to ensure runners are ok. @@ -83,7 +84,7 @@ Event representing a periodic check to ensure runners are ok. --- - + ## class `GithubRunnerCharm` Charm for managing GitHub self-hosted runners. @@ -100,7 +101,7 @@ Charm for managing GitHub self-hosted runners. - `ram_pool_path`: The path to memdisk storage. - `kernel_module_path`: The path to kernel modules. - + ### method `__init__` diff --git a/src/charm.py b/src/charm.py index cba747124..9dae00059 100755 --- a/src/charm.py +++ b/src/charm.py @@ -94,6 +94,10 @@ from runner_manager import LXDRunnerManager, LXDRunnerManagerConfig from runner_manager_type import LXDFlushMode +# We assume a stuck reconcile event when it takes longer +# than 10 times a normal interval. Currently, we are only aware of +# https://bugs.launchpad.net/juju/+bug/2055184 causing a stuck reconcile event. +RECONCILIATION_INTERVAL_TIMEOUT_FACTOR = 10 RECONCILE_RUNNERS_EVENT = "reconcile-runners" # This is currently hardcoded and may be moved to a config option in the future. @@ -555,10 +559,8 @@ def _set_reconcile_timer(self) -> None: self._event_timer.ensure_event_timer( event_name="reconcile-runners", interval=int(self.config[RECONCILE_INTERVAL_CONFIG_NAME]), - # We assume a stuck reconcile event when it takes longer - # than 10 times a normal interval. Currently, we are only aware of - # https://bugs.launchpad.net/juju/+bug/2055184 causing a stuck reconcile event. - timeout=10 * int(self.config[RECONCILE_INTERVAL_CONFIG_NAME]), + timeout=RECONCILIATION_INTERVAL_TIMEOUT_FACTOR + * int(self.config[RECONCILE_INTERVAL_CONFIG_NAME]), ) def _ensure_reconcile_timer_is_active(self) -> None: From 7784e1c74d1a4ccd193da8f08389b73a28018519 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Tue, 10 Sep 2024 11:55:52 +0200 Subject: [PATCH 4/7] update dashboard --- src/grafana_dashboards/metrics.json | 37 +++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/grafana_dashboards/metrics.json b/src/grafana_dashboards/metrics.json index 346f971be..929e0d93a 100644 --- a/src/grafana_dashboards/metrics.json +++ b/src/grafana_dashboards/metrics.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 557, + "id": 567, "links": [], "liveNow": false, "panels": [ @@ -866,7 +866,7 @@ "type": "loki", "uid": "${lokids}" }, - "description": "The average actual reconciliation interval.", + "description": "The average actual reconciliation interval.\n\nUnlike the Reconciliation Duration panel, this panel shows the time between the actual start of reconciliations. This is useful for identifying problems where the reconciliation itself is behaving normally, but the event itself is not being scheduled as expected.", "fieldConfig": { "defaults": { "color": { @@ -908,7 +908,20 @@ }, "unit": "s" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Duration" + } + ] + } + ] }, "gridPos": { "h": 8, @@ -947,12 +960,26 @@ "editorMode": "code", "expr": "1 / sum by (flavor) (rate({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", flavor=\"flavor\" | event=\"reconciliation\" [$__range]))", "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", - "legendFormat": "50%", + "legendFormat": "", "queryType": "instant", "refId": "A" } ], - "title": "Actual Average Reconciliation Interval", + "title": "Average Reconciliation Interval", + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Value #A" + } + ] + } + } + ], "type": "barchart" }, { From f0f153ba8ef3e666890197e34857ed95460e438e Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Tue, 10 Sep 2024 12:01:52 +0200 Subject: [PATCH 5/7] rename panel using term "Application" --- src/grafana_dashboards/metrics.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/grafana_dashboards/metrics.json b/src/grafana_dashboards/metrics.json index 929e0d93a..0e8728765 100644 --- a/src/grafana_dashboards/metrics.json +++ b/src/grafana_dashboards/metrics.json @@ -493,7 +493,7 @@ "refId": "E" } ], - "title": "Max Job Queue Duration By Flavour", + "title": "Max Job Queue Duration By Application", "type": "timeseries" }, { From deaa76da29e54d6fc0e90f22aec9bac8d7a5a08f Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Tue, 10 Sep 2024 12:03:15 +0200 Subject: [PATCH 6/7] update docs --- docs/reference/cos.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/reference/cos.md b/docs/reference/cos.md index 898845c3e..799bc8a95 100644 --- a/docs/reference/cos.md +++ b/docs/reference/cos.md @@ -19,6 +19,8 @@ The "GitHub Self-Hosted Runner Metrics" metrics dashboard presents the following - Runner idle duration - Charm reconciliation duration - Job queue duration - how long a job waits in the queue before a runner picks it up + - Max job queue duration by application: Similar to "Job queue duration" panel, but shows maximum durations by charm application. + - Average reconciliation interval: Shows the average time between reconciliation events, broken down by charm application. - Jobs: Displays certain metrics about the jobs executed by the runners. These metrics can be displayed per repository by specifying a regular expression on the `Repository` variable. The following metrics are displayed: - Proportion charts: Share of jobs by completion status, job conclusion, application, repo policy check failure http codes and github events over time. From 402da2d430006f8ae42494189ca4191e9fc9d521 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Tue, 10 Sep 2024 12:13:21 +0200 Subject: [PATCH 7/7] remove Optional --- src/event_timer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/event_timer.py b/src/event_timer.py index 5cc284fa4..ab94c5736 100644 --- a/src/event_timer.py +++ b/src/event_timer.py @@ -5,7 +5,7 @@ import logging import subprocess # nosec B404 from pathlib import Path -from typing import Optional, TypedDict +from typing import TypedDict import jinja2 @@ -46,7 +46,7 @@ class EventConfig(TypedDict): event: str interval: int random_delay: int - timeout: Optional[int] + timeout: int unit: str @@ -123,7 +123,7 @@ def ensure_event_timer(self, event_name: str, interval: int, timeout: int) -> No Raises: TimerEnableError: Timer cannot be started. Events will be not emitted. """ - timeout_in_secs = timeout * 60 if timeout is not None else None + timeout_in_secs = timeout * 60 context: EventConfig = { "event": event_name,