diff --git a/engine/apps/metrics_exporter/metrics_collectors.py b/engine/apps/metrics_exporter/metrics_collectors.py
index 3709783c61..302f5756a4 100644
--- a/engine/apps/metrics_exporter/metrics_collectors.py
+++ b/engine/apps/metrics_exporter/metrics_collectors.py
@@ -102,14 +102,19 @@ def _get_alert_groups_total_metric(self, org_ids):
                 labels_values = list(map(str, labels_values))
                 # clause below is needed for compatibility with old metric cache during rollout metrics with services
                 if "services" in integration_data:
+                    count_per_state = {state.value: 0 for state in AlertGroupState}
                     for service_name in integration_data["services"]:
                         for state in AlertGroupState:
-                            alert_groups_total.add_metric(
-                                labels_values + [state.value],
-                                # todo:metrics: replace [state.value] when all metric cache is updated
-                                # + [service_name, state.value],
-                                integration_data["services"][service_name][state.value],
-                            )
+                            count_per_state[state.value] += integration_data["services"][service_name][state.value]
+                    # todo:metrics: with enabling service_name label move "add_metric" under
+                    # "for service_name..." iteration
+                    for state_name, counter in count_per_state.items():
+                        alert_groups_total.add_metric(
+                            labels_values + [state_name],
+                            # todo:metrics: replace [state.value] when all metric cache is updated
+                            # + [service_name, state.value],
+                            counter,
+                        )
                 else:
                     for state in AlertGroupState:
                         alert_groups_total.add_metric(labels_values + [state.value], integration_data[state.value])
@@ -143,24 +148,25 @@ def _get_response_time_metric(self, org_ids):
                 # clause below is needed for compatibility with old metric cache during rollout metrics with services
                 if "services" in integration_data:
+                    response_time_values = []
                     # todo:metrics: for service_name, response_time
                     for _, response_time in integration_data["services"].items():
                         if not response_time:
                             continue
-                        buckets, sum_value = self.get_buckets_with_sum(response_time)
-                        buckets = sorted(list(buckets.items()), key=lambda x: float(x[0]))
-                        alert_groups_response_time_seconds.add_metric(
-                            labels_values,  # + [service_name] todo:metrics: uncomment when all metric cache is updated
-                            buckets=buckets,
-                            sum_value=sum_value,
-                        )
+                        response_time_values.extend(response_time)
                 else:
                     response_time_values = integration_data["response_time"]
                     if not response_time_values:
                         continue
-                    buckets, sum_value = self.get_buckets_with_sum(response_time_values)
-                    buckets = sorted(list(buckets.items()), key=lambda x: float(x[0]))
-                    alert_groups_response_time_seconds.add_metric(labels_values, buckets=buckets, sum_value=sum_value)
+                # todo:metrics: with enabling service_name label move "add_metric" under
+                # "for service_name, response_time..." iteration
+                buckets, sum_value = self.get_buckets_with_sum(response_time_values)
+                buckets = sorted(list(buckets.items()), key=lambda x: float(x[0]))
+                alert_groups_response_time_seconds.add_metric(
+                    labels_values,  # + [service_name] todo:metrics: uncomment when all metric cache is updated
+                    buckets=buckets,
+                    sum_value=sum_value,
+                )
             org_id_from_key = RE_ALERT_GROUPS_RESPONSE_TIME.match(org_key).groups()[0]
             processed_org_ids.add(int(org_id_from_key))
 
         missing_org_ids = org_ids - processed_org_ids
diff --git a/engine/apps/metrics_exporter/tests/conftest.py b/engine/apps/metrics_exporter/tests/conftest.py
index 91d7d315f0..2290e720f3 100644
--- a/engine/apps/metrics_exporter/tests/conftest.py
+++ b/engine/apps/metrics_exporter/tests/conftest.py
@@ -118,6 +118,12 @@ def _mock_cache_get(key, *args, **kwargs):
                             "acknowledged": 3,
                             "resolved": 5,
                         },
+                        "test_service": {
+                            "firing": 10,
+                            "silenced": 10,
+                            "acknowledged": 10,
+                            "resolved": 10,
+                        },
                     },
                 },
             },
@@ -138,9 +144,7 @@ def _mock_cache_get(key, *args, **kwargs):
                     "org_id": 1,
                     "slug": "Test stack",
                     "id": 1,
-                    "services": {
-                        NO_SERVICE_VALUE: [2, 10, 200, 650],
-                    },
+                    "services": {NO_SERVICE_VALUE: [2, 10, 200, 650], "test_service": [4, 8, 12]},
                 },
             },
             USER_WAS_NOTIFIED_OF_ALERT_GROUPS: {
diff --git a/engine/apps/metrics_exporter/tests/test_metrics_collectors.py b/engine/apps/metrics_exporter/tests/test_metrics_collectors.py
index 685b11d2e4..61921157c8 100644
--- a/engine/apps/metrics_exporter/tests/test_metrics_collectors.py
+++ b/engine/apps/metrics_exporter/tests/test_metrics_collectors.py
@@ -62,9 +62,13 @@ def test_application_metrics_collector_mixed_cache(
         if metric.name == ALERT_GROUPS_TOTAL:
             # integration with labels for each alert group state
             assert len(metric.samples) == len(AlertGroupState) * 2
+            # check that values from different services were combined to one sample
+            assert {2, 3, 4, 5, 12, 13, 14, 15} == set(sample.value for sample in metric.samples)
         elif metric.name == ALERT_GROUPS_RESPONSE_TIME:
             # integration with labels for each value in collector's bucket + _count and _sum histogram values
             assert len(metric.samples) == (len(collector._buckets) + 2) * 2
+            # check that values from different services were combined to one sample
+            assert 7.0 in set(sample.value for sample in metric.samples)
         elif metric.name == USER_WAS_NOTIFIED_OF_ALERT_GROUPS:
             # metric with labels for each notified user
             assert len(metric.samples) == 1