diff --git a/docs/sources/integrations/alertmanager/index.md b/docs/sources/integrations/alertmanager/index.md index a9131e8612..07cb5576ac 100644 --- a/docs/sources/integrations/alertmanager/index.md +++ b/docs/sources/integrations/alertmanager/index.md @@ -110,17 +110,17 @@ Add receiver configuration to `prometheus.yaml` with the **OnCall Heartbeat URL* route: ... routes: - - match: + - match: alertname: heartbeat receiver: 'grafana-oncall-heartbeat' group_wait: 0s group_interval: 1m repeat_interval: 50s receivers: - - name: 'grafana-oncall-heartbeat' + - name: 'grafana-oncall-heartbeat' webhook_configs: - - url: https://oncall-dev-us-central-0.grafana.net/oncall/integrations/v1/alertmanager/1234567890/heartbeat/ - send_resolved: false + - url: https://oncall-dev-us-central-0.grafana.net/oncall/integrations/v1/alertmanager/1234567890/heartbeat/ + send_resolved: false ``` ## Note about legacy integration diff --git a/engine/apps/alerts/incident_log_builder/incident_log_builder.py b/engine/apps/alerts/incident_log_builder/incident_log_builder.py index 27f484f591..6b92092ac4 100644 --- a/engine/apps/alerts/incident_log_builder/incident_log_builder.py +++ b/engine/apps/alerts/incident_log_builder/incident_log_builder.py @@ -3,6 +3,7 @@ from django.db.models import Q from django.utils import timezone +from apps.alerts.constants import BUNDLED_NOTIFICATION_DELAY_SECONDS from apps.base.messaging import get_messaging_backend_from_id from apps.schedules.ical_utils import list_users_to_notify_from_ical @@ -640,6 +641,24 @@ def _get_notification_plan_for_user( last_user_log = None + # get ids of notification policies with bundled notification + notification_policies_in_bundle = ( + self.alert_group.bundled_notifications.all() + .values( + "notification_policy", + "bundle_uuid", + ) + .distinct() + ) + # get lists of notification policies with scheduled but not triggered bundled notifications + # and of all notification policies with bundled notifications + 
notification_policy_ids_in_scheduled_bundle: typing.Set[int] = set() + notification_policy_ids_in_bundle: typing.Set[int] = set() + for notification_policy_in_bundle in notification_policies_in_bundle: + if notification_policy_in_bundle["bundle_uuid"] is None: + notification_policy_ids_in_scheduled_bundle.add(notification_policy_in_bundle["notification_policy"]) + notification_policy_ids_in_bundle.add(notification_policy_in_bundle["notification_policy"]) + notification_policy_order = 0 if not future_step: # escalation step has been passed, so escalation for user has been already triggered. last_user_log = ( @@ -651,6 +670,8 @@ def _get_notification_plan_for_user( UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FINISHED, ], ) + # exclude logs with bundled notification + .exclude(notification_policy_id__in=notification_policy_ids_in_bundle) .order_by("created_at") .last() ) @@ -673,19 +694,30 @@ def _get_notification_plan_for_user( _, notification_policies = user_to_notify.get_notification_policies_or_use_default_fallback(important=important) for notification_policy in notification_policies: - future_notification = notification_policy.order >= notification_policy_order + # notification step has been passed but was bundled and delayed - show this step in notification plan + is_scheduled_bundled_notification = notification_policy.id in notification_policy_ids_in_scheduled_bundle + # notification step has not been passed - show this step in notification plan as well + future_notification = ( + notification_policy.order >= notification_policy_order + and notification_policy.id not in notification_policy_ids_in_bundle + ) if notification_policy.step == UserNotificationPolicy.Step.WAIT: wait_delay = notification_policy.wait_delay if wait_delay is not None: timedelta += wait_delay # increase timedelta for next steps - elif future_notification: + elif future_notification or is_scheduled_bundled_notification: + notification_timedelta = ( + timedelta + 
timezone.timedelta(seconds=BUNDLED_NOTIFICATION_DELAY_SECONDS) + if is_scheduled_bundled_notification + else timedelta + ) plan_line = self._render_user_notification_line( user_to_notify, notification_policy, for_slack=for_slack ) # add plan_line to user plan_lines list - if not notification_plan_dict.get(timedelta): + if not notification_plan_dict.get(notification_timedelta): plan = {"user_id": user_to_notify.pk, "plan_lines": [plan_line]} - notification_plan_dict.setdefault(timedelta, []).append(plan) + notification_plan_dict.setdefault(notification_timedelta, []).append(plan) else: - notification_plan_dict[timedelta][0]["plan_lines"].append(plan_line) + notification_plan_dict[notification_timedelta][0]["plan_lines"].append(plan_line) return notification_plan_dict diff --git a/engine/apps/alerts/migrations/0055_alter_bundlednotification_alert_group.py b/engine/apps/alerts/migrations/0055_alter_bundlednotification_alert_group.py new file mode 100644 index 0000000000..b9b42e7b19 --- /dev/null +++ b/engine/apps/alerts/migrations/0055_alter_bundlednotification_alert_group.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.10 on 2024-07-24 14:24 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('alerts', '0054_usernotificationbundle_bundlednotification_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='bundlednotification', + name='alert_group', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='bundled_notifications', to='alerts.alertgroup'), + ), + ] diff --git a/engine/apps/alerts/models/alert_group.py b/engine/apps/alerts/models/alert_group.py index 3429361cd4..59162a349a 100644 --- a/engine/apps/alerts/models/alert_group.py +++ b/engine/apps/alerts/models/alert_group.py @@ -42,6 +42,7 @@ Alert, AlertGroupLogRecord, AlertReceiveChannel, + BundledNotification, ResolutionNote, ResolutionNoteSlackMessage, ) @@ -189,6 
+190,7 @@ def slack_templated_first_alert(self): class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.Model): acknowledged_by_user: typing.Optional["User"] alerts: "RelatedManager['Alert']" + bundled_notifications: "RelatedManager['BundledNotification']" dependent_alert_groups: "RelatedManager['AlertGroup']" channel: "AlertReceiveChannel" log_records: "RelatedManager['AlertGroupLogRecord']" diff --git a/engine/apps/alerts/models/user_notification_bundle.py b/engine/apps/alerts/models/user_notification_bundle.py index 4c7936378b..cf4d1d4e88 100644 --- a/engine/apps/alerts/models/user_notification_bundle.py +++ b/engine/apps/alerts/models/user_notification_bundle.py @@ -77,7 +77,7 @@ class BundledNotification(models.Model): notification_policy: typing.Optional["UserNotificationPolicy"] notification_bundle: "UserNotificationBundle" - alert_group = models.ForeignKey("alerts.AlertGroup", on_delete=models.CASCADE) + alert_group = models.ForeignKey("alerts.AlertGroup", on_delete=models.CASCADE, related_name="bundled_notifications") alert_receive_channel = models.ForeignKey("alerts.AlertReceiveChannel", on_delete=models.CASCADE) notification_policy = models.ForeignKey("base.UserNotificationPolicy", on_delete=models.SET_NULL, null=True) notification_bundle = models.ForeignKey( diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index 8258da1956..d9265571f2 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -583,6 +583,7 @@ def send_bundled_notification(user_notification_bundle_id: int): active_alert_group_ids: typing.Set[int] = set() log_record_notification_triggered = None is_notification_allowed = user_notification_bundle.user.is_notification_allowed + bundle_uuid = uuid4() # create logs for notification in notifications: @@ -609,11 +610,19 @@ def send_bundled_notification(user_notification_bundle_id: int): author=user_notification_bundle.user, 
type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, alert_group=notification.alert_group, + notification_policy=notification.notification_policy, notification_step=UserNotificationPolicy.Step.NOTIFY, notification_channel=user_notification_bundle.notification_channel, ) log_records_to_create.append(log_record_notification_triggered) + # delete non-active notifications and update bundle_uuid for the remaining notifications + if not is_notification_allowed: + notifications.delete() + else: + notifications.filter(id__in=skip_notification_ids).delete() + notifications.update(bundle_uuid=bundle_uuid) + if len(log_records_to_create) == 1 and log_record_notification_triggered: # perform regular notification log_record_notification_triggered.save() @@ -629,7 +638,6 @@ def send_bundled_notification(user_notification_bundle_id: int): False, ) ) - notifications.delete() else: UserNotificationPolicyLogRecord.objects.bulk_create(log_records_to_create, batch_size=5000) @@ -638,11 +646,7 @@ def send_bundled_notification(user_notification_bundle_id: int): f"no alert groups to notify about or notification is not allowed for user " f"{user_notification_bundle.user_id}" ) - notifications.delete() else: - notifications.filter(id__in=skip_notification_ids).delete() - bundle_uuid = uuid4() - notifications.update(bundle_uuid=bundle_uuid) task_logger.info( f"perform bundled notification for alert groups with ids: {active_alert_group_ids}, " f"bundle_uuid: {bundle_uuid}" diff --git a/engine/apps/alerts/tests/test_incident_log_builder.py b/engine/apps/alerts/tests/test_incident_log_builder.py index 594399a7f7..20970a96bd 100644 --- a/engine/apps/alerts/tests/test_incident_log_builder.py +++ b/engine/apps/alerts/tests/test_incident_log_builder.py @@ -1,8 +1,9 @@ import pytest +from django.utils import timezone from apps.alerts.incident_log_builder import IncidentLogBuilder from apps.alerts.models import EscalationPolicy -from apps.base.models import UserNotificationPolicy 
+from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord @pytest.mark.django_db @@ -39,6 +40,92 @@ def test_escalation_plan_messaging_backends( assert list(plan.values()) == [["send test only backend message to {}".format(user.username)]] +@pytest.mark.django_db +def test_get_notification_plan_for_user_with_bundled_notification( + make_organization_and_user, + make_user_notification_bundle, + make_user_notification_policy, + make_alert_receive_channel, + make_alert_group, + make_user_notification_policy_log_record, +): + """ + Test building notification plan when one of the notifications was bundled: + - test that scheduled but not triggered bundled notification appears in notification plan + """ + + organization, user = make_organization_and_user() + alert_receive_channel = make_alert_receive_channel(organization) + alert_group = make_alert_group(alert_receive_channel) + + log_builder = IncidentLogBuilder(alert_group) + + notification_bundle = make_user_notification_bundle(user, UserNotificationPolicy.NotificationChannel.SMS) + notification_policy_sms = make_user_notification_policy( + user, UserNotificationPolicy.Step.NOTIFY, notify_by=UserNotificationPolicy.NotificationChannel.SMS + ) + notification_policy_slack = make_user_notification_policy( + user, UserNotificationPolicy.Step.NOTIFY, notify_by=UserNotificationPolicy.NotificationChannel.SLACK + ) + make_user_notification_policy(user, UserNotificationPolicy.Step.WAIT, wait_delay=timezone.timedelta(minutes=5)) + make_user_notification_policy( + user, UserNotificationPolicy.Step.NOTIFY, notify_by=UserNotificationPolicy.NotificationChannel.PHONE_CALL + ) + + # bundled SMS notification has been scheduled, the second notification step "Notify by Slack" has not been passed + # SMS notification should appear in notification plan with timedelta=2min + bundled_sms_notification = notification_bundle.notifications.create( + alert_group=alert_group, + 
notification_policy=notification_policy_sms, + alert_receive_channel=alert_receive_channel, + ) + notification_plan_dict = log_builder._get_notification_plan_for_user(user) + expected_plan_dict = { + timezone.timedelta(0): [ + { + "user_id": user.id, + "plan_lines": [f"invite {user.username} in Slack"], + "is_the_first_notification_step": False, + } + ], + timezone.timedelta(seconds=120): [{"user_id": user.id, "plan_lines": [f"send sms to {user.username}"]}], + timezone.timedelta(seconds=300): [{"user_id": user.id, "plan_lines": [f"call {user.username} by phone"]}], + } + assert notification_plan_dict == expected_plan_dict + + # the second notification step "Notify by Slack" has been passed + make_user_notification_policy_log_record( + author=user, + alert_group=alert_group, + notification_policy=notification_policy_slack, + type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, + ) + notification_plan_dict = log_builder._get_notification_plan_for_user(user) + expected_plan_dict = { + timezone.timedelta(0): [{"user_id": user.id, "plan_lines": [], "is_the_first_notification_step": False}], + timezone.timedelta(seconds=120): [{"user_id": user.id, "plan_lines": [f"send sms to {user.username}"]}], + timezone.timedelta(seconds=300): [{"user_id": user.id, "plan_lines": [f"call {user.username} by phone"]}], + } + assert notification_plan_dict == expected_plan_dict + + # bundled SMS notification has been triggered, it should not appear in notification plan anymore + make_user_notification_policy_log_record( + author=user, + alert_group=alert_group, + notification_policy=notification_policy_sms, + type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, + ) + bundled_sms_notification.bundle_uuid = "test_bundle_uuid" + bundled_sms_notification.save() + + notification_plan_dict = log_builder._get_notification_plan_for_user(user) + expected_plan_dict = { + timezone.timedelta(0): [{"user_id": user.id, "plan_lines": [], 
"is_the_first_notification_step": False}], + timezone.timedelta(seconds=300): [{"user_id": user.id, "plan_lines": [f"call {user.username} by phone"]}], + } + assert notification_plan_dict == expected_plan_dict + + @pytest.mark.django_db def test_escalation_plan_custom_webhooks( make_organization_and_user, diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py index e482f8964d..8de1e6792c 100644 --- a/engine/apps/alerts/tests/test_notify_user.py +++ b/engine/apps/alerts/tests/test_notify_user.py @@ -500,8 +500,11 @@ def test_send_bundle_notification( f"there is only one alert group in bundled notification, perform regular notification. " f"alert_group {alert_group_1.id}" ) in caplog.text - # check all notifications were deleted - assert notification_bundle.notifications.all().count() == 0 + # check bundle_uuid was set + assert notification_bundle.notifications.filter(bundle_uuid__isnull=True).count() == 0 + assert notification_bundle.notifications.all().count() == 1 + # cleanup notifications + notification_bundle.notifications.all().delete() # send notification for 0 active alert group notification_bundle.append_notification(alert_group_1, notification_policy) diff --git a/engine/apps/api/views/alert_group.py b/engine/apps/api/views/alert_group.py index 5fd6f511fd..bd0dec3821 100644 --- a/engine/apps/api/views/alert_group.py +++ b/engine/apps/api/views/alert_group.py @@ -335,9 +335,10 @@ def get_queryset(self, ignore_filtering_by_available_teams=False): if settings.ALERT_GROUPS_DISABLE_PREFER_ORDERING_INDEX: # workaround related to MySQL "ORDER BY LIMIT Query Optimizer Bug" # read more: https://hackmysql.com/infamous-order-by-limit-query-optimizer-bug/ - # this achieves the same effect as "FORCE INDEX (alert_group_list_index)" when - # paired with "ORDER BY started_at_optimized DESC" (ordering is performed in AlertGroupCursorPaginator). 
- queryset = queryset.extra({"started_at_optimized": "alerts_alertgroup.started_at + 0"}) + from django_mysql.models import add_QuerySetMixin + + queryset = add_QuerySetMixin(queryset) + queryset = queryset.force_index("alert_group_list_index") # Filter by labels. Since alert group labels are "static" filter by names, not IDs. label_query = self.request.query_params.getlist("label", []) diff --git a/engine/common/api_helpers/paginators.py b/engine/common/api_helpers/paginators.py index 3692fde7aa..e85ed62a52 100644 --- a/engine/common/api_helpers/paginators.py +++ b/engine/common/api_helpers/paginators.py @@ -1,6 +1,5 @@ import typing -from django.conf import settings from rest_framework.pagination import BasePagination, CursorPagination, PageNumberPagination from rest_framework.response import Response @@ -86,4 +85,4 @@ class FifteenPageSizePaginator(PathPrefixedPagePagination): class AlertGroupCursorPaginator(PathPrefixedCursorPagination): page_size = 25 - ordering = "-started_at_optimized" if settings.ALERT_GROUPS_DISABLE_PREFER_ORDERING_INDEX else "-started_at" + ordering = "-started_at" diff --git a/engine/settings/base.py b/engine/settings/base.py index f5b793dd75..cb903dd196 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -189,6 +189,8 @@ class DatabaseTypes: pymysql.install_as_MySQLdb() +DJANGO_MYSQL_REWRITE_QUERIES = True + ALERT_GROUPS_DISABLE_PREFER_ORDERING_INDEX = DATABASE_TYPE == DatabaseTypes.MYSQL and getenv_boolean( "ALERT_GROUPS_DISABLE_PREFER_ORDERING_INDEX", default=False ) @@ -293,6 +295,9 @@ class DatabaseTypes: "apps.chatops_proxy", ] +if DATABASE_TYPE == DatabaseTypes.MYSQL: + INSTALLED_APPS += ["django_mysql"] + REST_FRAMEWORK = { "DEFAULT_PARSER_CLASSES": ( "rest_framework.parsers.JSONParser",