Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(aci milestone 3): ACI alert rule dual update helper #82982

Merged
merged 18 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 91 additions & 3 deletions src/sentry/workflow_engine/migration_helpers/alert_rule.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Any

from sentry.deletions.models.scheduleddeletion import RegionScheduledDeletion
from sentry.incidents.grouptype import MetricAlertFire
Expand Down Expand Up @@ -32,6 +33,13 @@

logger = logging.getLogger(__name__)

FIELDS_TO_DETECTOR_FIELDS = {
"name": "name",
"description": "description",
"user_id": "owner_user_id",
"team_id": "owner_team_id",
}


def get_action_type(alert_rule_trigger_action: AlertRuleTriggerAction) -> str | None:
if alert_rule_trigger_action.sentry_app_id:
Expand Down Expand Up @@ -375,6 +383,86 @@ def migrate_alert_rule(
)


def update_migrated_alert_rule(alert_rule: AlertRule, updated_fields: dict[str, Any]) -> (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will we update the workflow + actions here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are dual updated with the AlertRuleTriggers/AlertRuleActions

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how will you get these updated fields?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dual updated with the alert rule

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok because the tests make it seem like you're generating them manually so i wasn't sure

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, we're just testing the actual dual writing code separately so that the tests in test_logic.py can be cleaner (we also haven't hooked up the dual writing logic yet)

tuple[
DetectorState,
Detector,
]
| None
):
try:
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=alert_rule)
except AlertRuleDetector.DoesNotExist:
logger.exception(
"AlertRuleDetector does not exist",
extra={"alert_rule_id": alert_rule.id},
)
return None

detector: Detector = alert_rule_detector.detector

try:
detector_state = DetectorState.objects.get(detector=detector)
except DetectorState.DoesNotExist:
logger.exception(
"DetectorState does not exist",
extra={"alert_rule_id": alert_rule.id, "detector_id": detector.id},
)
return None

updated_detector_fields: dict[str, Any] = {}
config = detector.config.copy()

for field, detector_field in FIELDS_TO_DETECTOR_FIELDS.items():
if updated_field := updated_fields.get(field):
updated_detector_fields[detector_field] = updated_field
# update config fields
config_fields = MetricAlertFire.detector_config_schema["properties"].keys()
for field in config_fields:
if field in updated_fields:
config[field] = updated_fields[field]
updated_detector_fields["config"] = config

# if the user updated resolve_threshold or threshold_type, then we also need to update the detector triggers
if "threshold_type" in updated_fields or "resolve_threshold" in updated_fields:
data_condition_group = detector.workflow_condition_group
if data_condition_group is None:
# this shouldn't be possible due to the way we dual write
logger.error(
"AlertRuleDetector has no associated DataConditionGroup",
extra={"alert_rule_id": alert_rule.id},
)
return None
data_conditions = DataCondition.objects.filter(condition_group=data_condition_group)
if "threshold_type" in updated_fields:
threshold_type = (
Condition.GREATER
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value
else Condition.LESS
)
resolve_threshold_type = (
Condition.LESS_OR_EQUAL
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be resolve_threshold_type?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the resolve threshold type is the opposite as the specified threshold type

Copy link
Member

@ceorourke ceorourke Jan 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is just "less" and "greater" rather than with "or equal" for both - https://github.com/getsentry/sentry/blob/master/src/sentry/workflow_engine/migration_helpers/alert_rule.py#L208

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think this logic is overall correct - it's inverting the logic type from the alert rule to create the resolution condition; for example: greater than 500 is Priority.HIGH would have a resolution threshold: less than or equal to 500 is Priority.RESOLVE -- and that means we don't need to do the +/- 0.000001 trick from before.

since the logic is always > or < in metric alerts, we can safely assume the equality operator should be applied for the inverse with resolution.

else Condition.GREATER_OR_EQUAL
)
for dc in data_conditions:
if dc.condition_result == DetectorPriorityLevel.OK:
dc.update(type=resolve_threshold_type)
else:
dc.update(type=threshold_type)

if "resolve_threshold" in updated_fields:
resolve_condition = data_conditions.filter(condition_result=DetectorPriorityLevel.OK)
resolve_condition.update(comparison=updated_fields["resolve_threshold"])

detector.update(**updated_detector_fields)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this fails, should the alert rule be updated? should updating the alert rule + the detector happen in a transaction so either both or neither is updated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cathteng how is that done if you're updating 2 models? cause usually it's like with transaction.atomic(router.db_for_write(MyModel)):


# reset detector status, as the rule was updated
detector_state.update(active=False, state=DetectorPriorityLevel.OK)

return detector_state, detector


def get_data_source(alert_rule: AlertRule) -> DataSource | None:
snuba_query = alert_rule.snuba_query
organization = alert_rule.organization
Expand Down Expand Up @@ -408,13 +496,13 @@ def dual_delete_migrated_alert_rule(
# isn't flagged into dual write
logger.info(
"AlertRuleDetector does not exist",
extra={"alert_rule_id": AlertRule.id},
extra={"alert_rule_id": alert_rule.id},
)
return
except AlertRuleWorkflow.DoesNotExist:
logger.info(
"AlertRuleWorkflow does not exist",
extra={"alert_rule_id": AlertRule.id},
extra={"alert_rule_id": alert_rule.id},
)
return

Expand All @@ -426,7 +514,7 @@ def dual_delete_migrated_alert_rule(
if data_source is None:
logger.info(
"DataSource does not exist",
extra={"alert_rule_id": AlertRule.id},
extra={"alert_rule_id": alert_rule.id},
)
# NOTE: for migrated alert rules, each workflow is associated with a single detector
# make sure there are no other detectors associated with the workflow, then delete it if so
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
migrate_metric_action,
migrate_metric_data_conditions,
migrate_resolve_threshold_data_conditions,
update_migrated_alert_rule,
)
from sentry.workflow_engine.models import (
Action,
Expand Down Expand Up @@ -297,7 +298,7 @@ def test_calculate_resolve_threshold_with_warning(self):
resolve_threshold = get_resolve_threshold(detector_dcg)
assert resolve_threshold == self.alert_rule_trigger_warning.alert_threshold

def create_metric_alert_trigger_auto_resolve(self):
def test_create_metric_alert_trigger_auto_resolve(self):
"""
Test that we create the correct resolution DataConditions when an AlertRule has no explicit resolve threshold
"""
Expand All @@ -306,6 +307,7 @@ def create_metric_alert_trigger_auto_resolve(self):

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)
migrate_resolve_threshold_data_conditions(metric_alert)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

Expand All @@ -327,15 +329,16 @@ def create_metric_alert_trigger_auto_resolve(self):
condition_group=resolve_data_condition.condition_group
).exists()

def create_metric_alert_trigger_auto_resolve_less_than(self):
def test_create_metric_alert_trigger_auto_resolve_less_than(self):
"""
Test that we assign the resolve detector trigger the correct type if the threshold type is ABOVE
Test that we assign the resolve detector trigger the correct type if the threshold type is BELOW
"""
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.ABOVE)
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.BELOW)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why has this switched?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default threshold type is ABOVE, so I changed it in this test to BELOW so we're not testing the same case twice. The test wasn't running because I named it incorrectly, so I missed that it was failing the test for the resolve threshold being GTE (happens when the threshold type is BELOW)

critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)
migrate_resolve_threshold_data_conditions(metric_alert)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

Expand Down Expand Up @@ -422,3 +425,115 @@ def test_create_metric_alert_trigger_action_no_type(self, mock_logger):
"alert_rule_trigger_action_id": self.alert_rule_trigger_action_integration.id,
},
)

def test_update_metric_alert(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
mifu67 marked this conversation as resolved.
Show resolved Hide resolved
updated_fields = {
"name": "hojicha",
"description": "a Japanese green tea roasted over charcoal",
}

alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert)
detector = alert_rule_detector.detector
assert detector.name == self.metric_alert.name
assert detector.description == self.metric_alert.description
detector_state = DetectorState.objects.get(detector=detector)
detector_state.update(
active=True, state=DetectorPriorityLevel.HIGH
) # so we can confirm that our update actually changes things
assert detector_state.active

update_migrated_alert_rule(self.metric_alert, updated_fields)
detector.refresh_from_db()
detector_state.refresh_from_db()

assert detector.name == "hojicha"
assert detector.description == "a Japanese green tea roasted over charcoal"

assert detector_state.state == str(DetectorPriorityLevel.OK.value)
assert detector_state.active is False

def test_update_metric_alert_owner(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
updated_fields = {
"user_id": self.user.id,
"team_id": None,
}
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert)
detector = alert_rule_detector.detector
assert detector.owner_user_id == self.metric_alert.user_id
assert detector.owner_team_id == self.metric_alert.team_id

update_migrated_alert_rule(self.metric_alert, updated_fields)
detector.refresh_from_db()

assert detector.owner_user_id == self.user.id
assert detector.owner_team_id is None

def test_update_metric_alert_config(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
updated_fields = {
"detection_type": "percent",
"threshold_period": 1,
"sensitivity": None,
"seasonality": None,
"comparison_delta": 3600,
}
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert)
detector = alert_rule_detector.detector
config = detector.config
assert config == {
"detection_type": "static",
"threshold_period": 1,
"sensitivity": None,
"seasonality": None,
"comparison_delta": None,
}

update_migrated_alert_rule(self.metric_alert, updated_fields)
detector.refresh_from_db()

assert detector.config == updated_fields

def test_update_metric_alert_threshold_type(self):
metric_alert = self.create_alert_rule()
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)
migrate_resolve_threshold_data_conditions(metric_alert)
# because there are only two objects in the DB
critical_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.HIGH
)
resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)

assert critical_detector_trigger.type == Condition.GREATER
assert resolve_detector_trigger.type == Condition.LESS_OR_EQUAL

updated_fields = {"threshold_type": AlertRuleThresholdType.BELOW}
update_migrated_alert_rule(metric_alert, updated_fields)

critical_detector_trigger.refresh_from_db()
resolve_detector_trigger.refresh_from_db()

assert critical_detector_trigger.type == Condition.LESS
assert resolve_detector_trigger.type == Condition.GREATER_OR_EQUAL

def test_update_metric_alert_resolve_threshold(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)
migrate_resolve_threshold_data_conditions(self.metric_alert)

resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)
assert resolve_detector_trigger.comparison == 2

updated_fields = {"resolve_threshold": 10}
update_migrated_alert_rule(self.metric_alert, updated_fields)
resolve_detector_trigger.refresh_from_db()

assert resolve_detector_trigger.comparison == 10
Loading