Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(aci milestone 3): ACI alert rule dual update helper #82982

Merged
merged 18 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions src/sentry/workflow_engine/migration_helpers/alert_rule.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Any

from sentry.deletions.models.scheduleddeletion import RegionScheduledDeletion
from sentry.incidents.grouptype import MetricAlertFire
Expand Down Expand Up @@ -374,6 +375,99 @@ def migrate_alert_rule(
)


def update_migrated_alert_rule(alert_rule: AlertRule, updated_fields: dict[str, Any]) -> (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will we update the workflow + actions here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are dual updated with the AlertRuleTriggers/AlertRuleActions

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how will you get these updated fields?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dual updated with the alert rule

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok because the tests make it seem like you're generating them manually so i wasn't sure

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, we're just testing the actual dual writing code separately so that the tests in test_logic.py can be cleaner (we also haven't hooked up the dual writing logic yet)

tuple[
DetectorState,
Detector,
]
| None
):
# TODO: maybe pull this into a helper method?
try:
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=alert_rule)
except AlertRuleDetector.DoesNotExist:
logger.exception(
"AlertRuleDetector does not exist",
extra={"alert_rule_id": AlertRule.id},
)
return None

detector: Detector = alert_rule_detector.detector

try:
detector_state = DetectorState.objects.get(detector=detector)
except DetectorState.DoesNotExist:
logger.exception(
"DetectorState does not exist",
extra={"alert_rule_id": AlertRule.id, "detector_id": detector.id},
)
return None

updated_detector_fields: dict[str, Any] = {}
config = detector.config.copy()

if "name" in updated_fields:
updated_detector_fields["name"] = updated_fields["name"]
if "description" in updated_fields:
updated_detector_fields["description"] = updated_fields["description"]
if "user_id" in updated_fields:
updated_detector_fields["owner_user_id"] = updated_fields["user_id"]
if "team_id" in updated_fields:
updated_detector_fields["owner_team_id"] = updated_fields["team_id"]
mifu67 marked this conversation as resolved.
Show resolved Hide resolved
# update config fields
if "threshold_period" in updated_fields:
config["threshold_period"] = updated_fields["threshold_period"]
if "sensitivity" in updated_fields:
config["sensitivity"] = updated_fields["sensitivity"]
if "seasonality" in updated_fields:
config["seasonality"] = updated_fields["seasonality"]
if "comparison_delta" in updated_fields:
config["comparison_delta"] = updated_fields["comparison_delta"]
updated_detector_fields["config"] = config

# if the user updated resolve_threshold or threshold_type, then we also need to update the detector triggers
if "threshold_type" in updated_fields or "resolve_threshold" in updated_fields:
data_condition_group = detector.workflow_condition_group
if data_condition_group is None:
# this shouldn't be possible due to the way we dual write
logger.error(
"AlertRuleDetector has no associated DataConditionGroup",
extra={"alert_rule_id": AlertRule.id},
)
return None
data_conditions = DataCondition.objects.filter(condition_group=data_condition_group)
if "threshold_type" in updated_fields:
threshold_type = (
Condition.GREATER
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value
else Condition.LESS
)
resolve_threshold_type = (
Condition.LESS_OR_EQUAL
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be resolve_threshold_type?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the resolve threshold type is the opposite as the specified threshold type

Copy link
Member

@ceorourke ceorourke Jan 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is just "less" and "greater" rather than with "or equal" for both - https://github.com/getsentry/sentry/blob/master/src/sentry/workflow_engine/migration_helpers/alert_rule.py#L208

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think this logic is overall correct - it's inverting the logic type from the alert rule to create the resolution condition; for example: greater than 500 is Priority.HIGH would have a resolution threshold: less than or equal to 500 is Priority.RESOLVE -- and that means we don't need to do the +/- 0.000001 trick from before.

since the logic is always > or < in metric alerts, we can safely assume the equality operator should be applied for the inverse with resolution.

else Condition.GREATER_OR_EQUAL
)
for dc in data_conditions:
if dc.condition_result == DetectorPriorityLevel.OK:
dc.update(type=resolve_threshold_type)
else:
dc.update(type=threshold_type)
resolve_condition = data_conditions.filter(condition_result=DetectorPriorityLevel.OK)
mifu67 marked this conversation as resolved.
Show resolved Hide resolved

if "resolve_threshold" in updated_fields:
resolve_condition = data_conditions.filter(condition_result=DetectorPriorityLevel.OK)
resolve_condition.update(comparison=updated_fields["resolve_threshold"])

detector.update(**updated_detector_fields)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this fails, should the alert rule be updated? should updating the alert rule + the detector happen in a transaction so either both or neither is updated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cathteng how is that done if you're updating 2 models? cause usually it's like with transaction.atomic(router.db_for_write(MyModel)):


# reset detector status, as the rule was updated
detector_state.update(active=False, state=DetectorPriorityLevel.OK)

# TODO: do we need to create an audit log entry here?
# do we need to return the detector's data conditions/DCG?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: audit log we probably should but I think we can punt that to another task as we'll need to create new audit log types entirely for the whole project

idk that we need to return the dc / dcg here since the user hitting this code path will be unaware of ACI as a whole, so it's not like we need to return this data in the api response. we will need to make a new serializer when we switch over though, so maybe then? I wouldn't worry about it right now though

return detector_state, detector


def get_data_source(alert_rule: AlertRule) -> DataSource | None:
snuba_query = alert_rule.snuba_query
organization = alert_rule.organization
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
migrate_metric_action,
migrate_metric_data_conditions,
migrate_resolve_threshold_data_conditions,
update_migrated_alert_rule,
)
from sentry.workflow_engine.models import (
Action,
Expand Down Expand Up @@ -292,7 +293,7 @@ def test_calculate_resolve_threshold_with_warning(self):
resolve_threshold = get_resolve_threshold(detector_dcg)
assert resolve_threshold == self.alert_rule_trigger_warning.alert_threshold

def create_metric_alert_trigger_auto_resolve(self):
def test_create_metric_alert_trigger_auto_resolve(self):
"""
Test that we create the correct resolution DataConditions when an AlertRule has no explicit resolve threshold
"""
Expand All @@ -301,6 +302,7 @@ def create_metric_alert_trigger_auto_resolve(self):

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)
migrate_resolve_threshold_data_conditions(metric_alert)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

Expand All @@ -322,15 +324,16 @@ def create_metric_alert_trigger_auto_resolve(self):
condition_group=resolve_data_condition.condition_group
).exists()

def create_metric_alert_trigger_auto_resolve_less_than(self):
def test_create_metric_alert_trigger_auto_resolve_less_than(self):
"""
Test that we assign the resolve detector trigger the correct type if the threshold type is ABOVE
Test that we assign the resolve detector trigger the correct type if the threshold type is BELOW
"""
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.ABOVE)
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.BELOW)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why has this switched?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default threshold type is ABOVE, so I changed it in this test to BELOW so we're not testing the same case twice. The test wasn't running because I named it incorrectly, so I missed that it was failing the test for the resolve threshold being GTE (happens when the threshold type is BELOW)

critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)
migrate_resolve_threshold_data_conditions(metric_alert)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

Expand Down Expand Up @@ -417,3 +420,92 @@ def test_create_metric_alert_trigger_action_no_type(self, mock_logger):
"alert_rule_trigger_action_id": self.alert_rule_trigger_action_integration.id,
},
)

def test_update_metric_alert(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
mifu67 marked this conversation as resolved.
Show resolved Hide resolved
updated_fields = {
"name": "hojicha",
"description": "a Japanese green tea roasted over charcoal",
}

alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert)
detector = alert_rule_detector.detector
detector_state = DetectorState.objects.get(detector=detector)
detector_state.update(
active=True, state=DetectorPriorityLevel.HIGH
) # so we can confirm that our update actually changes things
assert detector_state.active

update_migrated_alert_rule(self.metric_alert, updated_fields)
detector.refresh_from_db()
detector_state.refresh_from_db()

assert detector.name == "hojicha"
assert detector.description == "a Japanese green tea roasted over charcoal"

assert detector_state.state == str(DetectorPriorityLevel.OK.value)
assert detector_state.active is False

def test_update_metric_alert_owner(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
updated_fields = {
"user_id": self.user.id,
"team_id": None,
}
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert)
detector = alert_rule_detector.detector

update_migrated_alert_rule(self.metric_alert, updated_fields)
detector.refresh_from_db()

assert detector.owner_user_id == self.user.id
assert detector.owner_team_id is None

def test_update_metric_alert_config(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
updated_fields = {
"threshold_period": 1,
"sensitivity": None,
"seasonality": None,
"comparison_delta": 3600,
}
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert)
detector = alert_rule_detector.detector

update_migrated_alert_rule(self.metric_alert, updated_fields)
detector.refresh_from_db()

assert detector.config == updated_fields

def test_update_metric_alert_threshold_type(self):
metric_alert = self.create_alert_rule()
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)
migrate_resolve_threshold_data_conditions(metric_alert)

updated_fields = {"threshold_type": AlertRuleThresholdType.BELOW}
update_migrated_alert_rule(metric_alert, updated_fields)
# because there are only two objects in the DB
critical_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.HIGH
)
resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)

assert critical_detector_trigger.type == Condition.LESS
assert resolve_detector_trigger.type == Condition.GREATER_OR_EQUAL

def test_update_metric_alert_resolve_threshold(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)
migrate_resolve_threshold_data_conditions(self.metric_alert)

updated_fields = {"resolve_threshold": 10}
update_migrated_alert_rule(self.metric_alert, updated_fields)
resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)
assert resolve_detector_trigger.comparison == 10
Loading