-
-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(aci milestone 3): ACI alert rule dual update helper #82982
Changes from all commits
4a7fa6e
17794d3
782c669
87b43cf
3194c4c
449e805
e582567
e62708d
791918e
9db54c7
ac4959e
bad3440
a274225
317a505
237300a
5703fff
37a19bc
7a47aca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import logging | ||
from typing import Any | ||
|
||
from sentry.deletions.models.scheduleddeletion import RegionScheduledDeletion | ||
from sentry.incidents.grouptype import MetricAlertFire | ||
|
@@ -32,6 +33,13 @@ | |
|
||
logger = logging.getLogger(__name__) | ||
|
||
FIELDS_TO_DETECTOR_FIELDS = { | ||
"name": "name", | ||
"description": "description", | ||
"user_id": "owner_user_id", | ||
"team_id": "owner_team_id", | ||
} | ||
|
||
|
||
def get_action_type(alert_rule_trigger_action: AlertRuleTriggerAction) -> str | None: | ||
if alert_rule_trigger_action.sentry_app_id: | ||
|
@@ -375,6 +383,86 @@ def migrate_alert_rule( | |
) | ||
|
||
|
||
def update_migrated_alert_rule(alert_rule: AlertRule, updated_fields: dict[str, Any]) -> ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how will you get these updated fields? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dual updated with the alert rule There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok because the tests make it seem like you're generating them manually so i wasn't sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yeah, we're just testing the actual dual writing code separately so that the tests in test_logic.py can be cleaner (we also haven't hooked up the dual writing logic yet) |
||
tuple[ | ||
DetectorState, | ||
Detector, | ||
] | ||
| None | ||
): | ||
try: | ||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=alert_rule) | ||
except AlertRuleDetector.DoesNotExist: | ||
logger.exception( | ||
"AlertRuleDetector does not exist", | ||
extra={"alert_rule_id": alert_rule.id}, | ||
) | ||
return None | ||
|
||
detector: Detector = alert_rule_detector.detector | ||
|
||
try: | ||
detector_state = DetectorState.objects.get(detector=detector) | ||
except DetectorState.DoesNotExist: | ||
logger.exception( | ||
"DetectorState does not exist", | ||
extra={"alert_rule_id": alert_rule.id, "detector_id": detector.id}, | ||
) | ||
return None | ||
|
||
updated_detector_fields: dict[str, Any] = {} | ||
config = detector.config.copy() | ||
|
||
for field, detector_field in FIELDS_TO_DETECTOR_FIELDS.items(): | ||
if updated_field := updated_fields.get(field): | ||
updated_detector_fields[detector_field] = updated_field | ||
# update config fields | ||
config_fields = MetricAlertFire.detector_config_schema["properties"].keys() | ||
for field in config_fields: | ||
if field in updated_fields: | ||
config[field] = updated_fields[field] | ||
updated_detector_fields["config"] = config | ||
|
||
# if the user updated resolve_threshold or threshold_type, then we also need to update the detector triggers | ||
if "threshold_type" in updated_fields or "resolve_threshold" in updated_fields: | ||
data_condition_group = detector.workflow_condition_group | ||
if data_condition_group is None: | ||
# this shouldn't be possible due to the way we dual write | ||
logger.error( | ||
"AlertRuleDetector has no associated DataConditionGroup", | ||
extra={"alert_rule_id": alert_rule.id}, | ||
) | ||
return None | ||
data_conditions = DataCondition.objects.filter(condition_group=data_condition_group) | ||
if "threshold_type" in updated_fields: | ||
threshold_type = ( | ||
Condition.GREATER | ||
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value | ||
else Condition.LESS | ||
) | ||
resolve_threshold_type = ( | ||
Condition.LESS_OR_EQUAL | ||
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the resolve threshold type is the opposite as the specified threshold type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is just "less" and "greater" rather than with "or equal" for both - https://github.com/getsentry/sentry/blob/master/src/sentry/workflow_engine/migration_helpers/alert_rule.py#L208 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think this logic is overall correct - it's inverting the logic type from the alert rule to create the resolution condition; for example: since the logic is always |
||
else Condition.GREATER_OR_EQUAL | ||
) | ||
for dc in data_conditions: | ||
if dc.condition_result == DetectorPriorityLevel.OK: | ||
dc.update(type=resolve_threshold_type) | ||
else: | ||
dc.update(type=threshold_type) | ||
|
||
if "resolve_threshold" in updated_fields: | ||
resolve_condition = data_conditions.filter(condition_result=DetectorPriorityLevel.OK) | ||
resolve_condition.update(comparison=updated_fields["resolve_threshold"]) | ||
|
||
detector.update(**updated_detector_fields) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if this fails, should the alert rule be updated? should updating the alert rule + the detector happen in a transaction so either both or neither is updated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cathteng how is that done if you're updating 2 models? cause usually it's like |
||
|
||
# reset detector status, as the rule was updated | ||
detector_state.update(active=False, state=DetectorPriorityLevel.OK) | ||
|
||
return detector_state, detector | ||
|
||
|
||
def get_data_source(alert_rule: AlertRule) -> DataSource | None: | ||
snuba_query = alert_rule.snuba_query | ||
organization = alert_rule.organization | ||
|
@@ -408,13 +496,13 @@ def dual_delete_migrated_alert_rule( | |
# isn't flagged into dual write | ||
logger.info( | ||
"AlertRuleDetector does not exist", | ||
extra={"alert_rule_id": AlertRule.id}, | ||
extra={"alert_rule_id": alert_rule.id}, | ||
) | ||
return | ||
except AlertRuleWorkflow.DoesNotExist: | ||
logger.info( | ||
"AlertRuleWorkflow does not exist", | ||
extra={"alert_rule_id": AlertRule.id}, | ||
extra={"alert_rule_id": alert_rule.id}, | ||
) | ||
return | ||
|
||
|
@@ -426,7 +514,7 @@ def dual_delete_migrated_alert_rule( | |
if data_source is None: | ||
logger.info( | ||
"DataSource does not exist", | ||
extra={"alert_rule_id": AlertRule.id}, | ||
extra={"alert_rule_id": alert_rule.id}, | ||
) | ||
# NOTE: for migrated alert rules, each workflow is associated with a single detector | ||
# make sure there are no other detectors associated with the workflow, then delete it if so | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
migrate_metric_action, | ||
migrate_metric_data_conditions, | ||
migrate_resolve_threshold_data_conditions, | ||
update_migrated_alert_rule, | ||
) | ||
from sentry.workflow_engine.models import ( | ||
Action, | ||
|
@@ -297,7 +298,7 @@ def test_calculate_resolve_threshold_with_warning(self): | |
resolve_threshold = get_resolve_threshold(detector_dcg) | ||
assert resolve_threshold == self.alert_rule_trigger_warning.alert_threshold | ||
|
||
def create_metric_alert_trigger_auto_resolve(self): | ||
def test_create_metric_alert_trigger_auto_resolve(self): | ||
""" | ||
Test that we create the correct resolution DataConditions when an AlertRule has no explicit resolve threshold | ||
""" | ||
|
@@ -306,6 +307,7 @@ def create_metric_alert_trigger_auto_resolve(self): | |
|
||
migrate_alert_rule(metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(critical_trigger) | ||
migrate_resolve_threshold_data_conditions(metric_alert) | ||
|
||
detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector | ||
|
||
|
@@ -327,15 +329,16 @@ def create_metric_alert_trigger_auto_resolve(self): | |
condition_group=resolve_data_condition.condition_group | ||
).exists() | ||
|
||
def create_metric_alert_trigger_auto_resolve_less_than(self): | ||
def test_create_metric_alert_trigger_auto_resolve_less_than(self): | ||
""" | ||
Test that we assign the resolve detector trigger the correct type if the threshold type is ABOVE | ||
Test that we assign the resolve detector trigger the correct type if the threshold type is BELOW | ||
""" | ||
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.ABOVE) | ||
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.BELOW) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why has this switched? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The default threshold type is ABOVE, so I changed it in this test to BELOW so we're not testing the same case twice. The test wasn't running because I named it incorrectly, so I missed that it was failing the test for the resolve threshold being GTE (happens when the threshold type is BELOW) |
||
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical") | ||
|
||
migrate_alert_rule(metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(critical_trigger) | ||
migrate_resolve_threshold_data_conditions(metric_alert) | ||
|
||
detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector | ||
|
||
|
@@ -422,3 +425,115 @@ def test_create_metric_alert_trigger_action_no_type(self, mock_logger): | |
"alert_rule_trigger_action_id": self.alert_rule_trigger_action_integration.id, | ||
}, | ||
) | ||
|
||
def test_update_metric_alert(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
mifu67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
updated_fields = { | ||
"name": "hojicha", | ||
"description": "a Japanese green tea roasted over charcoal", | ||
} | ||
|
||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert) | ||
detector = alert_rule_detector.detector | ||
assert detector.name == self.metric_alert.name | ||
assert detector.description == self.metric_alert.description | ||
detector_state = DetectorState.objects.get(detector=detector) | ||
detector_state.update( | ||
active=True, state=DetectorPriorityLevel.HIGH | ||
) # so we can confirm that our update actually changes things | ||
assert detector_state.active | ||
|
||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
detector.refresh_from_db() | ||
detector_state.refresh_from_db() | ||
|
||
assert detector.name == "hojicha" | ||
assert detector.description == "a Japanese green tea roasted over charcoal" | ||
|
||
assert detector_state.state == str(DetectorPriorityLevel.OK.value) | ||
assert detector_state.active is False | ||
|
||
def test_update_metric_alert_owner(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
updated_fields = { | ||
"user_id": self.user.id, | ||
"team_id": None, | ||
} | ||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert) | ||
detector = alert_rule_detector.detector | ||
assert detector.owner_user_id == self.metric_alert.user_id | ||
assert detector.owner_team_id == self.metric_alert.team_id | ||
|
||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
detector.refresh_from_db() | ||
|
||
assert detector.owner_user_id == self.user.id | ||
assert detector.owner_team_id is None | ||
|
||
def test_update_metric_alert_config(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
updated_fields = { | ||
"detection_type": "percent", | ||
"threshold_period": 1, | ||
"sensitivity": None, | ||
"seasonality": None, | ||
"comparison_delta": 3600, | ||
} | ||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert) | ||
detector = alert_rule_detector.detector | ||
config = detector.config | ||
assert config == { | ||
"detection_type": "static", | ||
"threshold_period": 1, | ||
"sensitivity": None, | ||
"seasonality": None, | ||
"comparison_delta": None, | ||
} | ||
|
||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
detector.refresh_from_db() | ||
|
||
assert detector.config == updated_fields | ||
|
||
def test_update_metric_alert_threshold_type(self): | ||
metric_alert = self.create_alert_rule() | ||
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical") | ||
|
||
migrate_alert_rule(metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(critical_trigger) | ||
migrate_resolve_threshold_data_conditions(metric_alert) | ||
# because there are only two objects in the DB | ||
critical_detector_trigger = DataCondition.objects.get( | ||
condition_result=DetectorPriorityLevel.HIGH | ||
) | ||
resolve_detector_trigger = DataCondition.objects.get( | ||
condition_result=DetectorPriorityLevel.OK | ||
) | ||
|
||
assert critical_detector_trigger.type == Condition.GREATER | ||
assert resolve_detector_trigger.type == Condition.LESS_OR_EQUAL | ||
|
||
updated_fields = {"threshold_type": AlertRuleThresholdType.BELOW} | ||
update_migrated_alert_rule(metric_alert, updated_fields) | ||
|
||
critical_detector_trigger.refresh_from_db() | ||
resolve_detector_trigger.refresh_from_db() | ||
|
||
assert critical_detector_trigger.type == Condition.LESS | ||
assert resolve_detector_trigger.type == Condition.GREATER_OR_EQUAL | ||
|
||
def test_update_metric_alert_resolve_threshold(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(self.alert_rule_trigger_critical) | ||
migrate_resolve_threshold_data_conditions(self.metric_alert) | ||
|
||
resolve_detector_trigger = DataCondition.objects.get( | ||
condition_result=DetectorPriorityLevel.OK | ||
) | ||
assert resolve_detector_trigger.comparison == 2 | ||
|
||
updated_fields = {"resolve_threshold": 10} | ||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
resolve_detector_trigger.refresh_from_db() | ||
|
||
assert resolve_detector_trigger.comparison == 10 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will we update the workflow + actions here too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are dual updated with the AlertRuleTriggers/AlertRuleActions