-
-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(aci milestone 3): ACI alert rule dual update helper #82982
Changes from 14 commits
4a7fa6e
17794d3
782c669
87b43cf
3194c4c
449e805
e582567
e62708d
791918e
9db54c7
ac4959e
bad3440
a274225
317a505
237300a
5703fff
37a19bc
7a47aca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import logging | ||
from typing import Any | ||
|
||
from sentry.deletions.models.scheduleddeletion import RegionScheduledDeletion | ||
from sentry.incidents.grouptype import MetricAlertFire | ||
|
@@ -374,6 +375,99 @@ def migrate_alert_rule( | |
) | ||
|
||
|
||
def update_migrated_alert_rule(alert_rule: AlertRule, updated_fields: dict[str, Any]) -> ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how will you get these updated fields? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dual updated with the alert rule There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok because the tests make it seem like you're generating them manually so i wasn't sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yeah, we're just testing the actual dual writing code separately so that the tests in test_logic.py can be cleaner (we also haven't hooked up the dual writing logic yet) |
||
tuple[ | ||
DetectorState, | ||
Detector, | ||
] | ||
| None | ||
): | ||
# TODO: maybe pull this into a helper method? | ||
try: | ||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=alert_rule) | ||
except AlertRuleDetector.DoesNotExist: | ||
logger.exception( | ||
"AlertRuleDetector does not exist", | ||
extra={"alert_rule_id": AlertRule.id}, | ||
) | ||
return None | ||
|
||
detector: Detector = alert_rule_detector.detector | ||
|
||
try: | ||
detector_state = DetectorState.objects.get(detector=detector) | ||
except DetectorState.DoesNotExist: | ||
logger.exception( | ||
"DetectorState does not exist", | ||
extra={"alert_rule_id": AlertRule.id, "detector_id": detector.id}, | ||
) | ||
return None | ||
|
||
updated_detector_fields: dict[str, Any] = {} | ||
config = detector.config.copy() | ||
|
||
if "name" in updated_fields: | ||
updated_detector_fields["name"] = updated_fields["name"] | ||
if "description" in updated_fields: | ||
updated_detector_fields["description"] = updated_fields["description"] | ||
if "user_id" in updated_fields: | ||
updated_detector_fields["owner_user_id"] = updated_fields["user_id"] | ||
if "team_id" in updated_fields: | ||
updated_detector_fields["owner_team_id"] = updated_fields["team_id"] | ||
mifu67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# update config fields | ||
if "threshold_period" in updated_fields: | ||
config["threshold_period"] = updated_fields["threshold_period"] | ||
if "sensitivity" in updated_fields: | ||
config["sensitivity"] = updated_fields["sensitivity"] | ||
if "seasonality" in updated_fields: | ||
config["seasonality"] = updated_fields["seasonality"] | ||
if "comparison_delta" in updated_fields: | ||
config["comparison_delta"] = updated_fields["comparison_delta"] | ||
updated_detector_fields["config"] = config | ||
|
||
# if the user updated resolve_threshold or threshold_type, then we also need to update the detector triggers | ||
if "threshold_type" in updated_fields or "resolve_threshold" in updated_fields: | ||
data_condition_group = detector.workflow_condition_group | ||
if data_condition_group is None: | ||
# this shouldn't be possible due to the way we dual write | ||
logger.error( | ||
"AlertRuleDetector has no associated DataConditionGroup", | ||
extra={"alert_rule_id": AlertRule.id}, | ||
) | ||
return None | ||
data_conditions = DataCondition.objects.filter(condition_group=data_condition_group) | ||
if "threshold_type" in updated_fields: | ||
threshold_type = ( | ||
Condition.GREATER | ||
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value | ||
else Condition.LESS | ||
) | ||
resolve_threshold_type = ( | ||
Condition.LESS_OR_EQUAL | ||
if updated_fields["threshold_type"] == AlertRuleThresholdType.ABOVE.value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the resolve threshold type is the opposite as the specified threshold type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is just "less" and "greater" rather than with "or equal" for both - https://github.com/getsentry/sentry/blob/master/src/sentry/workflow_engine/migration_helpers/alert_rule.py#L208 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think this logic is overall correct - it's inverting the logic type from the alert rule to create the resolution condition; for example: since the logic is always |
||
else Condition.GREATER_OR_EQUAL | ||
) | ||
for dc in data_conditions: | ||
if dc.condition_result == DetectorPriorityLevel.OK: | ||
dc.update(type=resolve_threshold_type) | ||
else: | ||
dc.update(type=threshold_type) | ||
resolve_condition = data_conditions.filter(condition_result=DetectorPriorityLevel.OK) | ||
mifu67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if "resolve_threshold" in updated_fields: | ||
resolve_condition = data_conditions.filter(condition_result=DetectorPriorityLevel.OK) | ||
resolve_condition.update(comparison=updated_fields["resolve_threshold"]) | ||
|
||
detector.update(**updated_detector_fields) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if this fails, should the alert rule be updated? should updating the alert rule + the detector happen in a transaction so either both or neither is updated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cathteng how is that done if you're updating 2 models? cause usually it's like |
||
|
||
# reset detector status, as the rule was updated | ||
detector_state.update(active=False, state=DetectorPriorityLevel.OK) | ||
|
||
# TODO: do we need to create an audit log entry here? | ||
# do we need to return the detector's data conditions/DCG? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. re: audit log we probably should but I think we can punt that to another task as we'll need to create new audit log types entirely for the whole project idk that we need to return the dc / dcg here since the user hitting this code path will be unaware of ACI as a whole, so it's not like we need to return this data in the api response. we will need to make a new serializer when we switch over though, so maybe then? I wouldn't worry about it right now though |
||
return detector_state, detector | ||
|
||
|
||
def get_data_source(alert_rule: AlertRule) -> DataSource | None: | ||
snuba_query = alert_rule.snuba_query | ||
organization = alert_rule.organization | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
migrate_metric_action, | ||
migrate_metric_data_conditions, | ||
migrate_resolve_threshold_data_conditions, | ||
update_migrated_alert_rule, | ||
) | ||
from sentry.workflow_engine.models import ( | ||
Action, | ||
|
@@ -292,7 +293,7 @@ def test_calculate_resolve_threshold_with_warning(self): | |
resolve_threshold = get_resolve_threshold(detector_dcg) | ||
assert resolve_threshold == self.alert_rule_trigger_warning.alert_threshold | ||
|
||
def create_metric_alert_trigger_auto_resolve(self): | ||
def test_create_metric_alert_trigger_auto_resolve(self): | ||
""" | ||
Test that we create the correct resolution DataConditions when an AlertRule has no explicit resolve threshold | ||
""" | ||
|
@@ -301,6 +302,7 @@ def create_metric_alert_trigger_auto_resolve(self): | |
|
||
migrate_alert_rule(metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(critical_trigger) | ||
migrate_resolve_threshold_data_conditions(metric_alert) | ||
|
||
detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector | ||
|
||
|
@@ -322,15 +324,16 @@ def create_metric_alert_trigger_auto_resolve(self): | |
condition_group=resolve_data_condition.condition_group | ||
).exists() | ||
|
||
def create_metric_alert_trigger_auto_resolve_less_than(self): | ||
def test_create_metric_alert_trigger_auto_resolve_less_than(self): | ||
""" | ||
Test that we assign the resolve detector trigger the correct type if the threshold type is ABOVE | ||
Test that we assign the resolve detector trigger the correct type if the threshold type is BELOW | ||
""" | ||
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.ABOVE) | ||
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.BELOW) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why has this switched? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The default threshold type is ABOVE, so I changed it in this test to BELOW so we're not testing the same case twice. The test wasn't running because I named it incorrectly, so I missed that it was failing the test for the resolve threshold being GTE (happens when the threshold type is BELOW) |
||
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical") | ||
|
||
migrate_alert_rule(metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(critical_trigger) | ||
migrate_resolve_threshold_data_conditions(metric_alert) | ||
|
||
detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector | ||
|
||
|
@@ -417,3 +420,92 @@ def test_create_metric_alert_trigger_action_no_type(self, mock_logger): | |
"alert_rule_trigger_action_id": self.alert_rule_trigger_action_integration.id, | ||
}, | ||
) | ||
|
||
def test_update_metric_alert(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
mifu67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
updated_fields = { | ||
"name": "hojicha", | ||
"description": "a Japanese green tea roasted over charcoal", | ||
} | ||
|
||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert) | ||
detector = alert_rule_detector.detector | ||
detector_state = DetectorState.objects.get(detector=detector) | ||
detector_state.update( | ||
active=True, state=DetectorPriorityLevel.HIGH | ||
) # so we can confirm that our update actually changes things | ||
assert detector_state.active | ||
|
||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
detector.refresh_from_db() | ||
detector_state.refresh_from_db() | ||
|
||
assert detector.name == "hojicha" | ||
assert detector.description == "a Japanese green tea roasted over charcoal" | ||
|
||
assert detector_state.state == str(DetectorPriorityLevel.OK.value) | ||
assert detector_state.active is False | ||
|
||
def test_update_metric_alert_owner(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
updated_fields = { | ||
"user_id": self.user.id, | ||
"team_id": None, | ||
} | ||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert) | ||
detector = alert_rule_detector.detector | ||
|
||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
detector.refresh_from_db() | ||
|
||
assert detector.owner_user_id == self.user.id | ||
assert detector.owner_team_id is None | ||
|
||
def test_update_metric_alert_config(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
updated_fields = { | ||
"threshold_period": 1, | ||
"sensitivity": None, | ||
"seasonality": None, | ||
"comparison_delta": 3600, | ||
} | ||
alert_rule_detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert) | ||
detector = alert_rule_detector.detector | ||
|
||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
detector.refresh_from_db() | ||
|
||
assert detector.config == updated_fields | ||
|
||
def test_update_metric_alert_threshold_type(self): | ||
metric_alert = self.create_alert_rule() | ||
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical") | ||
|
||
migrate_alert_rule(metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(critical_trigger) | ||
migrate_resolve_threshold_data_conditions(metric_alert) | ||
|
||
updated_fields = {"threshold_type": AlertRuleThresholdType.BELOW} | ||
update_migrated_alert_rule(metric_alert, updated_fields) | ||
# because there are only two objects in the DB | ||
critical_detector_trigger = DataCondition.objects.get( | ||
condition_result=DetectorPriorityLevel.HIGH | ||
) | ||
resolve_detector_trigger = DataCondition.objects.get( | ||
condition_result=DetectorPriorityLevel.OK | ||
) | ||
|
||
assert critical_detector_trigger.type == Condition.LESS | ||
assert resolve_detector_trigger.type == Condition.GREATER_OR_EQUAL | ||
|
||
def test_update_metric_alert_resolve_threshold(self): | ||
migrate_alert_rule(self.metric_alert, self.rpc_user) | ||
migrate_metric_data_conditions(self.alert_rule_trigger_critical) | ||
migrate_resolve_threshold_data_conditions(self.metric_alert) | ||
|
||
updated_fields = {"resolve_threshold": 10} | ||
update_migrated_alert_rule(self.metric_alert, updated_fields) | ||
resolve_detector_trigger = DataCondition.objects.get( | ||
condition_result=DetectorPriorityLevel.OK | ||
) | ||
assert resolve_detector_trigger.comparison == 10 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will we update the workflow + actions here too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are dual updated with the AlertRuleTriggers/AlertRuleActions