From 20bf25ca9bd2a5446af0dcedb924ec36fdee88e2 Mon Sep 17 00:00:00 2001 From: zdevito Date: Tue, 10 Jun 2025 15:48:08 -0700 Subject: [PATCH] Avoid repeat error reports If an op depends on two nodes that are failing at the time it is issued, then history will generate two failure messages for the sequence number. If there was a future attached to the sequence number, then the second failure will cause a KeyError in the invocation dictionary since the first instance already removed it. This depends on a race condition to hit because the messages for the failure must already be present when the node failing is being added. Differential Revision: [D76372510](https://our.internmc.facebook.com/intern/diff/D76372510/) [ghstack-poisoned] --- controller/src/history.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/controller/src/history.rs b/controller/src/history.rs index 89f96ea1..4405869a 100644 --- a/controller/src/history.rs +++ b/controller/src/history.rs @@ -274,9 +274,11 @@ impl History { Some(RefStatus::Errored(exception)) => { // We know that this invocation hasn't been completed yet, so we can // directly call set_exception on it. - invocation.set_exception(exception.clone()); - results.push((seq, Some(Err(exception.clone())))); - invocation.reported = true; + if !invocation.reported { + invocation.set_exception(exception.clone()); + results.push((seq, Some(Err(exception.clone())))); + invocation.reported = true; + } } Some(RefStatus::Invoked(invoked_seq)) => { if let Some(invocation) = self.invocations.get_mut(invoked_seq) {