Skip to content

Commit

Permalink
test: fix flaky fsm retry test (#1767)
Browse files Browse the repository at this point in the history
The FSM retry test was sometimes failing because the async task was not
picked up within the 1s window that the test expected.
The window is now 1.5s. Retry delays in the test have also been increased
to 2s, with fewer retries, so that the window is still shorter than the
retry delay and the test does not approach the 30s timeout.
Example of flakiness:
https://github.com/TBD54566975/ftl/actions/runs/9494930873/job/26166196434?pr=1765#logs
  • Loading branch information
matt2e authored Jun 13, 2024
1 parent d6ad5ae commit 77a9acc
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
13 changes: 7 additions & 6 deletions backend/controller/dal/fsm_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ func TestFSMRetry(t *testing.T) {
if i > 0 {
delay := times[i].Sub(times[i-1])
targetDelay := delays[i-1]
assert.True(t, delay >= targetDelay && delay < time.Second+targetDelay, "unexpected time diff for %s retry %d: %v (expected %v - %v)", origin, i, delay, targetDelay, time.Second+targetDelay)
acceptableWindow := 1500 * time.Millisecond
assert.True(t, delay >= targetDelay && delay < acceptableWindow+targetDelay, "unexpected time diff for %s retry %d: %v (expected %v - %v)", origin, i, delay, targetDelay, acceptableWindow+targetDelay)
}
}
}
Expand All @@ -94,15 +95,15 @@ func TestFSMRetry(t *testing.T) {
in.Call("fsmretry", "startTransitionToTwo", in.Obj{"id": "1"}, func(t testing.TB, response in.Obj) {}),
in.Call("fsmretry", "startTransitionToThree", in.Obj{"id": "2"}, func(t testing.TB, response in.Obj) {}),

in.Sleep(8*time.Second), //6s is longest run of retries
in.Sleep(8*time.Second), //5s is longest run of retries

// both FSMs instances should have failed
in.QueryRow("ftl", "SELECT COUNT(*) FROM fsm_instances WHERE status = 'failed'", int64(2)),

in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:1", "fsmretry.state2"), int64(4)),
checkRetries("fsm:fsmretry.fsm:1", "fsmretry.state2", []time.Duration{time.Second, time.Second, time.Second}),
in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:2", "fsmretry.state3"), int64(4)),
checkRetries("fsm:fsmretry.fsm:2", "fsmretry.state3", []time.Duration{time.Second, 2 * time.Second, 3 * time.Second}),
in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:1", "fsmretry.state2"), int64(3)),
checkRetries("fsm:fsmretry.fsm:1", "fsmretry.state2", []time.Duration{2 * time.Second, 2 * time.Second}),
in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:2", "fsmretry.state3"), int64(3)),
checkRetries("fsm:fsmretry.fsm:2", "fsmretry.state3", []time.Duration{2 * time.Second, 3 * time.Second}),
)
}

Expand Down
4 changes: 2 additions & 2 deletions backend/controller/dal/testdata/go/fsmretry/fsmretry.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/TBD54566975/ftl/go-runtime/ftl"
)

//ftl:retry 3 1s 3s
//ftl:retry 2 2s 3s
var fsm = ftl.FSM("fsm",
ftl.Start(State1),
ftl.Transition(State1, State2),
Expand All @@ -32,7 +32,7 @@ func State1(ctx context.Context, in StartEvent) error {
}

//ftl:verb
//ftl:retry 3 1s 1s
//ftl:retry 2 2s 2s
func State2(ctx context.Context, in TransitionToTwoEvent) error {
	// This handler fails unconditionally: it ignores its inputs and always
	// reports the same error.
	err := fmt.Errorf("transition will never succeed")
	return err
}
Expand Down

0 comments on commit 77a9acc

Please sign in to comment.