From 77a9acc96412015c6d8c3d774d8bdf2381e0bf1f Mon Sep 17 00:00:00 2001 From: Matt Toohey Date: Thu, 13 Jun 2024 20:11:14 +1000 Subject: [PATCH] test: fix flaky fsm retry test (#1767) The FSM retry test was sometimes failing because the async task was not picked up within the 1s window that the test expected. The window is now 1.5s. The retry delays in the test have also been increased to 2s, with fewer retries, so that the window is still shorter than the retry delay and we aren't approaching the 30s timeout. Example of flakiness: https://github.com/TBD54566975/ftl/actions/runs/9494930873/job/26166196434?pr=1765#logs --- backend/controller/dal/fsm_integration_test.go | 13 +++++++------ .../controller/dal/testdata/go/fsmretry/fsmretry.go | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/backend/controller/dal/fsm_integration_test.go b/backend/controller/dal/fsm_integration_test.go index 5874fdc999..d78e210615 100644 --- a/backend/controller/dal/fsm_integration_test.go +++ b/backend/controller/dal/fsm_integration_test.go @@ -74,7 +74,8 @@ func TestFSMRetry(t *testing.T) { if i > 0 { delay := times[i].Sub(times[i-1]) targetDelay := delays[i-1] - assert.True(t, delay >= targetDelay && delay < time.Second+targetDelay, "unexpected time diff for %s retry %d: %v (expected %v - %v)", origin, i, delay, targetDelay, time.Second+targetDelay) + acceptableWindow := 1500 * time.Millisecond + assert.True(t, delay >= targetDelay && delay < acceptableWindow+targetDelay, "unexpected time diff for %s retry %d: %v (expected %v - %v)", origin, i, delay, targetDelay, acceptableWindow+targetDelay) } } } @@ -94,15 +95,15 @@ func TestFSMRetry(t *testing.T) { in.Call("fsmretry", "startTransitionToTwo", in.Obj{"id": "1"}, func(t testing.TB, response in.Obj) {}), in.Call("fsmretry", "startTransitionToThree", in.Obj{"id": "2"}, func(t testing.TB, response in.Obj) {}), - in.Sleep(8*time.Second), //6s is longest run of retries + in.Sleep(8*time.Second), //5s is longest run of
retries // both FSMs instances should have failed in.QueryRow("ftl", "SELECT COUNT(*) FROM fsm_instances WHERE status = 'failed'", int64(2)), - in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:1", "fsmretry.state2"), int64(4)), - checkRetries("fsm:fsmretry.fsm:1", "fsmretry.state2", []time.Duration{time.Second, time.Second, time.Second}), - in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:2", "fsmretry.state3"), int64(4)), - checkRetries("fsm:fsmretry.fsm:2", "fsmretry.state3", []time.Duration{time.Second, 2 * time.Second, 3 * time.Second}), + in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:1", "fsmretry.state2"), int64(3)), + checkRetries("fsm:fsmretry.fsm:1", "fsmretry.state2", []time.Duration{2 * time.Second, 2 * time.Second}), + in.QueryRow("ftl", fmt.Sprintf("SELECT COUNT(*) FROM async_calls WHERE origin = '%s' AND verb = '%s'", "fsm:fsmretry.fsm:2", "fsmretry.state3"), int64(3)), + checkRetries("fsm:fsmretry.fsm:2", "fsmretry.state3", []time.Duration{2 * time.Second, 3 * time.Second}), ) } diff --git a/backend/controller/dal/testdata/go/fsmretry/fsmretry.go b/backend/controller/dal/testdata/go/fsmretry/fsmretry.go index 00ef94a5d6..f6faa704f4 100644 --- a/backend/controller/dal/testdata/go/fsmretry/fsmretry.go +++ b/backend/controller/dal/testdata/go/fsmretry/fsmretry.go @@ -7,7 +7,7 @@ import ( "github.com/TBD54566975/ftl/go-runtime/ftl" ) -//ftl:retry 3 1s 3s +//ftl:retry 2 2s 3s var fsm = ftl.FSM("fsm", ftl.Start(State1), ftl.Transition(State1, State2), @@ -32,7 +32,7 @@ func State1(ctx context.Context, in StartEvent) error { } //ftl:verb -//ftl:retry 3 1s 1s +//ftl:retry 2 2s 2s func State2(ctx context.Context, in TransitionToTwoEvent) error { return fmt.Errorf("transition will never succeed") }