From 43d9a4720639d87618c605f2eeff091bb0e2a154 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 26 Dec 2023 09:58:12 -0600 Subject: [PATCH 1/2] handle WaitingForResources phase from backoff controller Signed-off-by: Daniel Rammer --- flytepropeller/pkg/controller/nodes/array/handler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytepropeller/pkg/controller/nodes/array/handler.go b/flytepropeller/pkg/controller/nodes/array/handler.go index 7dcdef4749..06a693334e 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler.go +++ b/flytepropeller/pkg/controller/nodes/array/handler.go @@ -633,7 +633,7 @@ func (a *arrayNodeHandler) buildArrayNodeContext(ctx context.Context, nCtx inter // currently just mocking based on node phase -> which works for all k8s plugins // we can not pre-allocated a bit array because max size is 256B and with 5k fanout node state = 1.28MB pluginStateBytes := a.pluginStateBytesStarted - if taskPhase == int(core.PhaseUndefined) || taskPhase == int(core.PhaseRetryableFailure) { + if taskPhase == int(core.PhaseUndefined) || taskPhase == int(core.PhaseRetryableFailure) || taskPhase == int(core.PhaseWaitingForResources) { pluginStateBytes = a.pluginStateBytesNotStarted } From ebd2c3b210c38333ec9b679493e7b5d0b483b29e Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 26 Dec 2023 10:18:09 -0600 Subject: [PATCH 2/2] added unit test Signed-off-by: Daniel Rammer --- .../pkg/controller/nodes/array/handler_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/flytepropeller/pkg/controller/nodes/array/handler_test.go b/flytepropeller/pkg/controller/nodes/array/handler_test.go index fbb5ae875c..b0328250ab 100644 --- a/flytepropeller/pkg/controller/nodes/array/handler_test.go +++ b/flytepropeller/pkg/controller/nodes/array/handler_test.go @@ -507,6 +507,24 @@ func TestHandleArrayNodePhaseExecuting(t *testing.T) { expectedTransitionPhase: handler.EPhaseRunning, expectedExternalResourcePhases: []idlcore.TaskExecution_Phase{idlcore.TaskExecution_RUNNING}, }, + { + name: "StartSubNodesNewAttempts", + subNodePhases: []v1alpha1.NodePhase{ + v1alpha1.NodePhaseQueued, + v1alpha1.NodePhaseQueued, + }, + subNodeTaskPhases: []core.Phase{ + core.PhaseRetryableFailure, + core.PhaseWaitingForResources, + }, + subNodeTransitions: []handler.Transition{ + handler.DoTransition(handler.TransitionTypeEphemeral, handler.PhaseInfoRunning(&handler.ExecutionInfo{})), + handler.DoTransition(handler.TransitionTypeEphemeral, handler.PhaseInfoRunning(&handler.ExecutionInfo{})), + }, + expectedArrayNodePhase: v1alpha1.ArrayNodePhaseExecuting, + expectedTransitionPhase: handler.EPhaseRunning, + expectedExternalResourcePhases: []idlcore.TaskExecution_Phase{idlcore.TaskExecution_RUNNING, idlcore.TaskExecution_RUNNING}, + }, { name: "AllSubNodesSuccedeed", subNodePhases: []v1alpha1.NodePhase{