diff --git a/levant/job_status_checker.go b/levant/job_status_checker.go index f54d84b00..acbc63418 100644 --- a/levant/job_status_checker.go +++ b/levant/job_status_checker.go @@ -84,7 +84,7 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool { q := nomadHelper.GenerateBlockingQueryOptions(l.config.Template.Job.Namespace) // Build our small internal checking struct. - levantTasks := make(map[TaskCoordinate]string) + levantTasks := make(map[TaskCoordinate]*nomad.TaskState) for { @@ -103,15 +103,15 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool { // If we get here, set the wi to the latest Index. q.WaitIndex = meta.LastIndex - complete, deadTasks := allocationStatusChecker(levantTasks, allocs) + complete, failedTasks := allocationStatusChecker(levantTasks, allocs) // depending on how we finished up we report our status // If we have no allocations left to track then we can exit and log // information depending on the success. - if complete && deadTasks == 0 { - log.Info().Msg("levant/job_status_checker: all allocations in deployment of job are running") + if complete && failedTasks == 0 { + log.Info().Msg("levant/job_status_checker: all allocations in deployment of job are running or finished successfully") return true - } else if complete && deadTasks > 0 { + } else if complete && failedTasks > 0 { return false } } @@ -121,31 +121,31 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool { // job deployment, an update Levants internal tracking on task status based on // this. This functionality exists as Nomad does not currently support // deployments across all job types. -func allocationStatusChecker(levantTasks map[TaskCoordinate]string, allocs []*nomad.AllocationListStub) (bool, int) { +func allocationStatusChecker(levantTasks map[TaskCoordinate]*nomad.TaskState, allocs []*nomad.AllocationListStub) (bool, int) { complete := true - deadTasks := 0 + failedTasks := 0 for _, alloc := range allocs { - for taskName, task := range alloc.TaskStates { + for taskName, taskState := range alloc.TaskStates { // if the state is one we haven't seen yet then we print a message - if levantTasks[TaskCoordinate{alloc.ID, taskName}] != task.State { + if levantTasks[TaskCoordinate{alloc.ID, taskName}] != taskState { log.Info().Msgf("levant/job_status_checker: task %s in allocation %s now in %s state", - taskName, alloc.ID, task.State) + taskName, alloc.ID, taskState.State) // then we record the new state - levantTasks[TaskCoordinate{alloc.ID, taskName}] = task.State + levantTasks[TaskCoordinate{alloc.ID, taskName}] = taskState } - // then we have some case specific actions - switch levantTasks[TaskCoordinate{alloc.ID, taskName}] { // if a task is still pendign we are not yet done - case "pending": + if levantTasks[TaskCoordinate{alloc.ID, taskName}].State == "pending" { complete = false - // if the task is dead we record that - case "dead": - deadTasks++ + } + + // if a task failed we record that + if levantTasks[TaskCoordinate{alloc.ID, taskName}].Failed { + failedTasks++ } } } - return complete, deadTasks + return complete, failedTasks } diff --git a/levant/job_status_checker_test.go b/levant/job_status_checker_test.go index 316744289..a28b2df69 100644 --- a/levant/job_status_checker_test.go +++ b/levant/job_status_checker_test.go @@ -12,14 +12,15 @@ import ( func TestJobStatusChecker_allocationStatusChecker(t *testing.T) { // Build our task status maps - levantTasks1 := make(map[TaskCoordinate]string) - levantTasks2 := make(map[TaskCoordinate]string) - levantTasks3 := make(map[TaskCoordinate]string) + levantTasks1 := make(map[TaskCoordinate]*nomad.TaskState) + levantTasks2 := make(map[TaskCoordinate]*nomad.TaskState) + levantTasks3 := make(map[TaskCoordinate]*nomad.TaskState) + levantTasks4 := make(map[TaskCoordinate]*nomad.TaskState) // Build a small AllocationListStubs with required information. var allocs1 []*nomad.AllocationListStub taskStates1 := make(map[string]*nomad.TaskState) - taskStates1["task1"] = &nomad.TaskState{State: "running"} + taskStates1["task1"] = &nomad.TaskState{State: "running", Failed: false} allocs1 = append(allocs1, &nomad.AllocationListStub{ ID: "10246d87-ecd7-21ad-13b2-f0c564647d64", TaskStates: taskStates1, @@ -27,8 +28,9 @@ func TestJobStatusChecker_allocationStatusChecker(t *testing.T) { var allocs2 []*nomad.AllocationListStub taskStates2 := make(map[string]*nomad.TaskState) - taskStates2["task1"] = &nomad.TaskState{State: "running"} - taskStates2["task2"] = &nomad.TaskState{State: "pending"} + taskStates2["task1"] = &nomad.TaskState{State: "running", Failed: false} + taskStates2["task2"] = &nomad.TaskState{State: "pending", Failed: false} + taskStates2["task3"] = &nomad.TaskState{State: "dead", Failed: false} allocs2 = append(allocs2, &nomad.AllocationListStub{ ID: "20246d87-ecd7-21ad-13b2-f0c564647d64", TaskStates: taskStates2, @@ -36,50 +38,61 @@ func TestJobStatusChecker_allocationStatusChecker(t *testing.T) { var allocs3 []*nomad.AllocationListStub taskStates3 := make(map[string]*nomad.TaskState) - taskStates3["task1"] = &nomad.TaskState{State: "dead"} + taskStates3["task1"] = &nomad.TaskState{State: "dead", Failed: false} allocs3 = append(allocs3, &nomad.AllocationListStub{ ID: "30246d87-ecd7-21ad-13b2-f0c564647d64", TaskStates: taskStates3, }) + var allocs4 []*nomad.AllocationListStub + taskStates4 := make(map[string]*nomad.TaskState) + taskStates4["task1"] = &nomad.TaskState{State: "dead", Failed: false} + taskStates4["task2"] = &nomad.TaskState{State: "dead", Failed: true} + allocs4 = append(allocs4, &nomad.AllocationListStub{ + ID: "40246d87-ecd7-21ad-13b2-f0c564647d64", + TaskStates: taskStates4, + }) + cases := []struct { - levantTasks map[TaskCoordinate]string + levantTasks map[TaskCoordinate]*nomad.TaskState allocs []*nomad.AllocationListStub - dead int - expectedDead int + expectedFailed int expectedComplete bool }{ { levantTasks1, allocs1, 0, - 0, true, }, { levantTasks2, allocs2, 0, - 0, false, }, { levantTasks3, allocs3, 0, + true, + }, + { + levantTasks4, + allocs4, 1, true, }, } for _, tc := range cases { - complete, dead := allocationStatusChecker(tc.levantTasks, tc.allocs) + complete, failed := allocationStatusChecker(tc.levantTasks, tc.allocs) if complete != tc.expectedComplete { t.Fatalf("expected complete to be %v but got %v", tc.expectedComplete, complete) } - if dead != tc.expectedDead { - t.Fatalf("expected %v dead task(s) but got %v", tc.expectedDead, dead) + if failed != tc.expectedFailed { + t.Fatalf("expected %v failed task(s) but got %v", tc.expectedFailed, failed) } } }