From 7c6c2baef7e50c092d31b005d76c6b2edae88ca2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20M=C3=BCller?= <e.mueller@powercloud.de>
Date: Thu, 9 Sep 2021 20:38:20 +0200
Subject: [PATCH] Check for TaskState.Failed if deployment has failed

---
 GNUmakefile                       |  2 +-
 levant/job_status_checker.go      | 36 +++++++++++++-------------
 levant/job_status_checker_test.go | 43 ++++++++++++++++++++-----------
 3 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/GNUmakefile b/GNUmakefile
index 8aeb76e31..7bb88270e 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -9,7 +9,7 @@ GO_LDFLAGS := "-X github.com/hashicorp/levant/version.GitCommit=$(GIT_COMMIT)$(G
 .PHONY: tools
 tools: ## Install the tools used to test and build
 	@echo "==> Installing tools..."
-	GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
+	curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $$(go env GOPATH)/bin
 	@echo "==> Done"
 
 .PHONY: build
diff --git a/levant/job_status_checker.go b/levant/job_status_checker.go
index 69d643e60..06995b4bb 100644
--- a/levant/job_status_checker.go
+++ b/levant/job_status_checker.go
@@ -81,7 +81,7 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool {
 	q := nomadHelper.GenerateBlockingQueryOptions(l.config.Template.Job.Namespace)
 
 	// Build our small internal checking struct.
-	levantTasks := make(map[TaskCoordinate]string)
+	levantTasks := make(map[TaskCoordinate]*nomad.TaskState)
 
 	for {
 
@@ -100,15 +100,15 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool {
 		// If we get here, set the wi to the latest Index.
 		q.WaitIndex = meta.LastIndex
 
-		complete, deadTasks := allocationStatusChecker(levantTasks, allocs)
+		complete, failedTasks := allocationStatusChecker(levantTasks, allocs)
 
 		// depending on how we finished up we report our status
 		// If we have no allocations left to track then we can exit and log
 		// information depending on the success.
-		if complete && deadTasks == 0 {
-			log.Info().Msg("levant/job_status_checker: all allocations in deployment of job are running")
+		if complete && failedTasks == 0 {
+			log.Info().Msg("levant/job_status_checker: all allocations in deployment of job are running or finished successfully")
 			return true
-		} else if complete && deadTasks > 0 {
+		} else if complete && failedTasks > 0 {
 			return false
 		}
 	}
@@ -118,31 +118,31 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool {
 // job deployment, an update Levants internal tracking on task status based on
 // this. This functionality exists as Nomad does not currently support
 // deployments across all job types.
-func allocationStatusChecker(levantTasks map[TaskCoordinate]string, allocs []*nomad.AllocationListStub) (bool, int) {
+func allocationStatusChecker(levantTasks map[TaskCoordinate]*nomad.TaskState, allocs []*nomad.AllocationListStub) (bool, int) {
 
 	complete := true
-	deadTasks := 0
+	failedTasks := 0
 
 	for _, alloc := range allocs {
-		for taskName, task := range alloc.TaskStates {
+		for taskName, taskState := range alloc.TaskStates {
 			// if the state is one we haven't seen yet then we print a message
-			if levantTasks[TaskCoordinate{alloc.ID, taskName}] != task.State {
+			if levantTasks[TaskCoordinate{alloc.ID, taskName}] != taskState {
 				log.Info().Msgf("levant/job_status_checker: task %s in allocation %s now in %s state",
-					taskName, alloc.ID, task.State)
+					taskName, alloc.ID, taskState.State)
 				// then we record the new state
-				levantTasks[TaskCoordinate{alloc.ID, taskName}] = task.State
+				levantTasks[TaskCoordinate{alloc.ID, taskName}] = taskState
 			}
 
-			// then we have some case specific actions
-			switch levantTasks[TaskCoordinate{alloc.ID, taskName}] {
 			// if a task is still pendign we are not yet done
-			case "pending":
+			if levantTasks[TaskCoordinate{alloc.ID, taskName}].State == "pending" {
 				complete = false
-				// if the task is dead we record that
-			case "dead":
-				deadTasks++
+			}
+
+			// if a task failed we record that
+			if levantTasks[TaskCoordinate{alloc.ID, taskName}].Failed {
+				failedTasks++
 			}
 		}
 	}
-	return complete, deadTasks
+	return complete, failedTasks
 }
diff --git a/levant/job_status_checker_test.go b/levant/job_status_checker_test.go
index 65a86c1ea..c388c15f1 100644
--- a/levant/job_status_checker_test.go
+++ b/levant/job_status_checker_test.go
@@ -9,14 +9,15 @@ import (
 func TestJobStatusChecker_allocationStatusChecker(t *testing.T) {
 
 	// Build our task status maps
-	levantTasks1 := make(map[TaskCoordinate]string)
-	levantTasks2 := make(map[TaskCoordinate]string)
-	levantTasks3 := make(map[TaskCoordinate]string)
+	levantTasks1 := make(map[TaskCoordinate]*nomad.TaskState)
+	levantTasks2 := make(map[TaskCoordinate]*nomad.TaskState)
+	levantTasks3 := make(map[TaskCoordinate]*nomad.TaskState)
+	levantTasks4 := make(map[TaskCoordinate]*nomad.TaskState)
 
 	// Build a small AllocationListStubs with required information.
 	var allocs1 []*nomad.AllocationListStub
 	taskStates1 := make(map[string]*nomad.TaskState)
-	taskStates1["task1"] = &nomad.TaskState{State: "running"}
+	taskStates1["task1"] = &nomad.TaskState{State: "running", Failed: false}
 	allocs1 = append(allocs1, &nomad.AllocationListStub{
 		ID:         "10246d87-ecd7-21ad-13b2-f0c564647d64",
 		TaskStates: taskStates1,
@@ -24,8 +25,9 @@ func TestJobStatusChecker_allocationStatusChecker(t *testing.T) {
 
 	var allocs2 []*nomad.AllocationListStub
 	taskStates2 := make(map[string]*nomad.TaskState)
-	taskStates2["task1"] = &nomad.TaskState{State: "running"}
-	taskStates2["task2"] = &nomad.TaskState{State: "pending"}
+	taskStates2["task1"] = &nomad.TaskState{State: "running", Failed: false}
+	taskStates2["task2"] = &nomad.TaskState{State: "pending", Failed: false}
+	taskStates2["task3"] = &nomad.TaskState{State: "dead", Failed: false}
 	allocs2 = append(allocs2, &nomad.AllocationListStub{
 		ID:         "20246d87-ecd7-21ad-13b2-f0c564647d64",
 		TaskStates: taskStates2,
@@ -33,50 +35,61 @@ func TestJobStatusChecker_allocationStatusChecker(t *testing.T) {
 
 	var allocs3 []*nomad.AllocationListStub
 	taskStates3 := make(map[string]*nomad.TaskState)
-	taskStates3["task1"] = &nomad.TaskState{State: "dead"}
+	taskStates3["task1"] = &nomad.TaskState{State: "dead", Failed: false}
 	allocs3 = append(allocs3, &nomad.AllocationListStub{
 		ID:         "30246d87-ecd7-21ad-13b2-f0c564647d64",
 		TaskStates: taskStates3,
 	})
 
+	var allocs4 []*nomad.AllocationListStub
+	taskStates4 := make(map[string]*nomad.TaskState)
+	taskStates4["task1"] = &nomad.TaskState{State: "dead", Failed: false}
+	taskStates4["task2"] = &nomad.TaskState{State: "dead", Failed: true}
+	allocs4 = append(allocs4, &nomad.AllocationListStub{
+		ID:         "40246d87-ecd7-21ad-13b2-f0c564647d64",
+		TaskStates: taskStates4,
+	})
+
 	cases := []struct {
-		levantTasks      map[TaskCoordinate]string
+		levantTasks      map[TaskCoordinate]*nomad.TaskState
 		allocs           []*nomad.AllocationListStub
-		dead             int
-		expectedDead     int
+		expectedFailed   int
 		expectedComplete bool
 	}{
 		{
 			levantTasks1,
 			allocs1,
 			0,
-			0,
 			true,
 		},
 		{
 			levantTasks2,
 			allocs2,
 			0,
-			0,
 			false,
 		},
 		{
 			levantTasks3,
 			allocs3,
 			0,
+			true,
+		},
+		{
+			levantTasks4,
+			allocs4,
 			1,
 			true,
 		},
 	}
 
 	for _, tc := range cases {
-		complete, dead := allocationStatusChecker(tc.levantTasks, tc.allocs)
+		complete, failed := allocationStatusChecker(tc.levantTasks, tc.allocs)
 
 		if complete != tc.expectedComplete {
 			t.Fatalf("expected complete to be %v but got %v", tc.expectedComplete, complete)
 		}
-		if dead != tc.expectedDead {
-			t.Fatalf("expected %v dead task(s) but got %v", tc.expectedDead, dead)
+		if failed != tc.expectedFailed {
+			t.Fatalf("expected %v failed task(s) but got %v", tc.expectedFailed, failed)
 		}
 	}
 }