diff --git a/gradle.properties b/gradle.properties index 9b1ae50..fbf2632 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,2 @@ -version=0.3.1 +version=0.3.2 github_organization=nextflow-io \ No newline at end of file diff --git a/plugins/nf-nomad/src/main/nextflow/nomad/builders/JobBuilder.groovy b/plugins/nf-nomad/src/main/nextflow/nomad/builders/JobBuilder.groovy index e23a7a8..00939aa 100644 --- a/plugins/nf-nomad/src/main/nextflow/nomad/builders/JobBuilder.groovy +++ b/plugins/nf-nomad/src/main/nextflow/nomad/builders/JobBuilder.groovy @@ -126,7 +126,7 @@ class JobBuilder { def task = createTask(taskRun, args, env, jobOpts) def taskGroup = new TaskGroup( - name: "group", + name: "nf-taskgroup", tasks: [ task ], reschedulePolicy: taskReschedulePolicy, restartPolicy: taskRestartPolicy diff --git a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy index a04ad2e..f1fa735 100644 --- a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy +++ b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy @@ -126,10 +126,10 @@ class NomadService implements Closeable{ it.modifyIndex }?.last() : null TaskState currentState = last?.taskStates?.values()?.last() - log.debug "Task $jobId , state=${currentState?.state}" + log.debug "[NOMAD] getTaskStatus $jobId , state=${currentState?.state}" currentState ?: new TaskState(state: "unknown", failed: true, finishedAt: OffsetDateTime.now()) }catch(Exception e){ - log.debug("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) + log.debug("[NOMAD] getTaskStatus Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) new TaskState(state: "unknown", failed: true, finishedAt: OffsetDateTime.now()) } } @@ -142,10 +142,10 @@ class NomadService implements Closeable{ jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null) } - log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status" + log.debug "[NOMAD] getJobStatus jobID=$job.ID; status=$job.status" job.status }catch (Exception e){ - log.debug("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) + log.debug("[NOMAD] getJobStatus Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) "unknown" } } diff --git a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy index 87a52b5..0393910 100644 --- a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy +++ b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy @@ -72,14 +72,41 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask { this.exitFile = task.workDir.resolve(TaskRun.CMD_EXIT) } + +//------------------------------------------------- +// +// NOTE: From https://github.com/hashicorp/nomad/blob/6a41dc7b2f1fdbbc5a20ed267b4ad25fc2a14489/api/jobs.go#L1263-L1287 +// +//------------------------------------------------- +// type JobChildrenSummary struct { +// Pending int64 +// Running int64 +// Dead int64 +// } +//------------------------------------------------- +// type TaskGroupSummary struct { +// Queued int +// Complete int +// Failed int +// Running int +// Starting int +// Lost int +// Unknown int +// } +//------------------------------------------------- + + @Override boolean checkIfRunning() { - if( !jobName ) throw new IllegalStateException("Missing Nomad Job name -- cannot check if running") + if( !jobName ) throw new IllegalStateException("[NOMAD] Missing Nomad Job name -- cannot check if running") if(isSubmitted()) { def state = taskState0() + + log.debug "[NOMAD] checkIfRunning task=$task.name; state=${state?.state}" + // include `terminated` state to allow the handler status to progress - if( state && ( ["running","terminated"].contains(state.state))){ - status = TaskStatus.RUNNING + if( state && ( ["running","pending","unknown"].contains(state.state))){ + this.status = TaskStatus.RUNNING determineClientNode() return true } @@ -89,15 +116,14 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask { @Override boolean checkIfCompleted() { - if( !jobName ) throw new IllegalStateException("Missing Nomad Job name -- cannot check if running") + if( !jobName ) throw new IllegalStateException("[NOMAD] Missing Nomad Job name -- cannot check if running") + def isFinished = false def state = taskState0() - final isFinished = state && (state.finishedAt != null || state.state == "unknow") - - log.debug "[NOMAD] checkIfCompleted task.name=$task.name; state=${state?.state} completed=$isFinished" + log.debug "[NOMAD] checkIfCompleted task=$task.name; state=${state?.state}" - if (isFinished) { + if( state && ( ["dead","complete"].contains(state.state))){ // finalize the task task.exitStatus = readExitFile() task.stdout = outputFile @@ -193,7 +219,7 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask { if (!status || delta >= 1_000) { def newState = nomadService.getTaskState(jobName) - log.debug "[NOMAD] Check jobState: jobName=$jobName currentState=${state?.state} newState=${newState?.state}" + log.debug "[NOMAD] taskState0 jobName=$jobName currentState=${state?.state} newState=${newState?.state}" if (newState) { state = newState diff --git a/plugins/nf-nomad/src/resources/META-INF/MANIFEST.MF b/plugins/nf-nomad/src/resources/META-INF/MANIFEST.MF deleted file mode 100644 index da26157..0000000 --- a/plugins/nf-nomad/src/resources/META-INF/MANIFEST.MF +++ /dev/null @@ -1,6 +0,0 @@ -Manifest-Version: 1.0 -Plugin-Id: nf-nomad -Plugin-Version: 0.1.2 -Plugin-Class: nextflow.nomad.NomadPlugin -Plugin-Provider: nextflow -Plugin-Requires: >=24.01.0-edge diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy index 193f3a5..e0d752d 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy @@ -102,6 +102,7 @@ class NomadDSLSpec extends Dsl2Spec{ given: boolean submitted = false boolean summary = false + int requestCounter = 0 mockWebServer.dispatcher = new Dispatcher() { @Override MockResponse dispatch(@NotNull RecordedRequest recordedRequest) throws InterruptedException { @@ -109,8 +110,9 @@ class NomadDSLSpec extends Dsl2Spec{ case "get": if( recordedRequest.path.endsWith("/allocations")) { summary = true + def resource = !requestCounter++ ? "/allocations.json" : "/completed.json" return new MockResponse().setResponseCode(200) - .setBody(this.getClass().getResourceAsStream("/allocations.json").text).addHeader("Content-Type", "application/json") + .setBody(this.getClass().getResourceAsStream(resource).text).addHeader("Content-Type", "application/json") }else { return new MockResponse().setResponseCode(200) .setBody('{"Status": "dead"}').addHeader("Content-Type", "application/json") @@ -125,7 +127,7 @@ class NomadDSLSpec extends Dsl2Spec{ } when: - def SCRIPT = ''' + def SCRIPT = ''' process sayHello{ container 'ubuntu:22.0.4' input: @@ -136,10 +138,10 @@ class NomadDSLSpec extends Dsl2Spec{ """ echo '$x world!\' """ - } + } workflow { channel.of('hi!') | sayHello | view - } + } ''' and: def result = new MockScriptRunner([ diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/builders/JobBuilderSpec.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/builders/JobBuilderSpec.groovy index c515368..3b85196 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/builders/JobBuilderSpec.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/builders/JobBuilderSpec.groovy @@ -89,7 +89,7 @@ class JobBuilderSpec extends Specification { def taskGroup = JobBuilder.createTaskGroup(taskRun, args, env, jobOpts) then: - taskGroup.name == "group" + taskGroup.name == "nf-taskgroup" taskGroup.tasks.size() == 1 taskGroup.tasks[0].name == "nf-task" taskGroup.tasks[0].config.image == "test-container" diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy index 7fa442e..69bef13 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy @@ -149,7 +149,7 @@ class NomadServiceSpec extends Specification{ body.Job.Name == name body.Job.Type == "batch" body.Job.TaskGroups.size() == 1 - body.Job.TaskGroups[0].Name == "group" + body.Job.TaskGroups[0].Name == "nf-taskgroup" body.Job.TaskGroups[0].Tasks.size() == 1 body.Job.TaskGroups[0].Tasks[0].Name == "nf-task" body.Job.TaskGroups[0].Tasks[0].Resources.Cores == 1 @@ -224,7 +224,7 @@ class NomadServiceSpec extends Specification{ body.Job.Name == name body.Job.Type == "batch" body.Job.TaskGroups.size() == 1 - body.Job.TaskGroups[0].Name == "group" + body.Job.TaskGroups[0].Name == "nf-taskgroup" body.Job.TaskGroups[0].Tasks.size() == 1 body.Job.TaskGroups[0].Tasks[0].Name == "nf-task" body.Job.TaskGroups[0].Tasks[0].Resources.Cores == 1 diff --git a/plugins/nf-nomad/src/testResources/completed.json b/plugins/nf-nomad/src/testResources/completed.json new file mode 100644 index 0000000..b8195e6 --- /dev/null +++ b/plugins/nf-nomad/src/testResources/completed.json @@ -0,0 +1,132 @@ +[ + { + "ID": "ed344e0a-7290-d117-41d3-a64f853ca3c2", + "EvalID": "a9c5effc-2242-51b2-f1fe-054ee11ab189", + "Name": "example.cache[0]", + "NodeID": "cb1f6030-a220-4f92-57dc-7baaabdc3823", + "PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc", + "NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b", + "RescheduleTracker": { + "Events": [ + { + "PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc", + "PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e", + "RescheduleTime": 1517434161192946200, + "Delay": 5000000000 + } + ] + }, + "JobID": "example", + "TaskGroup": "cache", + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "complete", + "ClientDescription": "", + "TaskStates": { + "redis": { + "State": "complete", + "Failed": false, + "StartedAt": "2017-05-25T23:41:23.240184101Z", + "FinishedAt": "0001-01-01T00:00:00Z", + "Events": [ + { + "Type": "Received", + "Time": 1495755675956923000, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "" + }, + { + "Type": "Task Setup", + "Time": 1495755675957466400, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "Building Task Directory", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "" + }, + { + "Type": "Driver", + "Time": 1495755675970286800, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "Downloading image redis:7" + }, + { + "Type": "Started", + "Time": 1495755683227522000, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "" + } + ] + } + }, + "CreateIndex": 9, + "ModifyIndex": 13, + "CreateTime": 1495755675944527600, + "ModifyTime": 1495755675944527600 + } +] \ No newline at end of file diff --git a/validation/sun-nomadlab/nextflow.config b/validation/sun-nomadlab/nextflow.config index 5e5de70..464ada2 100644 --- a/validation/sun-nomadlab/nextflow.config +++ b/validation/sun-nomadlab/nextflow.config @@ -27,6 +27,7 @@ wave { fusion { enabled = true exportStorageCredentials = true + logLevel = 'verbose' // 'info' | 'debug' } nomad { @@ -45,15 +46,15 @@ nomad { volume = { type "host" name "scratch" } - constraints = { -// attr { -// unique = [hostname:'nomad03'] -// //raw 'platform.aws.instance-type', '=', 'm4.xlarge' +// constraints = { +// // attr { +// // unique = [hostname:'nomad03'] +// // //raw 'platform.aws.instance-type', '=', 'm4.xlarge' - node { - unique = [name: "nomad03"] - } +// node { +// unique = [name: "nomad03"] +// } - } +// } } }