diff --git a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy index b47b19e..85983e8 100644 --- a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy +++ b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy @@ -109,6 +109,7 @@ class NomadService implements Closeable{ JobRegisterResponse jobRegisterResponse = jobsApi.registerJob(jobRegisterRequest, config.jobOpts().region, config.jobOpts().namespace, null, null) jobRegisterResponse.evalID } catch (Throwable e) { + log.error("[NOMAD] Failed to submit ${job.name} -- Cause: ${e.message ?: e}", e) throw new ProcessSubmitException("[NOMAD] Failed to submit ${job.name} -- Cause: ${e.message ?: e}", e) } @@ -267,27 +268,42 @@ class NomadService implements Closeable{ } String getJobState(String jobId){ - List allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null) - AllocationListStub last = allocations?.sort{ - it.modifyIndex - }?.last() - String currentState = last?.taskStates?.values()?.last()?.state - log.debug "Task $jobId , state=$currentState" - currentState ?: "Unknown" + try { + List allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null) + AllocationListStub last = allocations?.sort { + it.modifyIndex + }?.last() + String currentState = last?.taskStates?.values()?.last()?.state + log.debug "Task $jobId , state=$currentState" + currentState ?: "Unknown" + }catch(Exception e){ + log.error("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) + "dead" + } } boolean checkIfRunning(String jobId){ - Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null) - log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status" - job.status == "running" + try { + Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null) + log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status" + job.status == "running" + }catch (Exception e){ + log.error("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) + false + } } boolean checkIfDead(String jobId){ - Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null) - log.debug "[NOMAD] checkIfDead jobID=$job.ID; status=$job.status" - job.status == "dead" + try{ + Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null) + log.debug "[NOMAD] checkIfDead jobID=$job.ID; status=$job.status" + job.status == "dead" + }catch (Exception e){ + log.error("[NOMAD] Failed to get job ${jobId} -- Cause: ${e.message ?: e}", e) + true + } } void kill(String jobId) { @@ -300,12 +316,21 @@ class NomadService implements Closeable{ protected void purgeJob(String jobId, boolean purge){ log.debug "[NOMAD] purgeJob with jobId=${jobId}" - jobsApi.deleteJob(jobId,config.jobOpts().region, config.jobOpts().namespace,null,null,purge, true) + try { + jobsApi.deleteJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, purge, true) + }catch(Exception e){ + log.error("[NOMAD] Failed to delete job ${jobId} -- Cause: ${e.message ?: e}", e) + } } String getClientOfJob(String jobId) { - List allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null) - AllocationListStub jobAllocation = allocations.first() - return jobAllocation.nodeName + try{ + List allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null) + AllocationListStub jobAllocation = allocations.first() + return jobAllocation.nodeName + }catch (Exception e){ + log.error("[NOMAD] Failed to get job allocations ${jobId} -- Cause: ${e.message ?: e}", e) + throw new ProcessSubmitException("[NOMAD] Failed to get alloactions ${jobId} -- Cause: ${e.message ?: e}", e) + } } } diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/MockScriptRunner.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/MockScriptRunner.groovy index 20cc6a8..98f99bc 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/MockScriptRunner.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/MockScriptRunner.groovy @@ -124,7 +124,7 @@ class MockExecutor extends NomadExecutor { @Override void signal() { } - protected TaskMonitor createTaskMonitor2() { + protected TaskMonitor createTaskMonitor() { new MockMonitor() } diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy index 641b601..7d9781c 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy @@ -31,6 +31,7 @@ import org.pf4j.PluginDescriptorFinder import spock.lang.Shared import spock.lang.Timeout import test.Dsl2Spec +import test.OutputCapture import java.nio.file.Files import java.nio.file.Path @@ -157,4 +158,50 @@ class NomadDSLSpec extends Dsl2Spec{ // submitted // summary } + + @org.junit.Rule + OutputCapture capture = new OutputCapture() + + def 'should catch a remote exception' () { + given: + mockWebServer.dispatcher = new Dispatcher() { + @Override + MockResponse dispatch(@NotNull RecordedRequest recordedRequest) throws InterruptedException { + new MockResponse().setResponseCode(500).setBody("Dummy exception") + } + } + + when: + def SCRIPT = ''' + process sayHello{ + container 'ubuntu:22.0.4' + input: + val x + output: + stdout + script: + """ + echo '$x world!\' + """ + } + workflow { + channel.of('hi!') | sayHello | view + } + ''' + and: + def result = new MockScriptRunner([ + process:[executor:'nomad'], + nomad: + [ + client: + [ + address : "http://${mockWebServer.hostName}:${mockWebServer.port}" + ] + ] + ]).setScript(SCRIPT).execute() + + then: + thrown(AbortRunException) //it fails because no real task is executed + capture.toString().indexOf("io.nomadproject.client.ApiException: Server Error") != -1 + } }