Skip to content

Commit

Permalink
log detail errors from remote
Browse files Browse the repository at this point in the history
Signed-off-by: Jorge Aguilera <[email protected]>
  • Loading branch information
jagedn committed Jul 3, 2024
1 parent 09a9e68 commit 53e2878
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ class NomadService implements Closeable{
JobRegisterResponse jobRegisterResponse = jobsApi.registerJob(jobRegisterRequest, config.jobOpts().region, config.jobOpts().namespace, null, null)
jobRegisterResponse.evalID
} catch (Throwable e) {
log.error("[NOMAD] Failed to submit ${job.name} -- Cause: ${e.message ?: e}", e)
throw new ProcessSubmitException("[NOMAD] Failed to submit ${job.name} -- Cause: ${e.message ?: e}", e)
}

Expand Down Expand Up @@ -267,27 +268,42 @@ class NomadService implements Closeable{
}

String getJobState(String jobId){
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub last = allocations?.sort{
it.modifyIndex
}?.last()
String currentState = last?.taskStates?.values()?.last()?.state
log.debug "Task $jobId , state=$currentState"
currentState ?: "Unknown"
try {
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub last = allocations?.sort {
it.modifyIndex
}?.last()
String currentState = last?.taskStates?.values()?.last()?.state
log.debug "Task $jobId , state=$currentState"
currentState ?: "Unknown"
}catch(Exception e){
log.error("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e)
"dead"
}
}



boolean checkIfRunning(String jobId){
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status"
job.status == "running"
try {
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status"
job.status == "running"
}catch (Exception e){
log.error("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e)
false
}
}

boolean checkIfDead(String jobId){
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfDead jobID=$job.ID; status=$job.status"
job.status == "dead"
try{
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfDead jobID=$job.ID; status=$job.status"
job.status == "dead"
}catch (Exception e){
log.error("[NOMAD] Failed to get job ${jobId} -- Cause: ${e.message ?: e}", e)
true
}
}

void kill(String jobId) {
Expand All @@ -300,12 +316,21 @@ class NomadService implements Closeable{

protected void purgeJob(String jobId, boolean purge){
log.debug "[NOMAD] purgeJob with jobId=${jobId}"
jobsApi.deleteJob(jobId,config.jobOpts().region, config.jobOpts().namespace,null,null,purge, true)
try {
jobsApi.deleteJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, purge, true)
}catch(Exception e){
log.error("[NOMAD] Failed to delete job ${jobId} -- Cause: ${e.message ?: e}", e)
}
}

String getClientOfJob(String jobId) {
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub jobAllocation = allocations.first()
return jobAllocation.nodeName
try{
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub jobAllocation = allocations.first()
return jobAllocation.nodeName
}catch (Exception e){
log.error("[NOMAD] Failed to get job allocations ${jobId} -- Cause: ${e.message ?: e}", e)
throw new ProcessSubmitException("[NOMAD] Failed to get alloactions ${jobId} -- Cause: ${e.message ?: e}", e)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class MockExecutor extends NomadExecutor {
@Override
void signal() { }

protected TaskMonitor createTaskMonitor2() {
protected TaskMonitor createTaskMonitor() {
new MockMonitor()
}

Expand Down
47 changes: 47 additions & 0 deletions plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.pf4j.PluginDescriptorFinder
import spock.lang.Shared
import spock.lang.Timeout
import test.Dsl2Spec
import test.OutputCapture

import java.nio.file.Files
import java.nio.file.Path
Expand Down Expand Up @@ -157,4 +158,50 @@ class NomadDSLSpec extends Dsl2Spec{
// submitted
// summary
}

@org.junit.Rule
OutputCapture capture = new OutputCapture()

def 'should catch a remote exception' () {
given:
mockWebServer.dispatcher = new Dispatcher() {
@Override
MockResponse dispatch(@NotNull RecordedRequest recordedRequest) throws InterruptedException {
new MockResponse().setResponseCode(500).setBody("Dummy exception")
}
}

when:
def SCRIPT = '''
process sayHello{
container 'ubuntu:22.0.4'
input:
val x
output:
stdout
script:
"""
echo '$x world!\'
"""
}
workflow {
channel.of('hi!') | sayHello | view
}
'''
and:
def result = new MockScriptRunner([
process:[executor:'nomad'],
nomad:
[
client:
[
address : "http://${mockWebServer.hostName}:${mockWebServer.port}"
]
]
]).setScript(SCRIPT).execute()

then:
thrown(AbortRunException) //it fails because no real task is executed
capture.toString().indexOf("io.nomadproject.client.ApiException: Server Error") != -1
}
}

0 comments on commit 53e2878

Please sign in to comment.