-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix worker death #668
Fix worker death #668
Changes from 4 commits
31af818
b213809
0528519
4ad992b
2c6a59e
781a75d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
// utilities to run inside the worker | ||
// This is designed to minimize the amount of code we have to mock | ||
|
||
import process from 'node:process'; | ||
import stringify from 'fast-safe-stringify'; | ||
import createLogger, { SanitizePolicies } from '@openfn/logger'; | ||
|
@@ -66,17 +65,23 @@ export const createLoggers = ( | |
// Execute wrapper function | ||
export const execute = async ( | ||
workflowId: string, | ||
executeFn: () => Promise<any> | undefined | ||
executeFn: () => Promise<any> | undefined, | ||
publishFn = publish | ||
) => { | ||
const handleError = (err: any) => { | ||
publish(workerEvents.ERROR, { | ||
publishFn(workerEvents.ERROR, { | ||
// @ts-ignore | ||
workflowId, | ||
// Map the error out of the thread in a serializable format | ||
error: serializeError(err), | ||
stack: err?.stack | ||
stack: err?.stack, | ||
// TODO job id maybe | ||
}); | ||
|
||
// Explicitly send a reject task error, to ensure the worker closes down | ||
publish(workerEvents.ENGINE_REJECT_TASK, { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the actual fix and this is what WASN'T happening before. It's not enough to tell the worker we've errored. We need to send an event to the parent process to tell it that we're dead. I'm a little concerned about duplicating error messages here and so there's a little bit of gnarly code on the reporting side to handle that. |
||
error: serializeError(err), | ||
}); | ||
}; | ||
|
||
process.on('exit', (code: number) => { | ||
|
@@ -91,39 +96,35 @@ export const execute = async ( | |
// it'll be ignored (ie the workerEmit call will fail) | ||
process.on('uncaughtException', async (err: any) => { | ||
// Log this error to local stdout. This won't be sent out of the worker thread. | ||
console.debug(`Uncaught exception in worker thread (workflow ${workflowId} )`) | ||
console.debug(err) | ||
|
||
console.debug( | ||
`Uncaught exception in worker thread (workflow ${workflowId} )` | ||
); | ||
console.debug(err); | ||
|
||
// Also try and log to the workflow's logger | ||
try { | ||
console.error(`Uncaught exception in worker thread (workflow ${workflowId} )`) | ||
console.error(err) | ||
} catch(e) { | ||
console.error(`Uncaught exception in worker thread`) | ||
console.error( | ||
`Uncaught exception in worker thread (workflow ${workflowId} )` | ||
); | ||
console.error(err); | ||
} catch (e) { | ||
console.error(`Uncaught exception in worker thread`); | ||
} | ||
|
||
// For now, we'll write this off as a crash-level generic execution error | ||
// TODO did this come from job or adaptor code? | ||
const e = new ExecutionError(err); | ||
e.severity = 'crash'; // Downgrade this to a crash because it's likely not our fault | ||
handleError(e); | ||
|
||
// Close down the process just to be 100% sure that all async code stops | ||
// This is in a timeout to give the emitted message time to escape | ||
// There is a TINY WINDOW in which async code can still run and affect the next run | ||
// This should all go away when we replace workerpool | ||
setTimeout(() => { | ||
process.exit(HANDLED_EXIT_CODE); | ||
}, 2); | ||
}); | ||
|
||
publish(workerEvents.WORKFLOW_START, { | ||
publishFn(workerEvents.WORKFLOW_START, { | ||
workflowId, | ||
}); | ||
|
||
try { | ||
const result = await executeFn(); | ||
publish(workerEvents.WORKFLOW_COMPLETE, { workflowId, state: result }); | ||
publishFn(workerEvents.WORKFLOW_COMPLETE, { workflowId, state: result }); | ||
|
||
// For tests | ||
return result; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is incidental debugging code. I do want to leave it here but commented out for now.