From 69a83fe88780019b41c7b3508ac71ff97e8821a2 Mon Sep 17 00:00:00 2001 From: ddl-ebrown Date: Wed, 29 May 2024 10:40:31 -0700 Subject: [PATCH] Return non-0 exit code from copilot upload failure - The current copilot behavior is to: * try to upload the specified file * if an error occurs, try to write an error file instead * if the error file write succeeds, return success -- otherwise error This behavior is not ideal, because it suppresses real issues like the inability to find the expected output file. While a case can be made that the sidecar succeeded in handling its responsibility, important information is lost in the process that is useful to other containers in the job. Generallyk, it's better to consider the inability to find the original file a failure of the job to produce the expected file in the expected location (useful especially for container tasks). That means that a failure to upload the file should also return a non-0 exit code. - The new behavior is therefore: * try to upload the specified file * if an error occurs, try to write an error file instead * if the error file write succeeds, return the original error * if the error file write fails, return a joined version of the errors Signed-off-by: ddl-ebrown --- flytecopilot/cmd/sidecar.go | 10 +++++++--- flytecopilot/cmd/sidecar_test.go | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/flytecopilot/cmd/sidecar.go b/flytecopilot/cmd/sidecar.go index 09abdb31e5..4d29d2f4de 100644 --- a/flytecopilot/cmd/sidecar.go +++ b/flytecopilot/cmd/sidecar.go @@ -2,6 +2,7 @@ package cmd import ( "context" + stderrors "errors" "fmt" "time" @@ -144,10 +145,13 @@ func (u *UploadOptions) Sidecar(ctx context.Context) error { if err := u.uploader(ctx); err != nil { logger.Errorf(ctx, "Uploading failed, err %s", err) - if err := u.UploadError(ctx, "OutputUploadFailed", err, storage.DataReference(u.remoteOutputsPrefix)); err != nil { - logger.Errorf(ctx, "Failed to write error document, err :%s", err) - return err + if uerr := u.UploadError(ctx, "OutputUploadFailed", err, storage.DataReference(u.remoteOutputsPrefix)); uerr != nil { + logger.Errorf(ctx, "Failed to write error document, err :%s", uerr) + return stderrors.Join(uerr, err) } + + // failure to upload should still return err exit code from process + return err } return nil } diff --git a/flytecopilot/cmd/sidecar_test.go b/flytecopilot/cmd/sidecar_test.go index 6d261e2c48..31d6f8b3df 100644 --- a/flytecopilot/cmd/sidecar_test.go +++ b/flytecopilot/cmd/sidecar_test.go @@ -91,7 +91,7 @@ func TestUploadOptions_Upload(t *testing.T) { ok, err := containerwatcher.FileExists(success) assert.NoError(t, err) assert.True(t, ok, "sucessfile not created") - assert.NoError(t, uopts.Sidecar(ctx)) + assert.Error(t, uopts.Sidecar(ctx)) v, err := store.Head(ctx, "/output/errors.pb") assert.NoError(t, err) assert.True(t, v.Exists())