From bb671e8e5450b202f05b379403064012cb59a739 Mon Sep 17 00:00:00 2001 From: Izaak Lauer <8404559+izaaklauer@users.noreply.github.com> Date: Fri, 21 Jul 2023 17:21:05 -0400 Subject: [PATCH] Not interpreting server NotFound error as server down Co-authored-by: Martina Santangelo --- internal/runner/accept.go | 19 +++++++------------ internal/runner/operation_project_destroy.go | 7 ++++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/internal/runner/accept.go b/internal/runner/accept.go index d94bb2fcf72..340d066d3c0 100644 --- a/internal/runner/accept.go +++ b/internal/runner/accept.go @@ -72,22 +72,17 @@ func (r *Runner) AcceptMany(ctx context.Context) { r.logger.Error("runner unexpectedly deregistered, exiting") time.Sleep(5 * time.Second) return - - case codes.NotFound: - // This means the runner was deregistered and we must exit. - // This won't be fixed unless the runner is closed and restarted. - r.logger.Error("runner unexpectedly deregistered, exiting") - return case codes.Unavailable, codes.Unimplemented: // Server became unavailable. Unimplemented likely means that the server // is running behind a proxy and is failing health checks. // Let's just sleep to give the server time to come back. - r.logger.Warn("server unavailable, sleeping before retry", "error", err) - time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second) + r.logger.Warn("server unavailable", "error", err) default: r.logger.Error("error running job", "error", err) } + r.logger.Warn("sleeping before retry", "error", err) + time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second) } } } @@ -261,8 +256,8 @@ RESTART_JOB_STREAM: }, }); err != nil { if atomic.LoadInt32(&canceled) > 0 || - status.Code(err) == codes.Unavailable || - status.Code(err) == codes.NotFound { + status.Code(err) == codes.Unavailable { + log.Trace("Restarting the accept loop due to a cancellation and we got an error sending on the job stream. I don't think we'll see this.", "err", err) goto RESTART_JOB_STREAM } @@ -278,8 +273,8 @@ RESTART_JOB_STREAM: resp, err := client.Recv() if err != nil { if atomic.LoadInt32(&canceled) > 0 || - status.Code(err) == codes.Unavailable || - status.Code(err) == codes.NotFound { + status.Code(err) == codes.Unavailable { + log.Trace("Restarting the accept loop due to a cancellation and we got an error receiving the runner job stream. I don't think we'll see this.", "err", err) goto RESTART_JOB_STREAM } diff --git a/internal/runner/operation_project_destroy.go b/internal/runner/operation_project_destroy.go index 856b892fceb..d67bd098871 100644 --- a/internal/runner/operation_project_destroy.go +++ b/internal/runner/operation_project_destroy.go @@ -6,12 +6,13 @@ package runner import ( "context" "github.com/hashicorp/go-hclog" - projConfig "github.com/hashicorp/waypoint/internal/config" - "github.com/hashicorp/waypoint/internal/core" - pb "github.com/hashicorp/waypoint/pkg/server/gen" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" empty "google.golang.org/protobuf/types/known/emptypb" + + projConfig "github.com/hashicorp/waypoint/internal/config" + "github.com/hashicorp/waypoint/internal/core" + pb "github.com/hashicorp/waypoint/pkg/server/gen" ) func (r *Runner) executeDestroyProjectOp(