Skip to content
This repository has been archived by the owner on Jan 8, 2024. It is now read-only.

Commit

Permalink
Not interpreting server NotFound error as server down
Browse files (browse the repository at this point in the history)
  • Loading branch information
izaaklauer committed Jul 21, 2023
1 parent 918df22 commit 0a9baf8
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 15 deletions.
20 changes: 8 additions & 12 deletions internal/runner/accept.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,17 @@ func (r *Runner) AcceptMany(ctx context.Context) {
r.logger.Error("runner unexpectedly deregistered, exiting")
time.Sleep(5 * time.Second)
return

case codes.NotFound:
// This means the runner was deregistered and we must exit.
// This won't be fixed unless the runner is closed and restarted.
r.logger.Error("runner unexpectedly deregistered, exiting")
return
case codes.Unavailable, codes.Unimplemented:
// Server became unavailable. Unimplemented likely means that the server
// is running behind a proxy and is failing health checks.

// Let's just sleep to give the server time to come back.
r.logger.Warn("server unavailable, sleeping before retry", "error", err)
time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second)
r.logger.Warn("server unavailable", "error", err)
default:
r.logger.Error("error running job", "error", err)
}
r.logger.Warn("sleeping before retry", "error", err)
time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second)
}
}
}
Expand Down Expand Up @@ -261,8 +256,8 @@ RESTART_JOB_STREAM:
},
}); err != nil {
if atomic.LoadInt32(&canceled) > 0 ||
status.Code(err) == codes.Unavailable ||
status.Code(err) == codes.NotFound {
status.Code(err) == codes.Unavailable {
log.Trace("Restarting the accept loop due to a cancellation and we got an error sending on the job stream. I don't think we'll see this.", "err", err)
goto RESTART_JOB_STREAM
}

Expand All @@ -278,8 +273,9 @@ RESTART_JOB_STREAM:
resp, err := client.Recv()
if err != nil {
if atomic.LoadInt32(&canceled) > 0 ||
status.Code(err) == codes.Unavailable ||
status.Code(err) == codes.NotFound {
status.Code(err) == codes.Unavailable {
// TODO(izaak): delete this note. Contrary to the log message below, this is the case we're actually seeing.
log.Trace("Restarting the accept loop due to a cancellation and we got an error receiving the runner job stream. I don't think we'll see this.", "err", err)
goto RESTART_JOB_STREAM
}

Expand Down
7 changes: 4 additions & 3 deletions internal/runner/operation_project_destroy.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ package runner
import (
"context"
"github.com/hashicorp/go-hclog"
projConfig "github.com/hashicorp/waypoint/internal/config"
"github.com/hashicorp/waypoint/internal/core"
pb "github.com/hashicorp/waypoint/pkg/server/gen"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
empty "google.golang.org/protobuf/types/known/emptypb"

projConfig "github.com/hashicorp/waypoint/internal/config"
"github.com/hashicorp/waypoint/internal/core"
pb "github.com/hashicorp/waypoint/pkg/server/gen"
)

func (r *Runner) executeDestroyProjectOp(
Expand Down

0 comments on commit 0a9baf8

Please sign in to comment.