Skip to content

Commit

Permalink
reintroduce poison() so that saxadmin will work when the user rec…
Browse files Browse the repository at this point in the history
…reates a cluster.

PiperOrigin-RevId: 673614166
Change-Id: I39c0418445e5b6e7cf26d71c989ac5ec97d6e321
  • Loading branch information
Sax Authors authored and copybara-github committed Sep 12, 2024
1 parent baa2ccf commit 9603088
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
2 changes: 2 additions & 0 deletions saxml/client/go/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ go_library(
# unused internal admin gRPC dependency,
"@com_github_golang_glog//:go_default_library",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_grpc//codes:go_default_library",
"@org_golang_google_grpc//status:go_default_library",
],
)

Expand Down
25 changes: 25 additions & 0 deletions saxml/client/go/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ import (
"encoding/binary"
"fmt"
"hash/maphash"
"strings"
"sync"
"time"

log "github.com/golang/glog"
"google.golang.org/grpc/codes"
"google.golang.org/grpc"
"google.golang.org/grpc/status"
"saxml/common/addr"
"saxml/common/errors"
"saxml/common/platform/env"
Expand Down Expand Up @@ -120,12 +123,34 @@ func (a *Admin) getAdminClient(ctx context.Context) (pbgrpc.AdminClient, error)
return a.client, nil
}

// poison resets the cached a.conn and a.client to nil and closes the
// connection that was previously cached, if there was one.
//
// The fields are swapped out while holding a.mu, and the old connection is
// closed only after the lock has been released, so Close never runs under
// the mutex.
// NOTE(review): presumably the next getAdminClient call re-dials once these
// fields are nil — confirm against getAdminClient.
func (a *Admin) poison() {
	a.mu.Lock()
	stale := a.conn
	a.conn, a.client = nil, nil
	a.mu.Unlock()

	// Nothing was cached, so there is nothing to tear down.
	if stale == nil {
		return
	}
	stale.Close()
}

func (a *Admin) retry(ctx context.Context, callback func(client pbgrpc.AdminClient) error) error {
action := func() error {
client, err := a.getAdminClient(ctx)
if err == nil {
err = callback(client)
}
if errors.AdminShouldPoison(err) {
a.poison()
}
// After poison(), RPCs active on the old connection will fail with
// "Canceled desc = grpc: the client connection is closing". Translate it to
// Unavailable so that the RPC will retry.
if status.Code(err) == codes.Canceled &&
strings.Contains(err.Error(), "client connection is closing") {
err = fmt.Errorf("Admin connection is poisoned: %w", errors.ErrUnavailable)
}
return err
}
return retrier.Do(ctx, action, errors.AdminShouldRetry)
Expand Down

0 comments on commit 9603088

Please sign in to comment.