Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: close the apid connection to other machines gracefully #8560

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion internal/app/apid/pkg/backend/apid.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/siderolabs/net"
"google.golang.org/grpc"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
Expand All @@ -26,6 +27,11 @@ import (
"github.com/siderolabs/talos/pkg/machinery/proto"
)

// GracefulShutdownTimeout is the timeout for graceful shutdown of the backend connection.
//
// It is the value passed to gracefulGRPCClose when the APID backend is closed.
// Talos has a few long-running API calls, so we need to give the backend some time to finish them.
const GracefulShutdownTimeout = 30 * time.Minute

var _ proxy.Backend = (*APID)(nil)

// APID backend performs proxying to another apid instance.
Expand Down Expand Up @@ -253,7 +259,36 @@ func (a *APID) Close() {
defer a.mu.Unlock()

if a.conn != nil {
a.conn.Close() //nolint:errcheck
gracefulGRPCClose(a.conn, GracefulShutdownTimeout)
a.conn = nil
}
}

// gracefulGRPCClose closes conn in a background goroutine and returns immediately.
//
// The goroutine tries to avoid closing the connection while it may still be in
// use (e.g. by a streaming API call): it closes the connection as soon as it is
// observed in the Idle, Shutdown or TransientFailure state, and otherwise waits
// for the connection state to change. If the timeout expires first, the
// connection is closed regardless of its state.
//
// See https://github.com/grpc/grpc/blob/master/doc/connectivity-semantics-and-api.md
// for details on connection states.
func gracefulGRPCClose(conn *grpc.ClientConn, timeout time.Duration) {
	go func() {
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()

		// Keep polling the connection state for as long as the deadline has NOT
		// expired. (The condition must be `== nil`: a live context reports a nil
		// error, so `!= nil` would skip the loop entirely and close the
		// connection right away.)
		for ctx.Err() == nil {
			switch state := conn.GetState(); state { //nolint:exhaustive
			case connectivity.Idle,
				connectivity.Shutdown,
				connectivity.TransientFailure:
				// close immediately, connection is not used
				conn.Close() //nolint:errcheck

				return
			default:
				// wait for the connection to leave the current state, or for
				// the timeout to expire (re-checked by the loop condition)
				conn.WaitForStateChange(ctx, state)
			}
		}

		// timeout expired: close anyway
		conn.Close() //nolint:errcheck
	}()
}