-
Notifications
You must be signed in to change notification settings - Fork 110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RSDK-7843: Do not remove resources when a remote has a transient error. #4268
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ package client | |
|
||
import ( | ||
"context" | ||
"errors" | ||
"flag" | ||
"fmt" | ||
"io" | ||
|
@@ -15,7 +16,6 @@ import ( | |
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" | ||
"github.com/jhump/protoreflect/desc" | ||
"github.com/jhump/protoreflect/grpcreflect" | ||
"github.com/pkg/errors" | ||
"go.uber.org/multierr" | ||
"go.uber.org/zap" | ||
"go.uber.org/zap/zapcore" | ||
|
@@ -131,7 +131,7 @@ func isClosedPipeError(err error) bool { | |
} | ||
|
||
func (rc *RobotClient) notConnectedToRemoteError() error { | ||
return errors.Errorf("not connected to remote robot at %s", rc.address) | ||
return fmt.Errorf("not connected to remote robot at %s", rc.address) | ||
} | ||
|
||
func (rc *RobotClient) handleUnaryDisconnect( | ||
|
@@ -346,6 +346,7 @@ func (rc *RobotClient) connectWithLock(ctx context.Context) error { | |
if err := rc.conn.Close(); err != nil { | ||
return err | ||
} | ||
|
||
conn, err := grpc.Dial(ctx, rc.address, rc.logger, rc.dialOptions...) | ||
if err != nil { | ||
return err | ||
|
@@ -616,7 +617,7 @@ func (rc *RobotClient) resources(ctx context.Context) ([]resource.Name, []resour | |
} | ||
svcDesc, ok := symDesc.(*desc.ServiceDescriptor) | ||
if !ok { | ||
return nil, nil, errors.Errorf("expected descriptor to be service descriptor but got %T", symDesc) | ||
return nil, nil, fmt.Errorf("expected descriptor to be service descriptor but got %T", symDesc) | ||
} | ||
resTypes = append(resTypes, resource.RPCAPI{ | ||
API: rprotoutils.ResourceNameFromProto(resAPI.Subtype).API, | ||
|
@@ -709,9 +710,8 @@ func (rc *RobotClient) PackageManager() packages.Manager { | |
return nil | ||
} | ||
|
||
// ResourceNames returns a list of all known resource names connected to this machine. | ||
// | ||
// resource_names := machine.ResourceNames() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Didn't know if this served a purpose -- happy to reintroduce if there's some magic documentation thing going on here. |
||
// ResourceNames returns a list of all known resource names connected to this machine. If we do not | ||
// have a connection to this robot, return nil. | ||
func (rc *RobotClient) ResourceNames() []resource.Name { | ||
if err := rc.checkConnected(); err != nil { | ||
rc.Logger().Errorw("failed to get remote resource names", "error", err.Error()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -185,29 +185,22 @@ func (manager *resourceManager) updateRemoteResourceNames( | |
manager.logger.CDebugw(ctx, "updating remote resource names", "remote", remoteName) | ||
activeResourceNames := map[resource.Name]bool{} | ||
newResources := rr.ResourceNames() | ||
oldResources := manager.remoteResourceNames(remoteName) | ||
for _, res := range oldResources { | ||
if newResources == nil { | ||
// nil implies our connection to the remote is currently broken. Return without changing any | ||
// state for this remote. | ||
return false | ||
} | ||
|
||
// Initialize a map with all existing resources as a map item. We will iterate through all of | ||
// the `newResources` and set the map value for that item to true. Anything left with `false` at | ||
// the end has been removed and will be marked for removal. | ||
for _, res := range manager.remoteResourceNames(remoteName) { | ||
activeResourceNames[res] = false | ||
} | ||
|
||
anythingChanged := false | ||
|
||
for _, resName := range newResources { | ||
remoteResName := resName | ||
res, err := rr.ResourceByName(remoteResName) // this returns a remote known OR foreign resource client | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nothing changed here. Just code movement. What's important is that this is now below old line 215 / new line 208. I hope the comment I added to the new code outlines the significance. There is no test coverage of the error case here. |
||
if err != nil { | ||
if errors.Is(err, client.ErrMissingClientRegistration) { | ||
manager.logger.CDebugw(ctx, "couldn't obtain remote resource interface", | ||
"name", remoteResName, | ||
"reason", err) | ||
} else { | ||
manager.logger.CErrorw(ctx, "couldn't obtain remote resource interface", | ||
"name", remoteResName, | ||
"reason", err) | ||
} | ||
continue | ||
} | ||
|
||
resName = resName.PrependRemote(remoteName.Name) | ||
gNode, ok := manager.resources.Node(resName) | ||
|
||
|
@@ -239,6 +232,25 @@ func (manager *resourceManager) updateRemoteResourceNames( | |
} | ||
} | ||
|
||
// ResourceByName on the robot client will construct and return the client resource that | ||
// should be added into the resource graph. This method can return an error if the | ||
// connection to the remote has been lost. In this case, the resource has already been | ||
// deemed "active" and will not be removed in the "mark for update" stage of updating remote | ||
// resources. | ||
res, err := rr.ResourceByName(remoteResName) // this returns a remote known OR foreign resource client | ||
if err != nil { | ||
if errors.Is(err, client.ErrMissingClientRegistration) { | ||
manager.logger.CDebugw(ctx, "couldn't obtain remote resource interface", | ||
"name", remoteResName, | ||
"reason", err) | ||
} else { | ||
manager.logger.CErrorw(ctx, "couldn't obtain remote resource interface", | ||
"name", remoteResName, | ||
"reason", err) | ||
} | ||
continue | ||
} | ||
|
||
if ok { | ||
gNode.SwapResource(res, unknownModel) | ||
} else { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This usage adds a stack trace to the error. I found those stack traces to be an eyesore and not at all helpful.