diff --git a/pkg/cloud_provider/clientset/fake.go b/pkg/cloud_provider/clientset/fake.go index fcacce3a..c1e9b6c4 100644 --- a/pkg/cloud_provider/clientset/fake.go +++ b/pkg/cloud_provider/clientset/fake.go @@ -43,6 +43,16 @@ func (c *FakeClientset) GetPod(namespace, name string) (*corev1.Pod, error) { }, Volumes: webhook.GetSidecarContainerVolumeSpec(), }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: webhook.SidecarContainerName, + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, } return pod, nil diff --git a/pkg/csi_driver/node.go b/pkg/csi_driver/node.go index c513c14e..fb80ab9c 100644 --- a/pkg/csi_driver/node.go +++ b/pkg/csi_driver/node.go @@ -32,7 +32,6 @@ import ( "golang.org/x/time/rate" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - corev1 "k8s.io/api/core/v1" "k8s.io/klog/v2" mount "k8s.io/mount-utils" ) @@ -134,88 +133,31 @@ func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublish return nil, status.Error(codes.FailedPrecondition, "failed to find the sidecar container in Pod spec") } - // Prepare the emptyDir path for the mounter to pass the file descriptor - emptyDirBasePath, err := util.PrepareEmptyDir(targetPath, true) - if err != nil { - return nil, status.Errorf(codes.Internal, "failed to prepare emptyDir path: %v", err) - } - // Check if the sidecar container is still required, // if not, put an exit file to the emptyDir path to // notify the sidecar container to exit. if !isInitContainer { - if err := putExitFile(pod, emptyDirBasePath); err != nil { + if err := putExitFile(pod, targetPath); err != nil { return nil, status.Errorf(codes.Internal, err.Error()) } } - // Check if there is any error from the sidecar container - errMsg, err := os.ReadFile(emptyDirBasePath + "/error") - if err != nil && !os.IsNotExist(err) { - return nil, status.Errorf(codes.Internal, "failed to open error file %q: %v", emptyDirBasePath+"/error", err) - } - if err == nil && len(errMsg) > 0 { - errMsgStr := string(errMsg) - code := codes.Internal - if strings.Contains(errMsgStr, "Incorrect Usage") { - code = codes.InvalidArgument - } - - if strings.Contains(errMsgStr, "signal: killed") { - code = codes.ResourceExhausted - } - - if strings.Contains(errMsgStr, "signal: terminated") { - klog.V(4).Infof("NodePublishVolume on volume %q to target path %q is not needed because the sidecar container has terminated.", bucketName, targetPath) + // Check if there is any error from the gcsfuse + code, err := checkGcsFuseErr(isInitContainer, pod, targetPath) + if code != codes.OK { + if code == codes.Canceled { + klog.V(4).Infof("NodePublishVolume on volume %q to target path %q is not needed because the gcsfuse has terminated.", bucketName, targetPath) return &csi.NodePublishVolumeResponse{}, nil } - if strings.Contains(errMsgStr, "googleapi: Error 403") || - strings.Contains(errMsgStr, "IAM returned 403 Forbidden: Permission") || - strings.Contains(errMsgStr, "google: could not find default credentials") { - code = codes.PermissionDenied - } - - if strings.Contains(errMsgStr, "bucket doesn't exist") { - code = codes.NotFound - } - - return nil, status.Errorf(code, "the sidecar container failed with error: %v", errMsgStr) - } - - var containerStatusList []corev1.ContainerStatus - // Use ContainerStatuses or InitContainerStatuses - if isInitContainer { - containerStatusList = pod.Status.InitContainerStatuses - } else { - containerStatusList = pod.Status.ContainerStatuses + return nil, status.Errorf(code, err.Error()) } - // Check if the sidecar container terminated - for _, cs := range containerStatusList { - if cs.Name != webhook.SidecarContainerName { - continue - } - - var reason string - var exitCode int32 - if cs.RestartCount > 0 && cs.LastTerminationState.Terminated != nil { - reason = cs.LastTerminationState.Terminated.Reason - exitCode = cs.LastTerminationState.Terminated.ExitCode - } else if cs.State.Terminated != nil { - reason = cs.State.Terminated.Reason - exitCode = cs.State.Terminated.ExitCode - } - - if exitCode != 0 { - code := codes.Internal - if reason == "OOMKilled" || exitCode == 137 { - code = codes.ResourceExhausted - } - - return nil, status.Errorf(code, "the sidecar container terminated due to %v, exit code: %v", reason, exitCode) - } + // Check if there is any error from the sidecar container + code, err = checkSidecarContainerErr(isInitContainer, pod) + if code != codes.OK { + return nil, status.Errorf(code, err.Error()) } // TODO: Check if the socket listener timed out @@ -227,11 +169,11 @@ func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublish } if mounted { - // Already mounted klog.V(4).Infof("NodePublishVolume succeeded on volume %q to target path %q, mount already exists.", bucketName, targetPath) return &csi.NodePublishVolumeResponse{}, nil } + klog.V(4).Infof("NodePublishVolume attempting mkdir for path %q", targetPath) if err := os.MkdirAll(targetPath, 0o750); err != nil { return nil, status.Errorf(codes.Internal, "mkdir failed for path %q: %v", targetPath, err) diff --git a/pkg/csi_driver/utils.go b/pkg/csi_driver/utils.go index 86b11ca2..a9866f93 100644 --- a/pkg/csi_driver/utils.go +++ b/pkg/csi_driver/utils.go @@ -18,6 +18,7 @@ limitations under the License. package driver import ( + "errors" "fmt" "os" "path/filepath" @@ -25,10 +26,12 @@ import ( "strings" csi "github.com/container-storage-interface/spec/lib/go/csi" + "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/util" "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/webhook" pbSanitizer "github.com/kubernetes-csi/csi-lib-utils/protosanitizer" "golang.org/x/net/context" "google.golang.org/grpc" + "google.golang.org/grpc/codes" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" @@ -291,7 +294,7 @@ func parseRequestArguments(req *csi.NodePublishVolumeRequest) (string, string, [ return targetPath, bucketName, fuseMountOptions, skipCSIBucketAccessCheck, nil } -func putExitFile(pod *corev1.Pod, emptyDirBasePath string) error { +func putExitFile(pod *corev1.Pod, targetPath string) error { podIsTerminating := pod.DeletionTimestamp != nil podRestartPolicyIsNever := pod.Spec.RestartPolicy == corev1.RestartPolicyNever podRestartPolicyIsOnFailure := pod.Spec.RestartPolicy == corev1.RestartPolicyOnFailure @@ -333,6 +336,11 @@ func putExitFile(pod *corev1.Pod, emptyDirBasePath string) error { } klog.V(4).Infof("[Pod %v/%v, UID %v] all the other containers terminated in the Pod, put the exit file.", pod.Namespace, pod.Name, pod.UID) + emptyDirBasePath, err := util.PrepareEmptyDir(targetPath, false) + if err != nil { + return fmt.Errorf("failed to get emptyDir path: %w", err) + } + exitFilePath := filepath.Dir(emptyDirBasePath) + "/exit" f, err := os.Create(exitFilePath) if err != nil { @@ -348,3 +356,101 @@ func putExitFile(pod *corev1.Pod, emptyDirBasePath string) error { return nil } + +func checkGcsFuseErr(isInitContainer bool, pod *corev1.Pod, targetPath string) (codes.Code, error) { + code := codes.Internal + cs, err := getSidecarContainerStatus(isInitContainer, pod) + if err != nil { + return code, err + } + + // the sidecar container has not started, skip the check + if cs.State.Waiting != nil { + return codes.OK, nil + } + + emptyDirBasePath, err := util.PrepareEmptyDir(targetPath, false) + if err != nil { + return code, fmt.Errorf("failed to get emptyDir path: %w", err) + } + + errMsg, err := os.ReadFile(emptyDirBasePath + "/error") + if err != nil && !os.IsNotExist(err) { + return code, fmt.Errorf("failed to open error file %q: %w", emptyDirBasePath+"/error", err) + } + if err == nil && len(errMsg) > 0 { + errMsgStr := string(errMsg) + code := codes.Internal + if strings.Contains(errMsgStr, "Incorrect Usage") { + code = codes.InvalidArgument + } + + if strings.Contains(errMsgStr, "signal: killed") { + code = codes.ResourceExhausted + } + + if strings.Contains(errMsgStr, "signal: terminated") { + code = codes.Canceled + } + + if strings.Contains(errMsgStr, "googleapi: Error 403") || + strings.Contains(errMsgStr, "IAM returned 403 Forbidden: Permission") || + strings.Contains(errMsgStr, "google: could not find default credentials") { + code = codes.PermissionDenied + } + + if strings.Contains(errMsgStr, "bucket doesn't exist") { + code = codes.NotFound + } + + return code, fmt.Errorf("gcsfuse failed with error: %v", errMsgStr) + } + + return codes.OK, nil +} + +func checkSidecarContainerErr(isInitContainer bool, pod *corev1.Pod) (codes.Code, error) { + code := codes.Internal + cs, err := getSidecarContainerStatus(isInitContainer, pod) + if err != nil { + return code, err + } + + var reason string + var exitCode int32 + if cs.RestartCount > 0 && cs.LastTerminationState.Terminated != nil { + reason = cs.LastTerminationState.Terminated.Reason + exitCode = cs.LastTerminationState.Terminated.ExitCode + } else if cs.State.Terminated != nil { + reason = cs.State.Terminated.Reason + exitCode = cs.State.Terminated.ExitCode + } + + if exitCode != 0 { + if reason == "OOMKilled" || exitCode == 137 { + code = codes.ResourceExhausted + } + + return code, fmt.Errorf("the sidecar container terminated due to %v, exit code: %v", reason, exitCode) + } + + return codes.OK, nil +} + +func getSidecarContainerStatus(isInitContainer bool, pod *corev1.Pod) (*corev1.ContainerStatus, error) { + var containerStatusList []corev1.ContainerStatus + // Use ContainerStatuses or InitContainerStatuses + if isInitContainer { + containerStatusList = pod.Status.InitContainerStatuses + } else { + containerStatusList = pod.Status.ContainerStatuses + } + + for _, cs := range containerStatusList { + if cs.Name == webhook.SidecarContainerName { + return &cs, nil + } + } + + return nil, errors.New("the sidecar container was not found") +} diff --git a/pkg/csi_mounter/csi_mounter.go b/pkg/csi_mounter/csi_mounter.go index ba1e25c6..635edcc0 100644 --- a/pkg/csi_mounter/csi_mounter.go +++ b/pkg/csi_mounter/csi_mounter.go @@ -79,7 +79,7 @@ func (m *Mounter) Mount(source string, target string, fstype string, options []s } // Prepare the temp emptyDir path - emptyDirBasePath, err := util.PrepareEmptyDir(target, false) + emptyDirBasePath, err := util.PrepareEmptyDir(target, true) if err != nil { return fmt.Errorf("failed to prepare emptyDir path: %w", err) }