Skip to content

Commit

Permalink
refactor sidecar container error parsing logic
Browse files Browse the repository at this point in the history
  • Loading branch information
songjiaxun committed Jul 5, 2024
1 parent 0ca5eb9 commit be8a611
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 72 deletions.
10 changes: 10 additions & 0 deletions pkg/cloud_provider/clientset/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ func (c *FakeClientset) GetPod(namespace, name string) (*corev1.Pod, error) {
},
Volumes: webhook.GetSidecarContainerVolumeSpec(),
},
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{
Name: webhook.SidecarContainerName,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
},
},
},
}

return pod, nil
Expand Down
82 changes: 12 additions & 70 deletions pkg/csi_driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import (
"golang.org/x/time/rate"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
mount "k8s.io/mount-utils"
)
Expand Down Expand Up @@ -134,88 +133,31 @@ func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublish
return nil, status.Error(codes.FailedPrecondition, "failed to find the sidecar container in Pod spec")
}

// Prepare the emptyDir path for the mounter to pass the file descriptor
emptyDirBasePath, err := util.PrepareEmptyDir(targetPath, true)
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to prepare emptyDir path: %v", err)
}

// Check if the sidecar container is still required,
// if not, put an exit file to the emptyDir path to
// notify the sidecar container to exit.
if !isInitContainer {
if err := putExitFile(pod, emptyDirBasePath); err != nil {
if err := putExitFile(pod, targetPath); err != nil {
return nil, status.Errorf(codes.Internal, err.Error())
}
}

// Check if there is any error from the sidecar container
errMsg, err := os.ReadFile(emptyDirBasePath + "/error")
if err != nil && !os.IsNotExist(err) {
return nil, status.Errorf(codes.Internal, "failed to open error file %q: %v", emptyDirBasePath+"/error", err)
}
if err == nil && len(errMsg) > 0 {
errMsgStr := string(errMsg)
code := codes.Internal
if strings.Contains(errMsgStr, "Incorrect Usage") {
code = codes.InvalidArgument
}

if strings.Contains(errMsgStr, "signal: killed") {
code = codes.ResourceExhausted
}

if strings.Contains(errMsgStr, "signal: terminated") {
klog.V(4).Infof("NodePublishVolume on volume %q to target path %q is not needed because the sidecar container has terminated.", bucketName, targetPath)
// Check if there is any error from the gcsfuse
code, err := checkGcsFuseErr(isInitContainer, pod, targetPath)
if code != codes.OK {
if code == codes.Canceled {
klog.V(4).Infof("NodePublishVolume on volume %q to target path %q is not needed because the gcsfuse has terminated.", bucketName, targetPath)

return &csi.NodePublishVolumeResponse{}, nil
}

if strings.Contains(errMsgStr, "googleapi: Error 403") ||
strings.Contains(errMsgStr, "IAM returned 403 Forbidden: Permission") ||
strings.Contains(errMsgStr, "google: could not find default credentials") {
code = codes.PermissionDenied
}

if strings.Contains(errMsgStr, "bucket doesn't exist") {
code = codes.NotFound
}

return nil, status.Errorf(code, "the sidecar container failed with error: %v", errMsgStr)
}

var containerStatusList []corev1.ContainerStatus
// Use ContainerStatuses or InitContainerStatuses
if isInitContainer {
containerStatusList = pod.Status.InitContainerStatuses
} else {
containerStatusList = pod.Status.ContainerStatuses
return nil, status.Errorf(code, err.Error())
}

// Check if the sidecar container terminated
for _, cs := range containerStatusList {
if cs.Name != webhook.SidecarContainerName {
continue
}

var reason string
var exitCode int32
if cs.RestartCount > 0 && cs.LastTerminationState.Terminated != nil {
reason = cs.LastTerminationState.Terminated.Reason
exitCode = cs.LastTerminationState.Terminated.ExitCode
} else if cs.State.Terminated != nil {
reason = cs.State.Terminated.Reason
exitCode = cs.State.Terminated.ExitCode
}

if exitCode != 0 {
code := codes.Internal
if reason == "OOMKilled" || exitCode == 137 {
code = codes.ResourceExhausted
}

return nil, status.Errorf(code, "the sidecar container terminated due to %v, exit code: %v", reason, exitCode)
}
// Check if there is any error from the sidecar container
code, err = checkSidecarContainerErr(isInitContainer, pod)
if code != codes.OK {
return nil, status.Errorf(code, err.Error())
}

// TODO: Check if the socket listener timed out
Expand All @@ -227,11 +169,11 @@ func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublish
}

if mounted {
// Already mounted
klog.V(4).Infof("NodePublishVolume succeeded on volume %q to target path %q, mount already exists.", bucketName, targetPath)

return &csi.NodePublishVolumeResponse{}, nil
}

klog.V(4).Infof("NodePublishVolume attempting mkdir for path %q", targetPath)
if err := os.MkdirAll(targetPath, 0o750); err != nil {
return nil, status.Errorf(codes.Internal, "mkdir failed for path %q: %v", targetPath, err)
Expand Down
108 changes: 107 additions & 1 deletion pkg/csi_driver/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@ limitations under the License.
package driver

import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"

csi "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/util"
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/webhook"
pbSanitizer "github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
"golang.org/x/net/context"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -291,7 +294,7 @@ func parseRequestArguments(req *csi.NodePublishVolumeRequest) (string, string, [
return targetPath, bucketName, fuseMountOptions, skipCSIBucketAccessCheck, nil
}

func putExitFile(pod *corev1.Pod, emptyDirBasePath string) error {
func putExitFile(pod *corev1.Pod, targetPath string) error {
podIsTerminating := pod.DeletionTimestamp != nil
podRestartPolicyIsNever := pod.Spec.RestartPolicy == corev1.RestartPolicyNever
podRestartPolicyIsOnFailure := pod.Spec.RestartPolicy == corev1.RestartPolicyOnFailure
Expand Down Expand Up @@ -333,6 +336,11 @@ func putExitFile(pod *corev1.Pod, emptyDirBasePath string) error {
}

klog.V(4).Infof("[Pod %v/%v, UID %v] all the other containers terminated in the Pod, put the exit file.", pod.Namespace, pod.Name, pod.UID)
emptyDirBasePath, err := util.PrepareEmptyDir(targetPath, false)
if err != nil {
return fmt.Errorf("failed to get emptyDir path: %w", err)
}

exitFilePath := filepath.Dir(emptyDirBasePath) + "/exit"
f, err := os.Create(exitFilePath)
if err != nil {
Expand All @@ -348,3 +356,101 @@ func putExitFile(pod *corev1.Pod, emptyDirBasePath string) error {

return nil
}

func checkGcsFuseErr(isInitContainer bool, pod *corev1.Pod, targetPath string) (codes.Code, error) {
code := codes.Internal
cs, err := getSidecarContainerStatus(isInitContainer, pod)
if err != nil {
return code, err
}

// the sidecar container has not started, skip the check
if cs.State.Waiting != nil {
return codes.OK, nil
}

emptyDirBasePath, err := util.PrepareEmptyDir(targetPath, false)
if err != nil {
return code, fmt.Errorf("failed to get emptyDir path: %w", err)
}

errMsg, err := os.ReadFile(emptyDirBasePath + "/error")
if err != nil && !os.IsNotExist(err) {
return code, fmt.Errorf("failed to open error file %q: %w", emptyDirBasePath+"/error", err)
}
if err == nil && len(errMsg) > 0 {
errMsgStr := string(errMsg)
code := codes.Internal
if strings.Contains(errMsgStr, "Incorrect Usage") {
code = codes.InvalidArgument
}

if strings.Contains(errMsgStr, "signal: killed") {
code = codes.ResourceExhausted
}

if strings.Contains(errMsgStr, "signal: terminated") {
code = codes.Canceled
}

if strings.Contains(errMsgStr, "googleapi: Error 403") ||
strings.Contains(errMsgStr, "IAM returned 403 Forbidden: Permission") ||
strings.Contains(errMsgStr, "google: could not find default credentials") {
code = codes.PermissionDenied
}

if strings.Contains(errMsgStr, "bucket doesn't exist") {
code = codes.NotFound
}

return code, fmt.Errorf("gcsfuse failed with error: %v", errMsgStr)
}

return codes.OK, nil
}

func checkSidecarContainerErr(isInitContainer bool, pod *corev1.Pod) (codes.Code, error) {
code := codes.Internal
cs, err := getSidecarContainerStatus(isInitContainer, pod)
if err != nil {
return code, err
}

var reason string
var exitCode int32
if cs.RestartCount > 0 && cs.LastTerminationState.Terminated != nil {
reason = cs.LastTerminationState.Terminated.Reason
exitCode = cs.LastTerminationState.Terminated.ExitCode
} else if cs.State.Terminated != nil {
reason = cs.State.Terminated.Reason
exitCode = cs.State.Terminated.ExitCode
}

if exitCode != 0 {
if reason == "OOMKilled" || exitCode == 137 {
code = codes.ResourceExhausted
}

return code, fmt.Errorf("the sidecar container terminated due to %v, exit code: %v", reason, exitCode)
}

return codes.OK, nil
}

func getSidecarContainerStatus(isInitContainer bool, pod *corev1.Pod) (*corev1.ContainerStatus, error) {
var containerStatusList []corev1.ContainerStatus
// Use ContainerStatuses or InitContainerStatuses
if isInitContainer {
containerStatusList = pod.Status.InitContainerStatuses
} else {
containerStatusList = pod.Status.ContainerStatuses
}

for _, cs := range containerStatusList {
if cs.Name == webhook.SidecarContainerName {
return &cs, nil
}
}

return nil, errors.New("the sidecar container was not found")
}
2 changes: 1 addition & 1 deletion pkg/csi_mounter/csi_mounter.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (m *Mounter) Mount(source string, target string, fstype string, options []s
}

// Prepare the temp emptyDir path
emptyDirBasePath, err := util.PrepareEmptyDir(target, false)
emptyDirBasePath, err := util.PrepareEmptyDir(target, true)
if err != nil {
return fmt.Errorf("failed to prepare emptyDir path: %w", err)
}
Expand Down

0 comments on commit be8a611

Please sign in to comment.