Skip to content

Commit

Permalink
pb-7504: make NFS job pod to use root for resource backup
Browse files Browse the repository at this point in the history
- When a GCP based file store is used as the NFS backup location, the job pod
  using it doesn't have write permission for the group user. This causes
  a permission denied error for the non-root user during backup and restore.

- This is GKE specific behaviour, hence a check was added to force all job pods
  to run as the root user, eliminating the permission denied error.

Signed-off-by: Lalatendu Das <[email protected]>
  • Loading branch information
lalat-das committed Jul 30, 2024
1 parent 1f487d1 commit 6ef74cc
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 6 deletions.
25 changes: 22 additions & 3 deletions pkg/drivers/nfsbackup/nfsbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,33 @@ func jobForBackupResource(
},
}

uid := utils.KdmpJobUid
// For GCP based clusters the NFS PVC is mounted with anomalous GID permissions (i.e. without GID write permission),
// hence we avoid passing any specific UID or GID so that the job pod will always run as the ROOT user.
// This makes the job pod fail in a GCP based cluster with a PSA enabled environment.

// check the cluster is GCP based or not
isGcpBasedCluster, err := utils.IsGcpHostedCluster()
if err != nil {
logrus.Errorf("failed to check the cluster is GCP based or not: %v", err)
return nil, fmt.Errorf("failed to check the cluster is GCP based or not for job [%s/%s]", jobOption.Namespace, jobOption.RestoreExportName)
}
if isGcpBasedCluster {
logrus.Debugf("Found a GCP based cluster hence not adding any specific UID/GID to the job, it will run with root user")
uid = ""
}

// The job is intended to back up resources to the NFS backup location
// and it doesn't need a specific job UID/GID since it will be squashed at the NFS server,
// hence a global hardcoded UID/GID is used.
// Not passing the groupId as we do not want to set the RunAsGroup field in the securityContext
// This helps us in setting the primaryGroup ID to root for the user ID.
job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, "")
if err != nil {
return nil, err
logrus.Infof("DAS ............. Adding security context to the job")
if uid != "" {
job, err = utils.AddSecurityContextToJob(job, uid, "")
if err != nil {
return nil, err
}
}

// Add the image secret in job spec only if it is present in the stork deployment.
Expand Down
25 changes: 22 additions & 3 deletions pkg/drivers/nfsrestore/nfsrestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,30 @@ func jobForRestoreResource(
},
},
}

uid := utils.KdmpJobUid
// For GCP based clusters the NFS PVC is mounted with anomalous GID permissions (i.e. without GID write permission),
// hence we avoid passing any specific UID or GID so that the job pod will always run as the ROOT user.
// This makes the job pod fail in a GCP based cluster with a PSA enabled environment.

// check the cluster is GCP based or not
isGcpBasedCluster, err := utils.IsGcpHostedCluster()
if err != nil {
logrus.Errorf("failed to check the cluster is GCP based or not: %v", err)
return nil, fmt.Errorf("failed to check the cluster is GCP based or not for job [%s/%s]", jobOption.Namespace, jobOption.RestoreExportName)
}
if isGcpBasedCluster {
logrus.Debugf("Found a GCP based cluster hence not adding any specific UID/GID to the job, it will run with root user")
uid = ""
}
// Not passing the groupId as we do not want to set the RunAsGroup field in the securityContext
// This helps us in setting the primaryGroup ID to root for the user ID.
job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, "")
if err != nil {
return nil, err
logrus.Infof("DAS ............. Adding security context to the job restore -path")
if uid != "" {
job, err = utils.AddSecurityContextToJob(job, uid, "")
if err != nil {
return nil, err
}
}
// Add the image secret in job spec only if it is present in the stork deployment.
if len(imageRegistrySecret) != 0 {
Expand Down
18 changes: 18 additions & 0 deletions pkg/drivers/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -1088,3 +1088,21 @@ func GetOcpNsUidGid(nsName string, psaJobUid string, psaJobGid string) (string,
}
return psaJobUid, psaJobGid, isOcp, nil
}

// IsGcpHostedCluster reports whether the cluster is hosted on GCP.
//
// It lists the cluster nodes through core.Instance().GetNodes() and returns
// true as soon as one node has a Spec.ProviderID starting with "gce://".
// It returns false with a nil error when no node matches (including when the
// node list is empty), and false with a wrapped error when listing nodes fails.
func IsGcpHostedCluster() (bool, error) {
	// Any GCP hosted cluster, be it vanilla, OCP or GKE, is expected to have
	// a ProviderID in its node spec with a prefix of "gce://".
	nodes, err := core.Instance().GetNodes()
	if err != nil {
		// %w keeps the underlying error reachable through errors.Is / errors.As
		// while producing the same message text as before.
		return false, fmt.Errorf("failed to get nodes: %w", err)
	}

	// Index into the slice instead of ranging by value so that each node
	// element is not copied on every iteration.
	for i := range nodes.Items {
		if strings.HasPrefix(nodes.Items[i].Spec.ProviderID, "gce://") {
			return true, nil
		}
	}
	return false, nil
}

0 comments on commit 6ef74cc

Please sign in to comment.