Skip to content

Commit

Permalink
Review remarks
Browse files Browse the repository at this point in the history
  • Loading branch information
mimowo committed Jul 30, 2024
1 parent b8d1c9b commit eb3f230
Showing 1 changed file with 20 additions and 14 deletions.
34 changes: 20 additions & 14 deletions pkg/webhooks/jobset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,20 +318,7 @@ func (j *jobSetWebhook) ValidateUpdate(ctx context.Context, old, newObj runtime.
// This is needed for integration with Kueue/DWS.
if ptr.Deref(oldJS.Spec.Suspend, false) || ptr.Deref(js.Spec.Suspend, false) {
for index := range js.Spec.ReplicatedJobs {
munge := true
// Don't allow to unsuspend if there are still active Jobs with
// different PodTemplate. We do this to avoid a race condition when
// Jobs with an old PodTemplate (before suspension) are still
// runnable and would not be deleted if we unsuspend the JobSet.
if !ptr.Deref(js.Spec.Suspend, false) {
rStatus := js.Status.ReplicatedJobsStatus
// Don't allow to mutate PodTemplate on unsuspending if there
// are still active or suspended jobs from the previous run
if len(rStatus) > index && (rStatus[index].Active > 0 || rStatus[index].Suspended > 0) {
munge = false
}
}
if munge {
if allowToMutatePodTemplate(js, index) {
// Pod values which must be mutable for Kueue are defined here: https://github.com/kubernetes-sigs/kueue/blob/a50d395c36a2cb3965be5232162cf1fded1bdb08/apis/kueue/v1beta1/workload_types.go#L256-L260
mungedSpec.ReplicatedJobs[index].Template.Spec.Template.Annotations = oldJS.Spec.ReplicatedJobs[index].Template.Spec.Template.Annotations
mungedSpec.ReplicatedJobs[index].Template.Spec.Template.Labels = oldJS.Spec.ReplicatedJobs[index].Template.Spec.Template.Labels
Expand All @@ -350,6 +337,25 @@ func (j *jobSetWebhook) ValidateUpdate(ctx context.Context, old, newObj runtime.
return nil, errs.ToAggregate()
}

// allowToMutatePodTemplate checks if a PodTemplate in a specifc ReplicatedJob
// can be mutated. PodTemplate can be mutated when the JobSet is either
// suspended or getting suspended. However, we don't allow to mutate the
// PodTemplate when the JobSet is resuming, and the Jobs from the previous run
// still exist. This gives time the JobSet controller to delete the Jobs from
// the previous run if they could conflict with the Job creation on resume.
func allowToMutatePodTemplate(js *jobset.JobSet, index int) bool {
if !ptr.Deref(js.Spec.Suspend, false) {
rStatus := js.Status.ReplicatedJobsStatus

// Don't allow to mutate PodTemplate on unsuspending if there are still
// active or suspended jobs from the previous run.
if len(rStatus) > index && (rStatus[index].Active > 0 || rStatus[index].Suspended > 0) {
return false
}
}
return true
}

// ValidateDelete implements webhook.Validator so a webhook will be registered for the type
func (j *jobSetWebhook) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
return nil, nil
Expand Down

0 comments on commit eb3f230

Please sign in to comment.