diff --git a/pkg/cache/task.go b/pkg/cache/task.go index eccab00a7..e2d4c4e01 100644 --- a/pkg/cache/task.go +++ b/pkg/cache/task.go @@ -533,6 +533,31 @@ func (task *Task) releaseAllocation() { // this reduces the scheduling overhead by blocking such // request away from the core scheduler. func (task *Task) sanityCheckBeforeScheduling() error { + // After version 1.7.0, we should reject the task whose pod is unbound and has conflicting metadata. + if !utils.PodAlreadyBound(task.pod) { + if err := utils.CheckAppIdInPod(task.pod); err != nil { + log.Log(log.ShimCacheTask).Warn("Pod has inconsistent application metadata and may be rejected in a future YuniKorn release.", + zap.String("appID", task.applicationID), + zap.String("podName", task.pod.Name), + zap.String("error", err.Error())) + + events.GetRecorder().Eventf(task.pod.DeepCopy(), + nil, v1.EventTypeWarning, "Scheduling", "Scheduling", fmt.Sprintf("Pod has inconsistent application metadata and may be rejected in a future YuniKorn release: %s", err.Error())) + } + if err := utils.CheckQueueNameInPod(task.pod); err != nil { + log.Log(log.ShimCacheTask).Warn("Pod has inconsistent queue metadata and may be rejected in a future YuniKorn release.", + zap.String("appID", task.applicationID), + zap.String("podName", task.pod.Name), + zap.String("error", err.Error())) + + events.GetRecorder().Eventf(task.pod.DeepCopy(), + nil, v1.EventTypeWarning, "Scheduling", "Scheduling", fmt.Sprintf("Pod has inconsistent queue metadata and may be rejected in a future YuniKorn release: %s", err.Error())) + } + } + return task.checkPodPVCs() +} + +func (task *Task) checkPodPVCs() error { task.lock.RLock() // Check PVCs used by the pod namespace := task.pod.Namespace diff --git a/pkg/common/constants/constants.go b/pkg/common/constants/constants.go index 0e6f00bab..5a848b4af 100644 --- a/pkg/common/constants/constants.go +++ b/pkg/common/constants/constants.go @@ -26,6 +26,10 @@ import ( const True = "true" const False = "false" +// Kubernetes +const Label = "label" +const Annotation = "annotation" + // Cluster const DefaultNodeAttributeHostNameKey = "si.io/hostname" const DefaultNodeAttributeRackNameKey = "si.io/rackname" @@ -124,3 +128,9 @@ const AutoGenAppSuffix = "autogen" // Compression Algorithms for schedulerConfig const GzipSuffix = "gz" + +// The key list which are used to identify the application ID or queue name in pod. +var AppIdLabelKeys = []string{CanonicalLabelApplicationID, SparkLabelAppID, LabelApplicationID} +var AppIdAnnotationKeys = []string{AnnotationApplicationID} +var QueueLabelKeys = []string{CanonicalLabelQueueName, LabelQueueName} +var QueueAnnotationKeys = []string{AnnotationQueueName} diff --git a/pkg/common/utils/utils.go b/pkg/common/utils/utils.go index 515d4109c..f58c96e53 100644 --- a/pkg/common/utils/utils.go +++ b/pkg/common/utils/utils.go @@ -213,6 +213,53 @@ func GetApplicationIDFromPod(pod *v1.Pod) string { return GenerateApplicationID(pod.Namespace, conf.GetSchedulerConf().GenerateUniqueAppIds, string(pod.UID)) } +func CheckAppIdInPod(pod *v1.Pod) error { + return ValidatePodLabelAnnotation(pod, constants.AppIdLabelKeys, constants.AppIdAnnotationKeys) +} + +func CheckQueueNameInPod(pod *v1.Pod) error { + return ValidatePodLabelAnnotation(pod, constants.QueueLabelKeys, constants.QueueAnnotationKeys) +} + +// return true if all non-empty values are same across all provided label/annotation +func ValidatePodLabelAnnotation(pod *v1.Pod, labelKeys []string, annotationKeys []string) error { + var referenceKey string + var referenceValue string + var referenceType string + + checkingType := constants.Label + for _, key := range labelKeys { + value := GetPodLabelValue(pod, key) + if value == "" { + continue + } + if referenceValue == "" { + referenceKey = key + referenceValue = value + referenceType = checkingType + } else if referenceValue != value { + return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s: \"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue) + } + } + + checkingType = constants.Annotation + for _, key := range annotationKeys { + value := GetPodAnnotationValue(pod, key) + if value == "" { + continue + } + if referenceValue == "" { + referenceKey = key + referenceValue = value + referenceType = checkingType + } else if referenceValue != value { + return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s: \"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue) + } + } + + return nil +} + // compare the existing pod condition with the given one, return true if the pod condition remains not changed. // return false if pod has no condition set yet, or condition has changed. func PodUnderCondition(pod *v1.Pod, condition *v1.PodCondition) bool { diff --git a/pkg/common/utils/utils_test.go b/pkg/common/utils/utils_test.go index 4c2b64cda..60dd1d482 100644 --- a/pkg/common/utils/utils_test.go +++ b/pkg/common/utils/utils_test.go @@ -21,6 +21,7 @@ package utils import ( "bytes" "compress/gzip" + "errors" "fmt" "reflect" "strings" @@ -723,6 +724,209 @@ func TestGetApplicationIDFromPod(t *testing.T) { } } +func TestCheckAppIdInPod(t *testing.T) { + testCases := []struct { + name string + pod *v1.Pod + expected error + }{ + { + name: "consistent app ID", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.CanonicalLabelApplicationID: "app-123", + constants.SparkLabelAppID: "app-123", + constants.LabelApplicationID: "app-123", + }, + Annotations: map[string]string{ + constants.AnnotationApplicationID: "app-123", + }, + }, + }, + expected: nil, + }, + { + name: "inconsistent app ID in labels", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.CanonicalLabelApplicationID: "app-123", + constants.SparkLabelAppID: "app-456", + }, + }, + }, + expected: errors.New("label spark-app-selector: \"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""), + }, + { + name: "inconsistent app ID between label and annotation", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.CanonicalLabelApplicationID: "app-123", + }, + Annotations: map[string]string{ + constants.AnnotationApplicationID: "app-456", + }, + }, + }, + expected: errors.New("annotation yunikorn.apache.org/app-id: \"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := CheckAppIdInPod(tc.pod) + if tc.expected != nil { + assert.ErrorContains(t, err, tc.expected.Error()) + } else { + assert.NilError(t, err) + } + }) + } +} + +func TestCheckQueueNameInPod(t *testing.T) { + testCases := []struct { + name string + pod *v1.Pod + expected error + }{ + { + name: "consistent queue name", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.CanonicalLabelQueueName: "root.a", + constants.LabelQueueName: "root.a", + }, + Annotations: map[string]string{ + constants.AnnotationQueueName: "root.a", + }, + }, + }, + expected: nil, + }, + { + name: "inconsistent app ID in labels", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.CanonicalLabelQueueName: "root.a", + constants.LabelQueueName: "root.b", + }, + }, + }, + expected: errors.New("label queue: \"root.b\" doesn't match label yunikorn.apache.org/queue: \"root.a\""), + }, + { + name: "inconsistent app ID between label and annotation", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.CanonicalLabelQueueName: "root.a", + }, + Annotations: map[string]string{ + constants.AnnotationQueueName: "root.b", + }, + }, + }, + expected: errors.New("annotation yunikorn.apache.org/queue: \"root.b\" doesn't match label yunikorn.apache.org/queue: \"root.a\""), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := CheckQueueNameInPod(tc.pod) + if tc.expected != nil { + assert.ErrorContains(t, err, tc.expected.Error()) + } else { + assert.NilError(t, err) + } + }) + } +} + +func TestValidatePodLabelAnnotation(t *testing.T) { + labelKeys := []string{"labelKey1", "labelKey2"} + annotationKeys := []string{"annotationKey1", "annotationKey2"} + + testCases := []struct { + name string + pod *v1.Pod + expected error + }{ + { + name: "empty pod", + pod: &v1.Pod{}, + expected: nil, + }, + { + name: "pod with all values are consistent", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "labelKey1": "value1", + "labelKey2": "value1", + }, + Annotations: map[string]string{ + "annotationKey1": "value1", + "annotationKey2": "value1", + }, + }, + }, + expected: nil, + }, + { + name: "pod with inconsistent value in labels", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "labelKey1": "value1", + "labelKey2": "value2", + }, + }, + }, + expected: errors.New("label labelKey2: \"value2\" doesn't match label labelKey1: \"value1\""), + }, + { + name: "pod with inconsistent value between label and annotation", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "labelKey1": "value1", + }, + Annotations: map[string]string{ + "annotationKey1": "value2", + }, + }, + }, + expected: errors.New("annotation annotationKey1: \"value2\" doesn't match label labelKey1: \"value1\""), + }, + { + name: "pod with inconsistent value in annotations", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "annotationKey1": "value1", + "annotationKey2": "value2", + }, + }, + }, + expected: errors.New("annotation annotationKey2: \"value2\" doesn't match annotation annotationKey1: \"value1\""), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := ValidatePodLabelAnnotation(tc.pod, labelKeys, annotationKeys) + if tc.expected != nil { + assert.ErrorContains(t, err, tc.expected.Error()) + } else { + assert.NilError(t, err) + } + }) + } +} + func TestGenerateApplicationID(t *testing.T) { assert.Equal(t, "yunikorn-this-is-a-namespace-autogen", GenerateApplicationID("this-is-a-namespace", false, "pod-uid"))