Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YUNIKORN-2810] Throw a warning if a pod has inconsistent metadata in the shim #900

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions pkg/cache/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,31 @@
// this reduces the scheduling overhead by blocking such
// request away from the core scheduler.
func (task *Task) sanityCheckBeforeScheduling() error {
// After version 1.7.0, we should reject the task whose pod is unbound and has conflicting metadata.
if !utils.PodAlreadyBound(task.pod) {
if err := utils.CheckAppIdInPod(task.pod); err != nil {
log.Log(log.ShimCacheTask).Warn("Pod has inconsistent application metadata and may be rejected in a future YuniKorn release.",
zap.String("appID", task.applicationID),
zap.String("podName", task.pod.Name),
zap.String("error", err.Error()))

Check warning on line 542 in pkg/cache/task.go

View check run for this annotation

Codecov / codecov/patch

pkg/cache/task.go#L538-L542

Added lines #L538 - L542 were not covered by tests

events.GetRecorder().Eventf(task.pod.DeepCopy(),
nil, v1.EventTypeWarning, "Scheduling", "Scheduling", fmt.Sprintf("Pod has inconsistent application metadata and may be rejected in a future YuniKorn release: %s", err.Error()))

Check warning on line 545 in pkg/cache/task.go

View check run for this annotation

Codecov / codecov/patch

pkg/cache/task.go#L544-L545

Added lines #L544 - L545 were not covered by tests
}
if err := utils.CheckQueueNameInPod(task.pod); err != nil {
log.Log(log.ShimCacheTask).Warn("Pod has inconsistent queue metadata and may be rejected in a future YuniKorn release.",
zap.String("appID", task.applicationID),
zap.String("podName", task.pod.Name),
zap.String("error", err.Error()))

Check warning on line 551 in pkg/cache/task.go

View check run for this annotation

Codecov / codecov/patch

pkg/cache/task.go#L547-L551

Added lines #L547 - L551 were not covered by tests

events.GetRecorder().Eventf(task.pod.DeepCopy(),
nil, v1.EventTypeWarning, "Scheduling", "Scheduling", fmt.Sprintf("Pod has inconsistent queue metadata and may be rejected in a future YuniKorn release: %s", err.Error()))

Check warning on line 554 in pkg/cache/task.go

View check run for this annotation

Codecov / codecov/patch

pkg/cache/task.go#L553-L554

Added lines #L553 - L554 were not covered by tests
}
}
return task.checkPodPVCs()
craigcondit marked this conversation as resolved.
Show resolved Hide resolved
}

func (task *Task) checkPodPVCs() error {
task.lock.RLock()
// Check PVCs used by the pod
namespace := task.pod.Namespace
Expand Down
10 changes: 10 additions & 0 deletions pkg/common/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ import (
const True = "true"
const False = "false"

// Kubernetes
const Label = "label"
const Annotation = "annotation"

// Cluster
const DefaultNodeAttributeHostNameKey = "si.io/hostname"
const DefaultNodeAttributeRackNameKey = "si.io/rackname"
Expand Down Expand Up @@ -124,3 +128,9 @@ const AutoGenAppSuffix = "autogen"

// Compression Algorithms for schedulerConfig
const GzipSuffix = "gz"

// The key list which are used to identify the application ID or queue name in pod.
var AppIdLabelKeys = []string{CanonicalLabelApplicationID, SparkLabelAppID, LabelApplicationID}
var AppIdAnnotationKeys = []string{AnnotationApplicationID}
var QueueLabelKeys = []string{CanonicalLabelQueueName, LabelQueueName}
var QueueAnnotationKeys = []string{AnnotationQueueName}
47 changes: 47 additions & 0 deletions pkg/common/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,53 @@ func GetApplicationIDFromPod(pod *v1.Pod) string {
return GenerateApplicationID(pod.Namespace, conf.GetSchedulerConf().GenerateUniqueAppIds, string(pod.UID))
}

func CheckAppIdInPod(pod *v1.Pod) error {
return ValidatePodLabelAnnotation(pod, constants.AppIdLabelKeys, constants.AppIdAnnotationKeys)
}

func CheckQueueNameInPod(pod *v1.Pod) error {
return ValidatePodLabelAnnotation(pod, constants.QueueLabelKeys, constants.QueueAnnotationKeys)
}

// return true if all non-empty values are same across all provided label/annotation
func ValidatePodLabelAnnotation(pod *v1.Pod, labelKeys []string, annotationKeys []string) error {
var referenceKey string
var referenceValue string
var referenceType string

checkingType := constants.Label
for _, key := range labelKeys {
value := GetPodLabelValue(pod, key)
if value == "" {
continue
}
if referenceValue == "" {
referenceKey = key
referenceValue = value
referenceType = checkingType
} else if referenceValue != value {
return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s: \"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue)
}
}

checkingType = constants.Annotation
for _, key := range annotationKeys {
value := GetPodAnnotationValue(pod, key)
if value == "" {
continue
}
if referenceValue == "" {
referenceKey = key
referenceValue = value
referenceType = checkingType
} else if referenceValue != value {
return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s: \"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue)
}
}

return nil
}

// compare the existing pod condition with the given one, return true if the pod condition remains not changed.
// return false if pod has no condition set yet, or condition has changed.
func PodUnderCondition(pod *v1.Pod, condition *v1.PodCondition) bool {
Expand Down
204 changes: 204 additions & 0 deletions pkg/common/utils/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package utils
import (
"bytes"
"compress/gzip"
"errors"
"fmt"
"reflect"
"strings"
Expand Down Expand Up @@ -723,6 +724,209 @@ func TestGetApplicationIDFromPod(t *testing.T) {
}
}

func TestCheckAppIdInPod(t *testing.T) {
testCases := []struct {
name string
pod *v1.Pod
expected error
}{
{
name: "consistent app ID",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.CanonicalLabelApplicationID: "app-123",
constants.SparkLabelAppID: "app-123",
constants.LabelApplicationID: "app-123",
},
Annotations: map[string]string{
constants.AnnotationApplicationID: "app-123",
},
},
},
expected: nil,
},
{
name: "inconsistent app ID in labels",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.CanonicalLabelApplicationID: "app-123",
constants.SparkLabelAppID: "app-456",
},
},
},
expected: errors.New("label spark-app-selector: \"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""),
},
{
name: "inconsistent app ID between label and annotation",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.CanonicalLabelApplicationID: "app-123",
},
Annotations: map[string]string{
constants.AnnotationApplicationID: "app-456",
},
},
},
expected: errors.New("annotation yunikorn.apache.org/app-id: \"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := CheckAppIdInPod(tc.pod)
if tc.expected != nil {
assert.ErrorContains(t, err, tc.expected.Error())
} else {
assert.NilError(t, err)
}
})
}
}

func TestCheckQueueNameInPod(t *testing.T) {
testCases := []struct {
name string
pod *v1.Pod
expected error
}{
{
name: "consistent queue name",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.CanonicalLabelQueueName: "root.a",
constants.LabelQueueName: "root.a",
},
Annotations: map[string]string{
constants.AnnotationQueueName: "root.a",
},
},
},
expected: nil,
},
{
name: "inconsistent app ID in labels",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.CanonicalLabelQueueName: "root.a",
constants.LabelQueueName: "root.b",
},
},
},
expected: errors.New("label queue: \"root.b\" doesn't match label yunikorn.apache.org/queue: \"root.a\""),
},
{
name: "inconsistent app ID between label and annotation",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.CanonicalLabelQueueName: "root.a",
},
Annotations: map[string]string{
constants.AnnotationQueueName: "root.b",
},
},
},
expected: errors.New("annotation yunikorn.apache.org/queue: \"root.b\" doesn't match label yunikorn.apache.org/queue: \"root.a\""),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := CheckQueueNameInPod(tc.pod)
if tc.expected != nil {
assert.ErrorContains(t, err, tc.expected.Error())
} else {
assert.NilError(t, err)
}
})
}
}

func TestValidatePodLabelAnnotation(t *testing.T) {
labelKeys := []string{"labelKey1", "labelKey2"}
annotationKeys := []string{"annotationKey1", "annotationKey2"}

testCases := []struct {
name string
pod *v1.Pod
expected error
}{
{
name: "empty pod",
pod: &v1.Pod{},
expected: nil,
},
{
name: "pod with all values are consistent",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"labelKey1": "value1",
"labelKey2": "value1",
},
Annotations: map[string]string{
"annotationKey1": "value1",
"annotationKey2": "value1",
},
},
},
expected: nil,
},
{
name: "pod with inconsistent value in labels",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"labelKey1": "value1",
"labelKey2": "value2",
},
},
},
expected: errors.New("label labelKey2: \"value2\" doesn't match label labelKey1: \"value1\""),
},
{
name: "pod with inconsistent value between label and annotation",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"labelKey1": "value1",
},
Annotations: map[string]string{
"annotationKey1": "value2",
},
},
},
expected: errors.New("annotation annotationKey1: \"value2\" doesn't match label labelKey1: \"value1\""),
},
{
name: "pod with inconsistent value in annotations",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
"annotationKey1": "value1",
"annotationKey2": "value2",
},
},
},
expected: errors.New("annotation annotationKey2: \"value2\" doesn't match annotation annotationKey1: \"value1\""),
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := ValidatePodLabelAnnotation(tc.pod, labelKeys, annotationKeys)
if tc.expected != nil {
assert.ErrorContains(t, err, tc.expected.Error())
} else {
assert.NilError(t, err)
}
})
}
}

func TestGenerateApplicationID(t *testing.T) {
assert.Equal(t, "yunikorn-this-is-a-namespace-autogen",
GenerateApplicationID("this-is-a-namespace", false, "pod-uid"))
Expand Down