[YUNIKORN-2068] E2E Test for Preemption #705

Closed: wants to merge 27 commits (changes shown from 16 commits)

Commits (27)
b83bdd5 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Oct 26, 2023)
3830426 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Oct 26, 2023)
72ef0d8 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Oct 30, 2023)
f3e8b57 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Oct 30, 2023)
46eb327 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Oct 30, 2023)
856b629 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Oct 31, 2023)
7e22832 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 2, 2023)
353bf51 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 2, 2023)
2206722 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 2, 2023)
c972734 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 2, 2023)
daac46d [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 2, 2023)
93ab76e [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 3, 2023)
d916cd9 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 3, 2023)
bca7152 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 3, 2023)
b30f5ec [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 3, 2023)
726c154 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 6, 2023)
ad8e7eb [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 7, 2023)
c4f17b0 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 8, 2023)
ce0351b [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 8, 2023)
fae4a0f [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 8, 2023)
d2d1130 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 8, 2023)
5fbc97f [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 8, 2023)
c86acb1 [YUNIKORN-2068] E2E Test for Preemption (rrajesh-cloudera, Nov 9, 2023)
d85fa9a cdh kli (rrajesh-cloudera, Nov 9, 2023)
87494ce [YUNIKORN-2068] E2E Preemption tests (rrajesh-cloudera, Nov 21, 2023)
7a30f8e [YUNIKORN-2068] E2E Preemption tests (rrajesh-cloudera, Nov 21, 2023)
461cff1 [YUNIKORN-2068] E2E Preemption tests (rrajesh-cloudera, Nov 27, 2023)
test/e2e/preemption/preemption_test.go (82 changes: 81 additions, 1 deletion)
@@ -49,6 +49,8 @@ var Worker = ""
var WorkerMemRes int64
var sleepPodMemLimit int64
var sleepPodMemLimit2 int64
var sleepPodMemOverLimit int64
var nodeName string
var taintKey = "e2e_test_preemption"
var nodesToTaint []string

@@ -102,12 +104,14 @@ var _ = ginkgo.BeforeSuite(func() {
for _, node := range *nodesDAOInfo {
if node.NodeID == Worker {
WorkerMemRes = node.Available["memory"]
nodeName = node.HostName
}
}
WorkerMemRes /= (1000 * 1000) // change to M
fmt.Fprintf(ginkgo.GinkgoWriter, "Worker node %s available memory %dM\n", Worker, WorkerMemRes)

sleepPodMemLimit = int64(float64(WorkerMemRes) / 3)
sleepPodMemOverLimit = int64(float64(WorkerMemRes) * 1.5)
Ω(sleepPodMemLimit).NotTo(gomega.BeZero(), "Sleep pod memory limit cannot be zero")
fmt.Fprintf(ginkgo.GinkgoWriter, "Sleep pod limit memory %dM\n", sleepPodMemLimit)

@@ -175,7 +179,7 @@ var _ = ginkgo.Describe("Preemption", func() {
// Wait for pod to move to running state
podErr = kClient.WaitForPodBySelectorRunning(dev,
fmt.Sprintf("app=%s", sleepRespPod.ObjectMeta.Labels["app"]),
60)
120)
gomega.Ω(podErr).NotTo(gomega.HaveOccurred())
}

@@ -546,6 +550,74 @@ var _ = ginkgo.Describe("Preemption", func() {
gomega.Ω(err).ShouldNot(HaveOccurred())
})

ginkgo.It("Verify_preemption_on_specific_node", func() {
/*
1. Create two queues, one with a high guaranteed limit and one with a low guaranteed limit
2. Select a schedulable node from the cluster
3. Schedule enough small, low-priority sleep pods on the low-guaranteed queue to fill the node
4. Schedule a large task in the high-priority queue on the same node
5. Wait a few seconds for the task to be scheduled
6. This should trigger preemption on the low-priority queue and remove one of its tasks
7. Clean up once the test is done, whether it passed or failed
*/

ginkgo.By("Create Two Queue High and Low Guaranteed Limit")
annotation = "ann-" + common.RandSeq(10)
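// root.sandbox2 is guaranteed more memory than the node holds (sleepPodMemOverLimit = 1.5x WorkerMemRes)
// and carries a +100 priority offset, while root.sandbox1 gets a small guarantee (sleepPodMemLimit) and a
// -100 offset, so sandbox2 pods become entitled to preempt sandbox1 pods once the 1s preemption delay passes.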
yunikorn.UpdateCustomConfigMapWrapper(oldConfigMap, "", annotation, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control the queue
sc.Partitions[0].PlacementRules = nil
var err error
if err = common.AddQueue(sc, "default", "root", configs.QueueConfig{
Name: "sandbox2",
Resources: configs.Resources{Guaranteed: map[string]string{"memory": fmt.Sprintf("%dM", sleepPodMemOverLimit)}},
Properties: map[string]string{"preemption.delay": "1s", "priority.offset": "100"},
}); err != nil {
return err
}
if err = common.AddQueue(sc, "default", "root", configs.QueueConfig{
Name: "sandbox1",
Resources: configs.Resources{Guaranteed: map[string]string{"memory": fmt.Sprintf("%dM", sleepPodMemLimit)}},
Properties: map[string]string{"preemption.delay": "1s", "priority.offset": "-100"},
}); err != nil {
return err
}
return nil
})

ginkgo.By("Schedule a number of small, Low priority pause tasks on Low Guaranteed queue (Enough to fill the node)")

sandbox1SleepPodConfigs := createSandbox1SleepPodCofigsWithStaticNode(3, 600)
sleepPod4Config := k8s.SleepPodConfig{Name: "sleepjob4", NS: dev, Mem: sleepPodMemLimit, Time: 600, Optedout: k8s.Allow, Labels: map[string]string{"queue": "root.sandbox2"}, RequiredNode: nodeName}
sandbox1SleepPodConfigs = append(sandbox1SleepPodConfigs, sleepPod4Config)
for _, config := range sandbox1SleepPodConfigs {
ginkgo.By("Deploy the sleep pod " + config.Name + " to the development namespace")
sleepObj, podErr := k8s.InitSleepPod(config)
Ω(podErr).NotTo(gomega.HaveOccurred())
sleepRespPod, podErr := kClient.CreatePod(sleepObj, dev)
gomega.Ω(podErr).NotTo(gomega.HaveOccurred())

// Wait for pod to move to running state
podErr = kClient.WaitForPodBySelectorRunning(dev,
fmt.Sprintf("app=%s", sleepRespPod.ObjectMeta.Labels["app"]),
60)
gomega.Ω(podErr).NotTo(gomega.HaveOccurred())
}

ginkgo.By("Two pods in root.sandbox1 queue are preempted")
sandbox1RunningPodsCnt := 0
pods, err := kClient.ListPodsByLabelSelector(dev, "queue=root.sandbox1")
gomega.Ω(err).NotTo(gomega.HaveOccurred())
for _, pod := range pods.Items {
if pod.DeletionTimestamp != nil {
continue
}
if pod.Status.Phase == v1.PodRunning {
sandbox1RunningPodsCnt++
}
}
Ω(sandbox1RunningPodsCnt).To(gomega.Equal(2), "two pods in root.sandbox1 queue should still be running after preemption")
})

ginkgo.AfterEach(func() {
testDescription := ginkgo.CurrentSpecReport()
if testDescription.Failed() {
@@ -572,3 +644,11 @@ func createSandbox1SleepPodCofigs(cnt, time int) []k8s.SleepPodConfig {
}
return sandbox1Configs
}

func createSandbox1SleepPodCofigsWithStaticNode(cnt, time int) []k8s.SleepPodConfig {
Review comment (Contributor): "WithRequiredNode" or "WithNodeSelector" sounds better

Reply (Contributor Author): Acknowledged.

sandbox1Configs := make([]k8s.SleepPodConfig, 0, cnt)
for i := 0; i < cnt; i++ {
sandbox1Configs = append(sandbox1Configs, k8s.SleepPodConfig{Name: fmt.Sprintf("sleepjob%d", i+1), NS: dev, Mem: sleepPodMemLimit2, Time: time, Optedout: k8s.Allow, Labels: map[string]string{"queue": "root.sandbox1"}, RequiredNode: nodeName})
}
return sandbox1Configs
}
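Following the review suggestion above, the helper could be renamed along these lines (a sketch only, using the hypothetical name createSandbox1SleepPodConfigsWithRequiredNode; the body is unchanged from the helper in this diff):

func createSandbox1SleepPodConfigsWithRequiredNode(cnt, time int) []k8s.SleepPodConfig {
// Build cnt sandbox1 sleep pod configs pinned to the chosen node via RequiredNode,
// so they fill that node before preemption is triggered.
sandbox1Configs := make([]k8s.SleepPodConfig, 0, cnt)
for i := 0; i < cnt; i++ {
sandbox1Configs = append(sandbox1Configs, k8s.SleepPodConfig{Name: fmt.Sprintf("sleepjob%d", i+1), NS: dev, Mem: sleepPodMemLimit2, Time: time, Optedout: k8s.Allow, Labels: map[string]string{"queue": "root.sandbox1"}, RequiredNode: nodeName})
}
return sandbox1Configs
}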