From 9d9919dc917b982ce50e476bdecdcea1a804bf8c Mon Sep 17 00:00:00 2001 From: rrajesh Date: Fri, 6 Sep 2024 16:08:54 +0530 Subject: [PATCH 1/2] [YUNIKORN-1126] Added E2E Tests for User and Group Limits with wildcard --- test/e2e/framework/configmanager/constants.go | 1 + .../helpers/yunikorn/rest_api_utils.go | 10 + .../user_group_limit/user_group_limit_test.go | 227 +++++++++++++++++- 3 files changed, 237 insertions(+), 1 deletion(-) diff --git a/test/e2e/framework/configmanager/constants.go b/test/e2e/framework/configmanager/constants.go index aa3285ea1..478baea0f 100644 --- a/test/e2e/framework/configmanager/constants.go +++ b/test/e2e/framework/configmanager/constants.go @@ -46,6 +46,7 @@ const ( NodesPath = "ws/v1/partition/%s/nodes" UserUsagePath = "ws/v1/partition/%s/usage/user/%s" GroupUsagePath = "ws/v1/partition/%s/usage/group/%s" + GroupsUsagePath = "ws/v1/partition/%s/usage/groups" HealthCheckPath = "ws/v1/scheduler/healthcheck" ValidateConfPath = "ws/v1/validate-conf" FullStateDumpPath = "ws/v1/fullstatedump" diff --git a/test/e2e/framework/helpers/yunikorn/rest_api_utils.go b/test/e2e/framework/helpers/yunikorn/rest_api_utils.go index 409a3f332..82afbc2b4 100644 --- a/test/e2e/framework/helpers/yunikorn/rest_api_utils.go +++ b/test/e2e/framework/helpers/yunikorn/rest_api_utils.go @@ -536,3 +536,13 @@ func (c *RClient) GetGroupUsage(partition string, groupName string) (*dao.GroupR _, err = c.do(req, &groupUsage) return groupUsage, err } + +func (c *RClient) GetGroupsUsage(partition string) ([]*dao.GroupResourceUsageDAOInfo, error) { + req, err := c.newRequest("GET", fmt.Sprintf(configmanager.GroupsUsagePath, partition), nil) + if err != nil { + return nil, err + } + var groupsUsage []*dao.GroupResourceUsageDAOInfo + _, err = c.do(req, &groupsUsage) + return groupsUsage, err +} diff --git a/test/e2e/user_group_limit/user_group_limit_test.go b/test/e2e/user_group_limit/user_group_limit_test.go index eac84c2ba..25d9260f4 100644 --- a/test/e2e/user_group_limit/user_group_limit_test.go +++ b/test/e2e/user_group_limit/user_group_limit_test.go @@ -708,8 +708,200 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { return nil }) }) + + ginkgo.It("Verify user limit and wildcard user limit", func() { + ginkgo.By("Update config") + // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated. 
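// Illustrative sketch, not part of the patch: consuming the GetGroupsUsage
// helper introduced above (backed by the new GroupsUsagePath REST endpoint,
// ws/v1/partition/%s/usage/groups) to locate the wildcard record. Field names
// follow the DAO accesses made in checkUsageWildcardGroups further down in
// this file.
groupsUsage, err := restClient.GetGroupsUsage(constants.DefaultPartition)
Ω(err).NotTo(gomega.HaveOccurred())
var wildcardQueues *dao.ResourceUsageDAOInfo
for _, g := range groupsUsage {
    if g.GroupName == "*" {
        wildcardQueues = g.Queues // usage tree rooted at the root queue
    }
}
// wildcardQueues is nil until some usage has been tracked against the "*" entry.
_ = wildcardQueues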
+ yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() { + yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error { + // remove placement rules so we can control queue + sc.Partitions[0].PlacementRules = nil + + common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + Name: "sandbox1", + Limits: []configs.Limit{ + { + Limit: "user entry", + Users: []string{user1}, + MaxApplications: 1, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", mediumMem), + }, + }, + { + Limit: "wildcard user entry", + Users: []string{"*"}, + MaxApplications: 2, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", largeMem), + }, + }, + }, + }) + return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) + }) + }) + + // usergroup1 can deploy the first sleep pod to root.sandbox1 + usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}} + + // usergroup1 can't deploy the second sleep pod to root.sandbox1 + usergroup1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than user entry limit") + _ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than user entry limit") + checkUsage(userTestType, user1, sandboxQueue1, []*v1.Pod{usergroup1Sandbox1Pod1}) + + // usergroup2 can deploy 2 sleep pods to root.sandbox1 + usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}} + usergroup2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2") + checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1}) + + // usergroup2 can deploy the second sleep pod to root.sandbox1 + usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} + usergroup2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2") + checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2}) + + // usergroup2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit + usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} + usergroup2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications") + checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2}) + + //Update Wildcard user entry limit to 3 + ginkgo.By("Update config") + // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated. 
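// Illustrative sketch, an assumption about the limit semantics this spec
// exercises (not scheduler source): a user with a named entry is governed by
// that entry, and only users without one fall back to the "*" entry. That is
// why raising the wildcard maxapplications to 3 below admits a third app for
// user2 while user1 remains capped at 1 by its own entry.
findUserLimit := func(user string, limits []configs.Limit) *configs.Limit {
    var wildcard *configs.Limit
    for i := range limits {
        for _, u := range limits[i].Users {
            switch u {
            case user:
                return &limits[i] // a named entry always wins
            case "*":
                wildcard = &limits[i]
            }
        }
    }
    return wildcard // users without a named entry fall back to the wildcard entry
}
_ = findUserLimit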
+ yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() { + yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error { + // remove placement rules so we can control queue + sc.Partitions[0].PlacementRules = nil + + common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + Name: "sandbox1", + Limits: []configs.Limit{ + { + Limit: "user entry", + Users: []string{user1}, + MaxApplications: 1, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", mediumMem), + }, + }, + { + Limit: "wildcard user entry", + Users: []string{"*"}, + MaxApplications: 3, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", largeMem), + }, + }, + }, + }) + return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) + }) + }) + // usergroup2 can deploy the third sleep pod to root.sandbox1 becuase of max-application limit updated to 3 + checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3}) + // usergroup2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit + usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} + _ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications") + checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3}) + + }) + + ginkgo.It("Verify group limit and wildcard group limit", func() { + ginkgo.By("Update config") + // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated. 
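// Illustrative sketch, not part of the patch: the queues.yaml fragment that
// the sandbox1 limits configured just below roughly render to once the
// ConfigMap update goes through. Key names follow the yunikorn-core limit
// schema; the bracketed values are placeholders for the test's group1,
// mediumMem and largeMem, not literal settings.
const sandbox1GroupLimitsYAML = `
queues:
  - name: root
    queues:
      - name: sandbox1
        limits:
          - limit: group entry
            groups:
              - <group1>
            maxapplications: 1
            maxresources:
              memory: <mediumMem>M
          - limit: wildcard group entry
            groups:
              - "*"
            maxapplications: 2
            maxresources:
              memory: <largeMem>M
`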
+ yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() { + yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error { + // remove placement rules so we can control queue + sc.Partitions[0].PlacementRules = nil + + common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + Name: "sandbox1", + Limits: []configs.Limit{ + { + Limit: "group entry", + Groups: []string{group1}, + MaxApplications: 1, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", mediumMem), + }, + }, + { + Limit: "wildcard group entry", + Groups: []string{"*"}, + MaxApplications: 2, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", largeMem), + }, + }, + }}) + return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) + }) + }) + // group1 can deploy the first sleep pod to root.sandbox1 + usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}} + group1Sandvox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because there is no limit for group1") + checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandvox1Pod1}) + + // group1 can't deploy the second sleep pod to root.sandbox1 + usergroup1 = &si.UserGroupInformation{User: user1, Groups: []string{group1}} + _ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than group entry limit") + checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandvox1Pod1}) + + // group2 can deploy 2 sleep pods to root.sandbox1 + usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}} + group2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2") + checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1}) + + // group2 can deploy the second sleep pod to root.sandbox1 + usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} + group2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2") + checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2}) + + // group2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit + usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} + group2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications") + checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2}) + //Update Wildcard group entry limit to 3 + ginkgo.By("Update config") + // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated. 
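// Illustrative sketch, not part of the patch: why the assertions above switch
// helpers. group1 matches the named "group entry", so its usage appears under
// its own name and can be fetched directly; group2 only matches "*", so its
// pods are accounted to the wildcard record, which the per-group endpoint
// does not expose. That is why checkUsageWildcardGroups scans the
// GetGroupsUsage listing for GroupName == "*" instead.
namedUsage, err := restClient.GetGroupUsage(constants.DefaultPartition, group1) // existing per-group helper
Ω(err).NotTo(gomega.HaveOccurred())
Ω(namedUsage.GroupName).To(gomega.Equal(group1))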
+ yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() { + yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error { + // remove placement rules so we can control queue + sc.Partitions[0].PlacementRules = nil + + common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + Name: "sandbox1", + Limits: []configs.Limit{ + { + Limit: "group entry", + Groups: []string{group1}, + MaxApplications: 1, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", mediumMem), + }, + }, + { + Limit: "wildcard group entry", + Groups: []string{"*"}, + MaxApplications: 3, + MaxResources: map[string]string{ + siCommon.Memory: fmt.Sprintf("%dM", largeMem), + }, + }, + }}) + return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) + }) + }) + // group2 can deploy the third sleep pod to root.sandbox1 becuase of max-application limit updated to 3 + checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3}) + // group2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit + usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} + _ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications") + checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3}) + }) + ginkgo.AfterEach(func() { - tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name}) + //tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name}) // Delete all sleep pods ginkgo.By("Delete all sleep pods") @@ -787,3 +979,36 @@ func checkUsage(testType TestType, name string, queuePath string, expectedRunnin Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods)))) Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...)) } + +func checkUsageWildcardGroups(testType TestType, name string, queuePath string, expectedRunningPods []*v1.Pod) { + var rootQueueResourceUsageDAO *dao.ResourceUsageDAOInfo + if testType == groupTestType { + ginkgo.By(fmt.Sprintf("Check group resource usage for %s in queue %s", name, queuePath)) + groupUsageDAOInfo, err := restClient.GetGroupsUsage(constants.DefaultPartition) + Ω(err).NotTo(gomega.HaveOccurred()) + Ω(groupUsageDAOInfo).NotTo(gomega.BeNil()) + for _, groupUsageDAOInfog := range groupUsageDAOInfo { + if groupUsageDAOInfog.GroupName == "*" { + rootQueueResourceUsageDAO = groupUsageDAOInfog.Queues + } + } + } + Ω(rootQueueResourceUsageDAO).NotTo(gomega.BeNil()) + var resourceUsageDAO *dao.ResourceUsageDAOInfo + for _, queue := range rootQueueResourceUsageDAO.Children { + if queue.QueuePath == queuePath { + resourceUsageDAO = queue + break + } + } + Ω(resourceUsageDAO).NotTo(gomega.BeNil()) + + appIDs := make([]interface{}, 0, len(expectedRunningPods)) + for _, pod := range expectedRunningPods { + appIDs = append(appIDs, pod.Labels[constants.LabelApplicationID]) + } + Ω(resourceUsageDAO.ResourceUsage).NotTo(gomega.BeNil()) + Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods)))) + Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...)) + +} From 7f70e88791c8b9e7950a922f5026ffd64289e34b Mon Sep 17 00:00:00 2001 From: rrajesh Date: 
Fri, 6 Sep 2024 18:10:45 +0530 Subject: [PATCH 2/2] [YUNIKORN-1957] Fixing golint issue --- .../user_group_limit/user_group_limit_test.go | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/test/e2e/user_group_limit/user_group_limit_test.go b/test/e2e/user_group_limit/user_group_limit_test.go index 25d9260f4..b92a341bd 100644 --- a/test/e2e/user_group_limit/user_group_limit_test.go +++ b/test/e2e/user_group_limit/user_group_limit_test.go @@ -716,8 +716,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error { // remove placement rules so we can control queue sc.Partitions[0].PlacementRules = nil - - common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ Name: "sandbox1", Limits: []configs.Limit{ { @@ -738,6 +737,9 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { }, }, }) + if err != nil { + return err + } return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) }) }) @@ -765,7 +767,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { usergroup2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications") checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2}) - //Update Wildcard user entry limit to 3 + // Update Wildcard user entry limit to 3 ginkgo.By("Update config") // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated. yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() { @@ -773,7 +775,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { // remove placement rules so we can control queue sc.Partitions[0].PlacementRules = nil - common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ Name: "sandbox1", Limits: []configs.Limit{ { @@ -794,6 +796,9 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { }, }, }) + if err != nil { + return err + } return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) }) }) @@ -814,7 +819,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { // remove placement rules so we can control queue sc.Partitions[0].PlacementRules = nil - common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ Name: "sandbox1", Limits: []configs.Limit{ { @@ -834,6 +839,9 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { }, }, }}) + if err != nil { + return err + } return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) }) }) @@ -861,7 +869,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}} group2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications") checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2}) - //Update Wildcard group entry limit to 3 + // 
Update Wildcard group entry limit to 3 ginkgo.By("Update config") // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated. yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() { @@ -869,7 +877,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { // remove placement rules so we can control queue sc.Partitions[0].PlacementRules = nil - common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ + err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{ Name: "sandbox1", Limits: []configs.Limit{ { @@ -889,6 +897,9 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { }, }, }}) + if err != nil { + return err + } return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"}) }) }) @@ -901,7 +912,7 @@ var _ = ginkgo.Describe("UserGroupLimit", func() { }) ginkgo.AfterEach(func() { - //tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name}) + tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name}) // Delete all sleep pods ginkgo.By("Delete all sleep pods") @@ -1010,5 +1021,4 @@ func checkUsageWildcardGroups(testType TestType, name string, queuePath string, Ω(resourceUsageDAO.ResourceUsage).NotTo(gomega.BeNil()) Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods)))) Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...)) - }
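
The second commit addresses the golint/errcheck finding by propagating the error returned from the first common.AddQueue call instead of discarding it. A minimal sketch of the resulting pattern, using a hypothetical addSandboxQueues helper name (not in the patch), the imports already present in user_group_limit_test.go, and the sandbox1 QueueConfig literal elided for brevity:

    // Hypothetical helper, shown only to illustrate the error-propagation
    // pattern the second commit applies inside each config-update callback.
    func addSandboxQueues(sc *configs.SchedulerConfig, sandbox1 configs.QueueConfig) error {
        // Surface the first failure instead of silently dropping it.
        if err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, sandbox1); err != nil {
            return err
        }
        return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
    }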