diff --git a/test/e2e/framework/configmanager/constants.go b/test/e2e/framework/configmanager/constants.go
index aa3285ea1..478baea0f 100644
--- a/test/e2e/framework/configmanager/constants.go
+++ b/test/e2e/framework/configmanager/constants.go
@@ -46,6 +46,7 @@ const (
     NodesPath = "ws/v1/partition/%s/nodes"
     UserUsagePath = "ws/v1/partition/%s/usage/user/%s"
     GroupUsagePath = "ws/v1/partition/%s/usage/group/%s"
+    GroupsUsagePath = "ws/v1/partition/%s/usage/groups"
     HealthCheckPath = "ws/v1/scheduler/healthcheck"
     ValidateConfPath = "ws/v1/validate-conf"
     FullStateDumpPath = "ws/v1/fullstatedump"
diff --git a/test/e2e/framework/helpers/yunikorn/rest_api_utils.go b/test/e2e/framework/helpers/yunikorn/rest_api_utils.go
index 409a3f332..82afbc2b4 100644
--- a/test/e2e/framework/helpers/yunikorn/rest_api_utils.go
+++ b/test/e2e/framework/helpers/yunikorn/rest_api_utils.go
@@ -536,3 +536,13 @@ func (c *RClient) GetGroupUsage(partition string, groupName string) (*dao.GroupR
     _, err = c.do(req, &groupUsage)
     return groupUsage, err
 }
+
+func (c *RClient) GetGroupsUsage(partition string) ([]*dao.GroupResourceUsageDAOInfo, error) {
+    req, err := c.newRequest("GET", fmt.Sprintf(configmanager.GroupsUsagePath, partition), nil)
+    if err != nil {
+        return nil, err
+    }
+    var groupsUsage []*dao.GroupResourceUsageDAOInfo
+    _, err = c.do(req, &groupsUsage)
+    return groupsUsage, err
+}
diff --git a/test/e2e/user_group_limit/user_group_limit_test.go b/test/e2e/user_group_limit/user_group_limit_test.go
index eac84c2ba..b92a341bd 100644
--- a/test/e2e/user_group_limit/user_group_limit_test.go
+++ b/test/e2e/user_group_limit/user_group_limit_test.go
@@ -708,6 +708,209 @@ var _ = ginkgo.Describe("UserGroupLimit", func() {
             return nil
         })
     })
+
+    ginkgo.It("Verify user limit and wildcard user limit", func() {
+        ginkgo.By("Update config")
+        // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
+        yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
+            yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
+                // remove placement rules so we can control queue
+                sc.Partitions[0].PlacementRules = nil
+                err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
+                    Name: "sandbox1",
+                    Limits: []configs.Limit{
+                        {
+                            Limit: "user entry",
+                            Users: []string{user1},
+                            MaxApplications: 1,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
+                            },
+                        },
+                        {
+                            Limit: "wildcard user entry",
+                            Users: []string{"*"},
+                            MaxApplications: 2,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", largeMem),
+                            },
+                        },
+                    },
+                })
+                if err != nil {
+                    return err
+                }
+                return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
+            })
+        })
+
+        // usergroup1 can deploy the first sleep pod to root.sandbox1
+        usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}
+
+        // usergroup1 can't deploy the second sleep pod to root.sandbox1
+        usergroup1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than user entry limit")
+        _ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than user entry limit")
+        checkUsage(userTestType, user1, sandboxQueue1, []*v1.Pod{usergroup1Sandbox1Pod1})
+
+        // usergroup2 can deploy 2 sleep pods to root.sandbox1
+        usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        usergroup2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2")
+        checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1})
+
+        // usergroup2 can deploy the second sleep pod to root.sandbox1
+        usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        usergroup2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2")
+        checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})
+
+        // usergroup2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
+        usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        usergroup2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because the wildcard max applications limit is exceeded")
+        checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})
+
+        // Update Wildcard user entry limit to 3
+        ginkgo.By("Update config")
+        // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
+        yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
+            yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
+                // remove placement rules so we can control queue
+                sc.Partitions[0].PlacementRules = nil
+
+                err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
+                    Name: "sandbox1",
+                    Limits: []configs.Limit{
+                        {
+                            Limit: "user entry",
+                            Users: []string{user1},
+                            MaxApplications: 1,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
+                            },
+                        },
+                        {
+                            Limit: "wildcard user entry",
+                            Users: []string{"*"},
+                            MaxApplications: 3,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", largeMem),
+                            },
+                        },
+                    },
+                })
+                if err != nil {
+                    return err
+                }
+                return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
+            })
+        })
+        // usergroup2 can deploy the third sleep pod to root.sandbox1 because the max-application limit was updated to 3
+        checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})
+        // usergroup2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
+        usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        _ = deploySleepPod(usergroup2, sandboxQueue1, false, "because the wildcard max applications limit is exceeded")
+        checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})
+
+    })
+
+    ginkgo.It("Verify group limit and wildcard group limit", func() {
+        ginkgo.By("Update config")
+        // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
+        yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
+            yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
+                // remove placement rules so we can control queue
+                sc.Partitions[0].PlacementRules = nil
+
+                err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
+                    Name: "sandbox1",
+                    Limits: []configs.Limit{
+                        {
+                            Limit: "group entry",
+                            Groups: []string{group1},
+                            MaxApplications: 1,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
+                            },
+                        },
+                        {
+                            Limit: "wildcard group entry",
+                            Groups: []string{"*"},
+                            MaxApplications: 2,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", largeMem),
+                            },
+                        },
+                    }})
+                if err != nil {
+                    return err
+                }
+                return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
+            })
+        })
+        // group1 can deploy the first sleep pod to root.sandbox1
+        usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}
+        group1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than group entry limit")
+        checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandbox1Pod1})
+
+        // group1 can't deploy the second sleep pod to root.sandbox1
+        usergroup1 = &si.UserGroupInformation{User: user1, Groups: []string{group1}}
+        _ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than group entry limit")
+        checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandbox1Pod1})
+
+        // group2 can deploy 2 sleep pods to root.sandbox1
+        usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        group2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2")
+        checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1})
+
+        // group2 can deploy the second sleep pod to root.sandbox1
+        usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        group2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2")
+        checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})
+
+        // group2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
+        usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        group2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because the wildcard max applications limit is exceeded")
+        checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})
+        // Update Wildcard group entry limit to 3
+        ginkgo.By("Update config")
+        // The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
+        yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
+            yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
+                // remove placement rules so we can control queue
+                sc.Partitions[0].PlacementRules = nil
+
+                err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
+                    Name: "sandbox1",
+                    Limits: []configs.Limit{
+                        {
+                            Limit: "group entry",
+                            Groups: []string{group1},
+                            MaxApplications: 1,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
+                            },
+                        },
+                        {
+                            Limit: "wildcard group entry",
+                            Groups: []string{"*"},
+                            MaxApplications: 3,
+                            MaxResources: map[string]string{
+                                siCommon.Memory: fmt.Sprintf("%dM", largeMem),
+                            },
+                        },
+                    }})
+                if err != nil {
+                    return err
+                }
+                return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
+            })
+        })
+        // group2 can deploy the third sleep pod to root.sandbox1 because the max-application limit was updated to 3
+        checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
+        // group2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
+        usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
+        _ = deploySleepPod(usergroup2, sandboxQueue1, false, "because the wildcard max applications limit is exceeded")
+        checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
+    })
+
     ginkgo.AfterEach(func() {
         tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name})
@@ -787,3 +990,35 @@ func checkUsage(testType TestType, name string, queuePath string, expectedRunnin
     Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
     Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
 }
+
+func checkUsageWildcardGroups(testType TestType, name string, queuePath string, expectedRunningPods []*v1.Pod) {
+    var rootQueueResourceUsageDAO *dao.ResourceUsageDAOInfo
+    if testType == groupTestType {
+        ginkgo.By(fmt.Sprintf("Check group resource usage for %s in queue %s", name, queuePath))
+        groupUsageDAOInfo, err := restClient.GetGroupsUsage(constants.DefaultPartition)
+        Ω(err).NotTo(gomega.HaveOccurred())
+        Ω(groupUsageDAOInfo).NotTo(gomega.BeNil())
+        for _, groupUsage := range groupUsageDAOInfo {
+            if groupUsage.GroupName == "*" {
+                rootQueueResourceUsageDAO = groupUsage.Queues
+            }
+        }
+    }
+    Ω(rootQueueResourceUsageDAO).NotTo(gomega.BeNil())
+    var resourceUsageDAO *dao.ResourceUsageDAOInfo
+    for _, queue := range rootQueueResourceUsageDAO.Children {
+        if queue.QueuePath == queuePath {
+            resourceUsageDAO = queue
+            break
+        }
+    }
+    Ω(resourceUsageDAO).NotTo(gomega.BeNil())
+
+    appIDs := make([]interface{}, 0, len(expectedRunningPods))
+    for _, pod := range expectedRunningPods {
+        appIDs = append(appIDs, pod.Labels[constants.LabelApplicationID])
+    }
+    Ω(resourceUsageDAO.ResourceUsage).NotTo(gomega.BeNil())
+    Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
+    Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
+}