[YUNIKORN-1126] Add e2e tests for user and group limits with wildcard (#909)

Closes: #909

Signed-off-by: Craig Condit <[email protected]>
rrajesh-cloudera authored and craigcondit committed Sep 6, 2024
1 parent 58c0c36 commit b335500
Showing 3 changed files with 246 additions and 0 deletions.
1 change: 1 addition & 0 deletions test/e2e/framework/configmanager/constants.go
@@ -46,6 +46,7 @@ const (
NodesPath = "ws/v1/partition/%s/nodes"
UserUsagePath = "ws/v1/partition/%s/usage/user/%s"
GroupUsagePath = "ws/v1/partition/%s/usage/group/%s"
GroupsUsagePath = "ws/v1/partition/%s/usage/groups"
HealthCheckPath = "ws/v1/scheduler/healthcheck"
ValidateConfPath = "ws/v1/validate-conf"
FullStateDumpPath = "ws/v1/fullstatedump"
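
A quick illustration (not part of the diff): once the partition name is substituted, the new constant resolves to the groups usage endpoint, which is what the REST helper added below builds.

// Hypothetical snippet: the new constant formatted for the default partition.
path := fmt.Sprintf(configmanager.GroupsUsagePath, "default")
// path == "ws/v1/partition/default/usage/groups"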
10 changes: 10 additions & 0 deletions test/e2e/framework/helpers/yunikorn/rest_api_utils.go
@@ -536,3 +536,13 @@ func (c *RClient) GetGroupUsage(partition string, groupName string) (*dao.GroupR
_, err = c.do(req, &groupUsage)
return groupUsage, err
}

func (c *RClient) GetGroupsUsage(partition string) ([]*dao.GroupResourceUsageDAOInfo, error) {
req, err := c.newRequest("GET", fmt.Sprintf(configmanager.GroupsUsagePath, partition), nil)
if err != nil {
return nil, err
}
var groupsUsage []*dao.GroupResourceUsageDAOInfo
_, err = c.do(req, &groupsUsage)
return groupsUsage, err
}
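
A minimal usage sketch for the new helper (illustrative only; it assumes the suite's restClient, the gomega Ω alias, and the constants/fmt imports already used in the e2e tests):

groupsUsage, err := restClient.GetGroupsUsage(constants.DefaultPartition)
Ω(err).NotTo(gomega.HaveOccurred())
for _, groupUsage := range groupsUsage {
// GroupName and Queues are the fields the wildcard checks below rely on.
if groupUsage.Queues != nil {
fmt.Printf("group %q usage tracked at queue %s\n", groupUsage.GroupName, groupUsage.Queues.QueuePath)
}
}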
235 changes: 235 additions & 0 deletions test/e2e/user_group_limit/user_group_limit_test.go
@@ -708,6 +708,209 @@ var _ = ginkgo.Describe("UserGroupLimit", func() {
return nil
})
})

ginkgo.It("Verify user limit and wildcard user limit", func() {
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil
err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "user entry",
Users: []string{user1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard user entry",
Users: []string{"*"},
MaxApplications: 2,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})

// usergroup1 can deploy the first sleep pod to root.sandbox1
usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}

// usergroup1 can't deploy the second sleep pod to root.sandbox1
usergroup1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than user entry limit")
_ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than user entry limit")
checkUsage(userTestType, user1, sandboxQueue1, []*v1.Pod{usergroup1Sandbox1Pod1})

// usergroup2 can deploy 2 sleep pods to root.sandbox1
usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because usage is within the wildcard user entry limit")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1})

// usergroup2 can deploy the second sleep pod to root.sandbox1
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because usage is within the wildcard user entry limit")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})

// usergroup2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count exceeds the wildcard user entry maxapplications")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})

// Update Wildcard user entry limit to 3
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "user entry",
Users: []string{user1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard user entry",
Users: []string{"*"},
MaxApplications: 3,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// usergroup2's third sleep pod can now run in root.sandbox1 because the wildcard max-application limit was updated to 3
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})
// usergroup2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
_ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count exceeds the wildcard user entry maxapplications")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})

})

ginkgo.It("Verify group limit and wildcard group limit", func() {
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "group entry",
Groups: []string{group1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard group entry",
Groups: []string{"*"},
MaxApplications: 2,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
}})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// group1 can deploy the first sleep pod to root.sandbox1
usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}
group1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because usage is within the group entry limit")
checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandbox1Pod1})

// group1 can't deploy the second sleep pod to root.sandbox1
usergroup1 = &si.UserGroupInformation{User: user1, Groups: []string{group1}}
_ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than group entry limit")
checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandbox1Pod1})

// group2 can deploy 2 sleep pods to root.sandbox1
usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because usage is within the wildcard group entry limit")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1})

// group2 can deploy the second sleep pod to root.sandbox1
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because usage is within the wildcard group entry limit")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})

// group2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count exceeds the wildcard group entry maxapplications")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})
// Update Wildcard group entry limit to 3
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "group entry",
Groups: []string{group1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard group entry",
Groups: []string{"*"},
MaxApplications: 3,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
}})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// group2's third sleep pod can now run in root.sandbox1 because the wildcard max-application limit was updated to 3
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
// group2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
_ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final application count exceeds the wildcard group entry maxapplications")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
})

ginkgo.AfterEach(func() {
tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name})

@@ -787,3 +990,35 @@ func checkUsage(testType TestType, name string, queuePath string, expectedRunnin
Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
}

func checkUsageWildcardGroups(testType TestType, name string, queuePath string, expectedRunningPods []*v1.Pod) {
var rootQueueResourceUsageDAO *dao.ResourceUsageDAOInfo
if testType == groupTestType {
ginkgo.By(fmt.Sprintf("Check group resource usage for %s in queue %s", name, queuePath))
groupUsageDAOInfo, err := restClient.GetGroupsUsage(constants.DefaultPartition)
Ω(err).NotTo(gomega.HaveOccurred())
Ω(groupUsageDAOInfo).NotTo(gomega.BeNil())
for _, groupUsageInfo := range groupUsageDAOInfo {
if groupUsageInfo.GroupName == "*" {
rootQueueResourceUsageDAO = groupUsageInfo.Queues
}
}
}
Ω(rootQueueResourceUsageDAO).NotTo(gomega.BeNil())
var resourceUsageDAO *dao.ResourceUsageDAOInfo
for _, queue := range rootQueueResourceUsageDAO.Children {
if queue.QueuePath == queuePath {
resourceUsageDAO = queue
break
}
}
Ω(resourceUsageDAO).NotTo(gomega.BeNil())

appIDs := make([]interface{}, 0, len(expectedRunningPods))
for _, pod := range expectedRunningPods {
appIDs = append(appIDs, pod.Labels[constants.LabelApplicationID])
}
Ω(resourceUsageDAO.ResourceUsage).NotTo(gomega.BeNil())
Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
}
