Skip to content

Commit

Permalink
add test for scaledown
Browse files Browse the repository at this point in the history
  • Loading branch information
sakoush committed Feb 10, 2025
1 parent c9d4dbd commit 3ebc37f
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 5 deletions.
2 changes: 1 addition & 1 deletion scheduler/pkg/server/server_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func (s *SchedulerServer) handleServerEvents(event coordinator.ServerEventMsg) {

server, _ := s.modelStore.GetServer(event.ServerName, true, true)

if shouldScaleDown(server) {
if shouldScaleDown(server, AllowPackingPercentage) {
logger.Infof("Server %s is scaling down", event.ServerName)
// TODO send control message to scale down
}
Expand Down
8 changes: 4 additions & 4 deletions scheduler/pkg/server/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
// increased latency in the case of MMS
// in the future we should have more metrics to decide whether packing can lead
// to better performance
ALLOW_PACKING_PERCENTAGE = 0.25
AllowPackingPercentage = 0.25
)

func sendWithTimeout(f func() error, d time.Duration) (bool, error) {
Expand All @@ -45,22 +45,22 @@ func sendWithTimeout(f func() error, d time.Duration) (bool, error) {
}
}

func shouldScaleDown(server *store.ServerSnapshot) bool {
func shouldScaleDown(server *store.ServerSnapshot, perc float32) bool {

if server.Stats != nil {
stats := server.Stats
// 25% chance of trying to pack replicas if models are not fully packed
tryPack := false
rand := rand.Float32()
if rand > (1 - ALLOW_PACKING_PERCENTAGE) {
if rand > (1 - perc) {
if stats.MaxNumReplicaHostedModels < uint32(server.ExpectedReplicas) {
tryPack = true
}
}
// we scale down only when the server has more than one expected replica, and either:
// 1. we are trying to pack replicas: max number of replicas for any hosted model is less than the number of expected replicas (only attempted with probability `perc`, 25% by default)
// 2. we have empty replicas
return tryPack || (stats.NumEmptyReplicas > 0 && server.ExpectedReplicas > 1)
return (tryPack || stats.NumEmptyReplicas > 0) && server.ExpectedReplicas > 1
}
return false

Expand Down
64 changes: 64 additions & 0 deletions scheduler/pkg/server/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"

. "github.com/onsi/gomega"

Check failure on line 17 in scheduler/pkg/server/utils_test.go

View workflow job for this annotation

GitHub Actions / lint

File is not properly formatted (gci)
"github.com/seldonio/seldon-core/scheduler/v2/pkg/store"
)

func TestSendWithTimeout(t *testing.T) {
Expand Down Expand Up @@ -71,3 +72,66 @@ func TestSendWithTimeout(t *testing.T) {
})
}
}

func TestShouldScaleDown(t *testing.T) {
g := NewGomegaWithT(t)

type test struct {
name string
server *store.ServerSnapshot
shouldScaleDown bool
}

tests := []test{
{
name: "should scale down - empty replicas",
server: &store.ServerSnapshot{
Stats: &store.ServerStats{
NumEmptyReplicas: 1,
MaxNumReplicaHostedModels: 0,
},
ExpectedReplicas: 2,
},
shouldScaleDown: true,
},
{
name: "should scale down - pack",
server: &store.ServerSnapshot{
Stats: &store.ServerStats{
NumEmptyReplicas: 0,
MaxNumReplicaHostedModels: 1,
},
ExpectedReplicas: 2,
},
shouldScaleDown: true,
},
{
name: "should not scale down - empty replicas - last replica",
server: &store.ServerSnapshot{
Stats: &store.ServerStats{
NumEmptyReplicas: 1,
MaxNumReplicaHostedModels: 0,
},
ExpectedReplicas: 1,
},
shouldScaleDown: false,
},
{
name: "should not scale down - pack - last replica",
server: &store.ServerSnapshot{
Stats: &store.ServerStats{
NumEmptyReplicas: 1,
MaxNumReplicaHostedModels: 0,
},
ExpectedReplicas: 1,
},
shouldScaleDown: false,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
g.Expect(shouldScaleDown(test.server, 1.0)).To(Equal(test.shouldScaleDown))
})
}
}

0 comments on commit 3ebc37f

Please sign in to comment.