Skip to content

Commit

Permalink
fix(#15): use /slots endpoint instead of /health to fetch slots s…
Browse files Browse the repository at this point in the history
…tatuses
  • Loading branch information
mcharytoniuk committed Sep 4, 2024
1 parent 627b83b commit eb2fcda
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 9 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,25 @@ If you want to keep the balancer management address predictable, I recommend usi

## Changelog

### v0.7.0

Requires llama.cpp release [b3606](https://github.com/ggerganov/llama.cpp/releases/tag/b3606) or newer.

#### Breaking Changes

- Adjusted to handle breaking changes in the llama.cpp `/health` endpoint: https://github.com/ggerganov/llama.cpp/pull/9056

Starting from this version, Paddler monitors llama.cpp instances and their slot statuses through the `/slots` endpoint instead of the `/health` endpoint.
Paddler's `/health` endpoint remains unchanged.

### v0.6.0

Latest supported llama.cpp release: [b3604](https://github.com/ggerganov/llama.cpp/releases/tag/b3604)

#### Features

- [Name agents with `--name` flag](https://github.com/distantmagic/paddler/issues/15)

### v0.6.0

#### Features
Expand Down
4 changes: 3 additions & 1 deletion cmd/Balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ func (self *Balancer) Action(cliContext *cli.Context) error {
defer close(serverEventsChannel)

llamaCppHealthStatusAggregate := &loadbalancer.LlamaCppHealthStatusAggregate{
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{},
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{
Status: llamacpp.Ok,
},
}

loadBalancerTargetCollection := loadbalancer.NewLoadBalancerTargetCollection(llamaCppHealthStatusAggregate)
Expand Down
1 change: 1 addition & 0 deletions llamacpp/LlamaCppClient.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ func (self *LlamaCppClient) GetSlotsAggregatedStatus(
responseChannel <- LlamaCppSlotsAggregatedStatus{
Error: slotStatus.Error,
ErrorMessage: slotStatus.ErrorMessage,
Status: Error,
}

return
Expand Down
9 changes: 5 additions & 4 deletions llamacpp/LlamaCppSlotsAggregatedStatus.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package llamacpp

type LlamaCppSlotsAggregatedStatus struct {
Error error `json:"-"`
ErrorMessage string `json:"error_message,omitempty"`
SlotsIdle int `json:"slots_idle"`
SlotsProcessing int `json:"slots_processing"`
Error error `json:"-"`
ErrorMessage string `json:"error_message,omitempty"`
Status LlamaCppHealthStatusCode `json:"status"`
SlotsIdle int `json:"slots_idle"`
SlotsProcessing int `json:"slots_processing"`
}
4 changes: 3 additions & 1 deletion loadbalancer/LlamaCppHealthStatusAggregate_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (

func BenchmarkLlamaCppHealthStatusAggregate(b *testing.B) {
llamaCppHealthStatusAggregate := &LlamaCppHealthStatusAggregate{
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{},
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{
Status: llamacpp.Ok,
},
}

b.RunParallel(func(pb *testing.PB) {
Expand Down
5 changes: 4 additions & 1 deletion loadbalancer/LoadBalancerTargetCollection_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (

func BenchmarkLoadBalancerTargetCollection(b *testing.B) {
llamaCppHealthStatusAggregate := &LlamaCppHealthStatusAggregate{
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{},
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{
Status: llamacpp.Ok,
},
}

loadBalancerTargetRegistrar := &LoadBalancerTargetRegistrar{
Expand All @@ -38,6 +40,7 @@ func BenchmarkLoadBalancerTargetCollection(b *testing.B) {
&llamacpp.LlamaCppSlotsAggregatedStatus{
SlotsIdle: 8,
SlotsProcessing: 0,
Status: llamacpp.Ok,
Error: nil,
},
)
Expand Down
7 changes: 6 additions & 1 deletion loadbalancer/LoadBalancerTargetRegistrar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ import (

func TestTargetOrderIsPreserved(t *testing.T) {
llamaCppHealthStatusAggregate := &LlamaCppHealthStatusAggregate{
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{},
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{
Status: llamacpp.Ok,
},
}

loadBalancerTargetRegistrar := &LoadBalancerTargetRegistrar{
Expand All @@ -39,6 +41,7 @@ func TestTargetOrderIsPreserved(t *testing.T) {
&llamacpp.LlamaCppSlotsAggregatedStatus{
SlotsIdle: 10,
SlotsProcessing: 0,
Status: llamacpp.Ok,
Error: nil,
},
)
Expand Down Expand Up @@ -67,6 +70,7 @@ func TestTargetOrderIsPreserved(t *testing.T) {
&llamacpp.LlamaCppSlotsAggregatedStatus{
SlotsIdle: 8,
SlotsProcessing: 0,
Status: llamacpp.Ok,
Error: nil,
},
)
Expand All @@ -83,6 +87,7 @@ func TestTargetOrderIsPreserved(t *testing.T) {
&llamacpp.LlamaCppSlotsAggregatedStatus{
SlotsIdle: 11,
SlotsProcessing: 0,
Status: llamacpp.Ok,
Error: nil,
},
)
Expand Down
5 changes: 4 additions & 1 deletion loadbalancer/LoadBalancerTemporalManager_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ func BenchmarkLoadBalancerTemporalManager(b *testing.B) {
logger := hclog.NewNullLogger()

llamaCppHealthStatusAggregate := &LlamaCppHealthStatusAggregate{
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{},
AggregatedHealthStatus: &llamacpp.LlamaCppSlotsAggregatedStatus{
Status: llamacpp.Ok,
},
}

loadBalancerTargetCollection := NewLoadBalancerTargetCollection(llamaCppHealthStatusAggregate)
Expand Down Expand Up @@ -50,6 +52,7 @@ func BenchmarkLoadBalancerTemporalManager(b *testing.B) {
&llamacpp.LlamaCppSlotsAggregatedStatus{
SlotsIdle: 8,
SlotsProcessing: 0,
Status: llamacpp.Ok,
Error: nil,
},
)
Expand Down

0 comments on commit eb2fcda

Please sign in to comment.