Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement Top-K sampling for improved user control #1110

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cmd/auth/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ var addCmd = &cobra.Command{
color.Red("Error: topP ranges from 0 to 1.")
os.Exit(1)
}
if topK < 1 || topK > 100 {
color.Red("Error: topK ranges from 1 to 100.")
os.Exit(1)
}

if ai.NeedPassword(backend) && password == "" {
fmt.Printf("Enter %s Key: ", backend)
Expand All @@ -124,6 +128,7 @@ var addCmd = &cobra.Command{
ProviderRegion: providerRegion,
ProviderId: providerId,
TopP: topP,
TopK: topK,
MaxTokens: maxTokens,
}

Expand Down Expand Up @@ -156,6 +161,8 @@ func init() {
addCmd.Flags().StringVarP(&endpointName, "endpointname", "n", "", "Endpoint Name, e.g. `endpoint-xxxxxxxxxxxx` (only for amazonbedrock, amazonsagemaker backends)")
// add flag for topP
addCmd.Flags().Float32VarP(&topP, "topp", "c", 0.5, "Probability Cutoff: Set a threshold (0.0-1.0) to limit word choices. Higher values add randomness, lower values increase predictability.")
// add flag for topK
addCmd.Flags().Int32VarP(&topK, "topk", "k", 50, "Sampling Cutoff: Set a threshold (1-100) to restrict the sampling process to the top K most probable words at each step. Higher values lead to greater variability, lower values increase predictability.")
// max tokens
addCmd.Flags().IntVarP(&maxTokens, "maxtokens", "l", 2048, "Specify a maximum output length. Adjust (1-...) to control text length. Higher values produce longer output, lower values limit length")
// add flag for temperature
Expand Down
1 change: 1 addition & 0 deletions cmd/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var (
providerRegion string
providerId string
topP float32
topK int32
maxTokens int
)

Expand Down
18 changes: 18 additions & 0 deletions cmd/serve/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
const (
defaultTemperature float32 = 0.7
defaultTopP float32 = 1.0
defaultTopK int32 = 50
)

var (
Expand Down Expand Up @@ -84,6 +85,22 @@ var ServeCmd = &cobra.Command{
}
return float32(topP)
}
// topK resolves the top-k sampling cutoff: it reads K8SGPT_TOP_K from the
// environment, falling back to defaultTopK when unset, and exits with an
// error when the value is not an integer in [1, 100].
topK := func() int32 {
	env := os.Getenv("K8SGPT_TOP_K")
	if env == "" {
		return defaultTopK
	}
	// ParseInt (not ParseFloat): topK is an integer cutoff, and this
	// rejects fractional input instead of silently truncating it.
	topK, err := strconv.ParseInt(env, 10, 32)
	if err != nil {
		color.Red("Unable to convert topK value: %v", err)
		os.Exit(1)
	}
	// Match the error message and the CLI-side validation in cmd/auth/add.go:
	// the valid range is 1-100 (the previous lower bound of 10 was a bug).
	if topK < 1 || topK > 100 {
		color.Red("Error: topK ranges from 1 to 100.")
		os.Exit(1)
	}
	return int32(topK)
}
// Check for env injection
backend = os.Getenv("K8SGPT_BACKEND")
password := os.Getenv("K8SGPT_PASSWORD")
Expand All @@ -104,6 +121,7 @@ var ServeCmd = &cobra.Command{
ProxyEndpoint: proxyEndpoint,
Temperature: temperature(),
TopP: topP(),
TopK: topK(),
}

configAI.Providers = append(configAI.Providers, *aiProvider)
Expand Down
4 changes: 4 additions & 0 deletions pkg/ai/amazonsagemaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type SageMakerAIClient struct {
temperature float32
endpoint string
topP float32
topK int32
maxTokens int
}

Expand All @@ -56,6 +57,7 @@ type Message struct {
type Parameters struct {
MaxNewTokens int `json:"max_new_tokens"`
TopP float64 `json:"top_p"`
TopK float64 `json:"top_k"`
Temperature float64 `json:"temperature"`
}

Expand All @@ -74,6 +76,7 @@ func (c *SageMakerAIClient) Configure(config IAIConfig) error {
c.temperature = config.GetTemperature()
c.maxTokens = config.GetMaxTokens()
c.topP = config.GetTopP()
c.topK = config.GetTopK()
return nil
}

Expand All @@ -90,6 +93,7 @@ func (c *SageMakerAIClient) GetCompletion(_ context.Context, prompt string) (str
Parameters: Parameters{
MaxNewTokens: int(c.maxTokens),
TopP: float64(c.topP),
TopK: float64(c.topK),
Temperature: float64(c.temperature),
},
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/ai/googlegenai.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type GoogleGenAIClient struct {
model string
temperature float32
topP float32
topK int32
maxTokens int
}

Expand All @@ -53,6 +54,7 @@ func (c *GoogleGenAIClient) Configure(config IAIConfig) error {
c.model = config.GetModel()
c.temperature = config.GetTemperature()
c.topP = config.GetTopP()
c.topK = config.GetTopK()
c.maxTokens = config.GetMaxTokens()
return nil
}
Expand All @@ -62,6 +64,7 @@ func (c *GoogleGenAIClient) GetCompletion(ctx context.Context, prompt string) (s
model := c.client.GenerativeModel(c.model)
model.SetTemperature(c.temperature)
model.SetTopP(c.topP)
model.SetTopK(c.topK)
model.SetMaxOutputTokens(int32(c.maxTokens))

// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.
Expand Down
3 changes: 3 additions & 0 deletions pkg/ai/googlevertexai.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type GoogleVertexAIClient struct {
model string
temperature float32
topP float32
topK int32
maxTokens int
}

Expand Down Expand Up @@ -111,6 +112,7 @@ func (g *GoogleVertexAIClient) Configure(config IAIConfig) error {
g.model = GetVertexAIModelOrDefault(config.GetModel())
g.temperature = config.GetTemperature()
g.topP = config.GetTopP()
g.topK = config.GetTopK()
g.maxTokens = config.GetMaxTokens()

return nil
Expand All @@ -121,6 +123,7 @@ func (g *GoogleVertexAIClient) GetCompletion(ctx context.Context, prompt string)
model := g.client.GenerativeModel(g.model)
model.SetTemperature(g.temperature)
model.SetTopP(g.topP)
model.SetTopK(float32(g.topK))
model.SetMaxOutputTokens(int32(g.maxTokens))

// Google AI SDK is capable of different inputs than just text, for now set explicit text prompt type.
Expand Down
4 changes: 4 additions & 0 deletions pkg/ai/huggingface.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ai

import (
"context"

"github.com/hupe1980/go-huggingface"
"k8s.io/utils/ptr"
)
Expand All @@ -14,6 +15,7 @@ type HuggingfaceClient struct {
client *huggingface.InferenceClient
model string
topP float32
topK int32
temperature float32
maxTokens int
}
Expand All @@ -26,6 +28,7 @@ func (c *HuggingfaceClient) Configure(config IAIConfig) error {
c.client = client
c.model = config.GetModel()
c.topP = config.GetTopP()
c.topK = config.GetTopK()
c.temperature = config.GetTemperature()
if config.GetMaxTokens() > 500 {
c.maxTokens = 500
Expand All @@ -43,6 +46,7 @@ func (c *HuggingfaceClient) GetCompletion(ctx context.Context, prompt string) (s
Model: c.model,
Parameters: huggingface.ConversationalParameters{
TopP: ptr.To[float64](float64(c.topP)),
TopK: ptr.To[int](int(c.topK)),
Temperature: ptr.To[float64](float64(c.temperature)),
MaxLength: &c.maxTokens,
},
Expand Down
6 changes: 6 additions & 0 deletions pkg/ai/iai.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ type IAIConfig interface {
GetTemperature() float32
GetProviderRegion() string
GetTopP() float32
GetTopK() int32
GetMaxTokens() int
GetProviderId() string
}
Expand Down Expand Up @@ -104,6 +105,7 @@ type AIProvider struct {
ProviderRegion string `mapstructure:"providerregion" yaml:"providerregion,omitempty"`
ProviderId string `mapstructure:"providerid" yaml:"providerid,omitempty"`
TopP float32 `mapstructure:"topp" yaml:"topp,omitempty"`
TopK int32 `mapstructure:"topk" yaml:"topk,omitempty"`
MaxTokens int `mapstructure:"maxtokens" yaml:"maxtokens,omitempty"`
}

Expand All @@ -123,6 +125,10 @@ func (p *AIProvider) GetTopP() float32 {
return p.TopP
}

// GetTopK returns the provider's configured top-k sampling cutoff.
func (p *AIProvider) GetTopK() int32 {
	return p.TopK
}

func (p *AIProvider) GetMaxTokens() int {
return p.MaxTokens
}
Expand Down
Loading