Skip to content

Commit

Permalink
update default
Browse files Browse the repository at this point in the history
  • Loading branch information
OlivierDehaene committed Dec 5, 2024
1 parent 124eea2 commit 7f1c22a
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 6 deletions.
6 changes: 4 additions & 2 deletions docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -1013,6 +1013,7 @@
"type": "integer",
"format": "int32",
"description": "The maximum number of tokens that can be generated in the chat completion.",
+ "default": "1024",
"example": "32",
"nullable": true,
"minimum": 0
Expand Down Expand Up @@ -1329,7 +1330,8 @@
"type": "integer",
"format": "int32",
"description": "The maximum number of tokens that can be generated in the chat completion.",
- "default": "32",
+ "default": "1024",
+ "example": "32",
"nullable": true,
"minimum": 0
},
Expand Down Expand Up @@ -1591,7 +1593,7 @@
"type": "integer",
"format": "int32",
"description": "Maximum number of tokens to generate.",
- "default": "100",
+ "default": "1024",
"example": "20",
"nullable": true,
"minimum": 0
Expand Down
6 changes: 3 additions & 3 deletions router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ pub(crate) struct GenerateParameters {

/// Maximum number of tokens to generate.
#[serde(default)]
- #[schema(nullable = true, default = "256", example = "20")]
+ #[schema(nullable = true, default = "1024", example = "20")]
pub max_new_tokens: Option<u32>,

/// Whether to prepend the prompt to the generated text
Expand Down Expand Up @@ -460,7 +460,7 @@ pub struct CompletionRequest {

/// The maximum number of tokens that can be generated in the chat completion.
#[serde(default)]
- #[schema(default = "32")]
+ #[schema(default = "1024", example = "32")]
pub max_tokens: Option<u32>,

/// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
Expand Down Expand Up @@ -838,7 +838,7 @@ pub(crate) struct ChatRequest {

/// The maximum number of tokens that can be generated in the chat completion.
#[serde(default)]
- #[schema(example = "32")]
+ #[schema(default = "1024", example = "32")]
pub max_tokens: Option<u32>,

/// UNUSED
Expand Down
2 changes: 1 addition & 1 deletion router/src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use tokio::sync::oneshot;
use tracing::{instrument, Span};
use {once_cell::sync::Lazy, regex::Regex};

- static DEFAULT_GENERATION_LENGTH: u32 = 10;
+ static DEFAULT_GENERATION_LENGTH: u32 = 1024;

/// Validation
#[derive(Debug, Clone)]
Expand Down

0 comments on commit 7f1c22a

Please sign in to comment.