diff --git a/router/src/lib.rs b/router/src/lib.rs index 3f7ba1650..b218b3df9 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -480,6 +480,7 @@ struct ChatCompletionRequest { response_format: Option, repetition_penalty: Option, top_k: Option, + ignore_eos_token: Option, } #[derive(Clone, Debug, Deserialize, ToSchema)] @@ -505,6 +506,7 @@ struct CompletionRequest { // TODO(travis): add other LoRAX params here repetition_penalty: Option, top_k: Option, + ignore_eos_token: Option, } #[derive(Serialize, ToSchema)] @@ -623,7 +625,7 @@ impl From for CompatGenerateRequest { .max_tokens .map(|x| x as u32) .unwrap_or(default_max_new_tokens()), - ignore_eos_token: false, + ignore_eos_token: req.ignore_eos_token.unwrap_or(false), return_full_text: req.echo, stop: req.stop, truncate: None, @@ -660,7 +662,7 @@ impl From for CompatGenerateRequest { .max_tokens .map(|x| x as u32) .unwrap_or(default_max_new_tokens()), - ignore_eos_token: false, + ignore_eos_token: req.ignore_eos_token.unwrap_or(false), return_full_text: None, stop: req.stop, truncate: None,