diff --git a/clients/python/lorax/client.py b/clients/python/lorax/client.py index ec9d32e02..18bbf8085 100644 --- a/clients/python/lorax/client.py +++ b/clients/python/lorax/client.py @@ -389,6 +389,7 @@ async def generate( api_token: Optional[str] = None, do_sample: bool = False, max_new_tokens: int = 20, + ignore_eos_token: bool = False, best_of: Optional[int] = None, repetition_penalty: Optional[float] = None, return_full_text: bool = False, @@ -423,6 +424,8 @@ async def generate( Activate logits sampling max_new_tokens (`int`): Maximum number of generated tokens + ignore_eos_token (`bool`): + Whether to ignore EOS tokens during generation best_of (`int`): Generate best_of sequences and return the one if the highest token logprobs repetition_penalty (`float`): @@ -481,6 +484,7 @@ async def generate( return_k_alternatives=return_k_alternatives, do_sample=do_sample, max_new_tokens=max_new_tokens, + ignore_eos_token=ignore_eos_token, repetition_penalty=repetition_penalty, return_full_text=return_full_text, seed=seed, @@ -514,6 +518,7 @@ async def generate_stream( api_token: Optional[str] = None, do_sample: bool = False, max_new_tokens: int = 20, + ignore_eos_token: bool = False, repetition_penalty: Optional[float] = None, return_full_text: bool = False, seed: Optional[int] = None, @@ -547,6 +552,8 @@ async def generate_stream( Activate logits sampling max_new_tokens (`int`): Maximum number of generated tokens + ignore_eos_token (`bool`): + Whether to ignore EOS tokens during generation repetition_penalty (`float`): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. @@ -601,6 +608,7 @@ async def generate_stream( return_k_alternatives=return_k_alternatives, do_sample=do_sample, max_new_tokens=max_new_tokens, + ignore_eos_token=ignore_eos_token, repetition_penalty=repetition_penalty, return_full_text=return_full_text, seed=seed,