Adding the option to avoid displaying tqdm bars at inference with `vllm` (#1004)

A very minor change that adds the option to avoid displaying progress
bars within `outlines`. It does not change the default behaviour; it
only adds an argument (`use_tqdm`) to the `LLM.generate` call.

It was useful for a personal project, and I did not find another way to
do it, so I decided to open this PR.
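
For context, a minimal usage sketch. It assumes the outlines generator forwards extra keyword arguments through to `VLLM.generate`; the model name is purely illustrative.

```python
# Sketch only: the model name is illustrative, and we assume extra keyword
# arguments passed to the generator call are forwarded to VLLM.generate.
import outlines

model = outlines.models.vllm("facebook/opt-125m")
generator = outlines.generate.text(model)

# Passing use_tqdm=False suppresses vLLM's per-request progress bar.
answer = generator("Write a haiku about progress bars.", use_tqdm=False)
print(answer)
```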
BIMM99 authored Jun 24, 2024
1 parent 1bc2a30 commit f7d525b
Showing 1 changed file with 8 additions and 2 deletions.
outlines/models/vllm.py (8 additions, 2 deletions)
```diff
@@ -30,6 +30,7 @@ def generate(
         sampling_parameters: SamplingParameters,
         *,
         sampling_params: Optional["SamplingParams"] = None,
+        use_tqdm: bool = True,
     ):
         """Generate text using vLLM.

@@ -47,11 +48,13 @@ def generate(
             An instance of `SamplingParameters`, a dataclass that contains
             the name of the sampler to use and related parameters as available
             in Outlines.
-        samplng_params
+        sampling_params
             An instance of `vllm.sampling_params.SamplingParams`. The values
             passed via this dataclass supersede the values of the parameters
             in `generation_parameters` and `sampling_parameters`. See the
             vLLM documentation for more details: https://docs.vllm.ai/en/latest/dev/sampling_params.html.
+        use_tqdm
+            Whether to display a progress bar during inference.

         Returns
         -------
@@ -103,7 +106,10 @@ def generate(
             sampling_params.use_beam_search = True

         results = self.model.generate(
-            prompts, sampling_params=sampling_params, lora_request=self.lora_request
+            prompts,
+            sampling_params=sampling_params,
+            lora_request=self.lora_request,
+            use_tqdm=use_tqdm,
         )
         results = [[sample.text for sample in batch.outputs] for batch in results]

```
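
The new keyword is forwarded verbatim to vLLM, whose `LLM.generate` already exposes a `use_tqdm` flag. A minimal sketch of the underlying call (model name illustrative):

```python
# Sketch of the underlying vLLM call that the patch reaches; the model
# name is illustrative.
from vllm import LLM, SamplingParams

llm = LLM("facebook/opt-125m")
outputs = llm.generate(
    ["Hello, world"],
    SamplingParams(max_tokens=16),
    use_tqdm=False,  # the flag this commit threads through from outlines
)
print(outputs[0].outputs[0].text)
```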

