diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py
index 45cea7f8..37bb9754 100644
--- a/src/lighteval/models/endpoints/endpoint_model.py
+++ b/src/lighteval/models/endpoints/endpoint_model.py
@@ -510,7 +510,7 @@ def greedy_until(
         for _, _ in tqdm(
             dataset.splits_start_end_iterator(),
-            total=self.DATASET_SPLITS,
+            total=dataset.num_dataset_splits,
             desc="Splits",
             position=0,
             disable=self.disable_tqdm,
@@ -532,12 +532,15 @@ def greedy_until(
                 responses = asyncio.run(self._async_process_batch_generate(batch))
             else:
                 responses = self._process_batch_generate(batch)
-            for response in responses:
+            for i, response in enumerate(responses):
                 results.append(
                     GenerativeResponse(
                         result=response.generated_text,
                         logits=[item.logprob for item in response.details.prefill] if returns_logits else None,
-                        truncated_tokens_count=-1,
+                        generated_tokens=[token.id for token in response.details.tokens],
+                        truncated_tokens_count=max(
+                            len(self.tokenizer.encode(batch[i].context)) - self.max_length, 0
+                        ),
                         padded_tokens_count=-1,
                     )
                 )
@@ -556,7 +559,7 @@ def loglikelihood(
         for _, _ in tqdm(
             dataset.splits_start_end_iterator(),
-            total=self.DATASET_SPLITS,
+            total=dataset.num_dataset_splits,
             desc="Splits",
             position=0,
             disable=self.disable_tqdm,
@@ -607,7 +610,7 @@ def loglikelihood_rolling(
         for _, _ in tqdm(
             dataset.splits_start_end_iterator(),
-            total=self.DATASET_SPLITS,
+            total=dataset.num_dataset_splits,
             desc="Splits",
             position=0,
             disable=self.disable_tqdm,
diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py
index 7219b9b9..68835fda 100644
--- a/src/lighteval/models/model_loader.py
+++ b/src/lighteval/models/model_loader.py
@@ -131,7 +131,9 @@ def load_openai_model(config: OpenAIModelConfig, env_config: EnvConfig):
     return model
 
 
-def load_model_with_inference_endpoints(config: InferenceEndpointModelConfig, env_config: EnvConfig):
+def load_model_with_inference_endpoints(
+    config: Union[InferenceEndpointModelConfig, ServerlessEndpointModelConfig], env_config: EnvConfig
+):
     logger.info("Spin up model using inference endpoint.")
     model = InferenceEndpointModel(config=config, env_config=env_config)
     return model
diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py
index bed174b4..48edb431 100644
--- a/src/lighteval/models/transformers/transformers_model.py
+++ b/src/lighteval/models/transformers/transformers_model.py
@@ -880,10 +880,7 @@ def greedy_until(
                 input_ids=tokenized["input_ids"],
                 input_lengths=[len(item == 1) for item in tokenized["attention_mask"]],
                 input_mask=tokenized["attention_mask"],
-                truncated=[
-                    len(c) - tokenized["input_ids"].shape[1] if len(c) > tokenized["input_ids"].shape[1] else 0
-                    for c in context
-                ],
+                truncated=[max(len(c) - tokenized["input_ids"].shape[1], 0) for c in context],
                 padded=[sum(mask == 0) for mask in tokenized["attention_mask"]],
             )