format

huggingface · NathanHB · Mar 29, 2024 · Feb 20, 2024 · Feb 23, 2024 · Feb 23, 2024
commit 588fb2f9bb2640fa775786109e3984a64d20721f
diff --git a/extended_tasks/mt_bench/judges.py b/extended_tasks/mt_bench/judges.py
@@ -1,3 +1,26 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
 import ast
 import json
 import re

diff --git a/extended_tasks/mt_bench/main.py b/extended_tasks/mt_bench/main.py
@@ -1,3 +1,26 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
 # ruff: noqa: F405, F403, F401
 """
 Custom evaluation tasks for lighteval. Copy this file and complete it with the info for your task.

diff --git a/src/lighteval/evaluator.py b/src/lighteval/evaluator.py
@@ -130,7 +130,9 @@ def evaluate(  # noqa: C901
             judgement = None
 
         evaluation_tracker.metrics_logger.log(task_example_id.task_name, metrics)
-        evaluation_tracker.details_logger.log(task_example_id.task_name, task, doc, model_responses, metrics, (user_prompt, judgement))
+        evaluation_tracker.details_logger.log(
+            task_example_id.task_name, task, doc, model_responses, metrics, (user_prompt, judgement)
+        )
 
     return evaluation_tracker
 

diff --git a/src/lighteval/logging/info_loggers.py b/src/lighteval/logging/info_loggers.py
@@ -305,7 +305,15 @@ class CompiledHash:
     compiled_details: dict[str, CompiledDetail] = collections.defaultdict(CompiledDetail)
     compiled_details_over_all_tasks: CompiledDetailOverAllTasks = CompiledDetailOverAllTasks()
 
-    def log(self, task_name: str, task: LightevalTask, doc: Doc, outputs: list[ModelReturn], metrics: dict, llm_as_prompt_judgement: tuple[str, str]) -> None:
+    def log(
+        self,
+        task_name: str,
+        task: LightevalTask,
+        doc: Doc,
+        outputs: list[ModelReturn],
+        metrics: dict,
+        llm_as_prompt_judgement: tuple[str, str],
+    ) -> None:
         """Stores the relevant information for one sample of one task to the total list of samples stored in the DetailsLogger.
 
         Args:

diff --git a/src/lighteval/metrics/__init__.py b/src/lighteval/metrics/__init__.py
@@ -147,6 +147,7 @@ def apply_multichoice_metric_one_token(results: list[ModelReturn], formatted_doc
 
     return results, outputs
 
+
 def apply_generative_multi_turn_metric(results: list[ModelReturn], formatted_doc: Doc, metrics: list[str]):
     outputs = {}
     predictions = results.pop(0).result

diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py
@@ -352,7 +352,9 @@ def greedy_until_with_logits(
             override_bs=override_bs,
         )
 
-    def greedy_until_multi_turn(self, requests: list[GreedyUntilMultiTurnRequest], override_bs: Optional[int] = None) -> GenerateMultiTurnReturn:
+    def greedy_until_multi_turn(
+        self, requests: list[GreedyUntilMultiTurnRequest], override_bs: Optional[int] = None
+    ) -> GenerateMultiTurnReturn:
         for request in requests:
             request.stop_sequence = as_list(request.stop_sequence) + [self.tokenizer.eos_token]
             request.tokenized_context = self.tok_encode(request.context)
@@ -429,7 +431,15 @@ def greedy_until_multi_turn(self, requests: list[GreedyUntilMultiTurnRequest], o
 
                 model_answers.append(cur_reponses[0].result)
 
-            results.append(GenerateMultiTurnReturn(result=model_answers, input_tokens=[], generated_tokens=[], truncated_tokens_count=0, padded_tokens_count=0))
+            results.append(
+                GenerateMultiTurnReturn(
+                    result=model_answers,
+                    input_tokens=[],
+                    generated_tokens=[],
+                    truncated_tokens_count=0,
+                    padded_tokens_count=0,
+                )
+            )
 
         return results
 

diff --git a/src/lighteval/models/model_output.py b/src/lighteval/models/model_output.py
@@ -65,6 +65,7 @@ class GenerateReturn(ModelReturn):
     def get_result_for_eval(self):
         return self.result if self.logits is None else (self.result, self.logits)
 
+
 @dataclass
 class GenerateMultiTurnReturn(ModelReturn):
     result: list[str] = field(default_factory=list)

diff --git a/src/lighteval/tasks/requests.py b/src/lighteval/tasks/requests.py
@@ -120,6 +120,7 @@ class GreedyUntilRequest(Request):
     request_type = RequestType.GREEDY_UNTIL
     tokenized_context: list[int] = None
 
+
 @dataclass
 class GreedyUntilMultiTurnRequest(Request):
     """
@@ -130,6 +131,7 @@ class GreedyUntilMultiTurnRequest(Request):
         generation_size (int): The maximum number of tokens to generate.
         request_type (RequestType): The type of the request, set to RequestType.GREEDY_UNTIL_MULTI_TURN.
     """
+
     stop_sequence: str
     generation_size: int
     request_type = RequestType.GREEDY_UNTIL_MULTI_TURN