Skip to content

Commit

Permalink
Merge branch 'main' into add_litellm_inference
Browse files Browse the repository at this point in the history
  • Loading branch information
JoelNiklaus authored Dec 22, 2024
2 parents 86dd849 + 8568e72 commit db983e3
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 20 deletions.
1 change: 0 additions & 1 deletion .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ jobs:
uses: actions/checkout@v3
with:
lfs: 'true'
ref: ${{ github.event.pull_request.head.sha }} # we want to test against our branch not against a merge commit
- name: Setup Python environment
uses: actions/setup-python@v4
with:
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/trufflehog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,3 @@ jobs:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main

7 changes: 2 additions & 5 deletions community_tasks/arabic_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None):
choices=valid_keys_arabic, # Return only valid choices (Arabic keys)
gold_index=answer_index, # Correct index in the valid Arabic keys
instruction=instruction,
target_for_fewshot_sorting=valid_keys_arabic[answer_index], # Correct answer in Arabic form
)


Expand Down Expand Up @@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None):
choices=[str(i) for i in range(1, len(choices) + 1)], # List of strings instead of ints
gold_index=answer_index,
instruction=instruction,
target_for_fewshot_sorting=str(answer_index), # Assuming it's sorted based on the number
)


Expand Down Expand Up @@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None):
choices=LETTER_INDICES_AR[:3],
gold_index=answer_index,
instruction=instruction,
target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index],
)


Expand Down Expand Up @@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None):
def alghafa_pfn(line, task_name: str = None):
question = line["query"]
answer_index = int(line["label"])
choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
allowed_keys = [f"sol{i}" for i in range(1, 6)]
choices = [line[key] for key in allowed_keys if key in line]

instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
query = f"{instruction}السؤال: {question}\n"
Expand Down Expand Up @@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None):
choices=choices,
gold_index=answer_index, # Correct index in the valid keys
instruction=instruction,
target_for_fewshot_sorting=valid_keys_latin[answer_index], # Correct answer in Latin form
)


Expand Down
1 change: 0 additions & 1 deletion docs/source/adding-a-new-metric.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,3 @@ if __name__ == "__main__":

You can then give your custom metric to lighteval by using `--custom-tasks
path_to_your_file` when launching it.

12 changes: 6 additions & 6 deletions docs/source/contributing-to-multilingual-evaluations.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ We welcome translations in your language!

To contribute, you'll need to
1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file
2. Edit the file to add or expand the literal for your language of interest.
2. Edit the file to add or expand the literal for your language of interest.

```python
Language.ENGLISH: TranslationLiterals(
Expand Down Expand Up @@ -42,7 +42,7 @@ To contribute, you'll need to

## Contributing a new multilingual task

You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.

Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations, the `prompt_function` should be implemented by a language-adapted template. The template will take care of correct formatting, and of correct and consistent usage of language-adjusted prompt anchors (e.g. Question/Answer) and punctuation.

Expand All @@ -58,7 +58,7 @@ your_tasks = [
LightevalTaskConfig(
# Name of your evaluation
name=f"evalname_{language.value}_{formulation.name.lower()}",
# The evaluation is community contributed
# The evaluation is community contributed
suite=["community"],
# This will automatically get the correct metrics for your chosen formulation
metric=get_metrics_for_formulation(
Expand All @@ -72,7 +72,7 @@ your_tasks = [
# In this function, you choose which template to follow and for which language and formulation
prompt_function=get_template_prompt_function(
language=language,
# then use the adapter to define the mapping between the
# then use the adapter to define the mapping between the
# keys of the template (left), and the keys of your dataset
# (right)
# To know which template keys are required and available,
Expand All @@ -83,9 +83,9 @@ your_tasks = [
},
formulation=formulation,
),
# You can also add specific filters to remove irrelevant samples
# You can also add specific filters to remove irrelevant samples
hf_filter=lambda line: line["label"] in <condition>,
# You then select your huggingface dataset as well as
# You then select your huggingface dataset as well as
# the splits available for evaluation
hf_repo=<dataset>,
hf_subset=<subset>,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/using-the-python-api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def main():
env_config=EnvConfig(cache_dir="tmp/"),
# Remove the 2 parameters below once your configuration is tested
override_batch_size=1,
max_samples=10
max_samples=10
)

model_config = VLLMModelConfig(
Expand Down
4 changes: 1 addition & 3 deletions src/lighteval/logging/evaluation_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import copy
import json
import logging
import os
Expand Down Expand Up @@ -156,8 +155,7 @@ def save(self) -> None:
date_id = datetime.now().isoformat().replace(":", "-")

# We first prepare data to save
config_general = copy.deepcopy(self.general_config_logger)
config_general = asdict(config_general)
config_general = asdict(self.general_config_logger)
# We remove the config from logging, which contains context/accelerator objects
config_general.pop("config")

Expand Down
4 changes: 2 additions & 2 deletions src/lighteval/tasks/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ def task_registry(self):
intersection = set(default_tasks_registry.keys()).intersection(set(custom_tasks_registry.keys()))
if len(intersection) > 0:
logger.warning(
f"Following tasks ({intersection}) exists both in the default and custom tasks. Will use the default ones on conflict."
f"Following tasks ({intersection}) exists both in the default and custom tasks. Will use the custom ones on conflict."
)

# Defaults tasks should overwrite custom tasks
# Custom tasks overwrite default tasks
return {**default_tasks_registry, **custom_tasks_registry}

@property
Expand Down

0 comments on commit db983e3

Please sign in to comment.