Skip to content

Commit

Permalink
Adds Global MMLU (#426)
Browse files Browse the repository at this point in the history
* add global mmlu + zulu

* add global mmlu + zulu

* fix translation literals

* add unk for global mmlu

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <[email protected]>

---------

Co-authored-by: Clémentine Fourrier <[email protected]>
  • Loading branch information
hynky1999 and clefourrier authored Dec 9, 2024
1 parent f2d0a65 commit 412ccfc
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 0 deletions.
87 changes: 87 additions & 0 deletions src/lighteval/tasks/multilingual/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,6 +1707,92 @@
]
]

# Translated MMLU using both professional and non-professional translators. Contains tags for cultural sensitivity.
# CA: Culturally Agnostic
# CS: Culturally Sensitive
# UNK: Not annotated
# ALL: All of the above
# https://huggingface.co/papers/2412.03304
global_mmlu_tasks = [
    # One task config is produced for every
    # (subset, language, formulation, sensitivity label) combination below.
    LightevalTaskConfig(
        name=f"global_mmlu_{sensitivity_label.lower()}_{language.value}_{formulation.name.lower()}:{subset}",
        prompt_function=get_mcq_prompt_function(
            language,
            # Adapter from a dataset row to the question / choices / gold index
            # mapping handed to the prompt function.
            lambda row: {
                "question": row["question"],
                "choices": [row[key] for key in ("option_a", "option_b", "option_c", "option_d")],
                "gold_idx": LETTER_INDICES.index(row["answer"]),
            },
            formulation=formulation,
        ),
        suite=("lighteval",),
        hf_repo="CohereForAI/Global-MMLU",
        hf_subset=standardize_tag(language.value),
        evaluation_splits=("test",),
        few_shots_split="dev",
        # partial() freezes the current subset and sensitivity label so each
        # task filters on its own values rather than on the loop variables'
        # final values. A "-" in the label column is rewritten to "UNK" before
        # matching, and the "ALL" label skips the label check entirely.
        hf_filter=partial(
            lambda wanted_subset, wanted_label, row: (
                row["subject"].lower() == wanted_subset
                and (
                    wanted_label == "ALL"
                    or wanted_label in row["cultural_sensitivity_label"].replace("-", "UNK")
                )
            ),
            subset,
            sensitivity_label,
        ),
        metric=get_metrics_for_formulation(
            formulation,
            [
                # Token-, character- and PMI-normalised log-likelihood accuracy.
                loglikelihood_acc_metric(normalization=norm_cls())
                for norm_cls in (LogProbTokenNorm, LogProbCharNorm, LogProbPMINorm)
            ],
        ),
    )
    for subset in MMLU_SUBSETS
    for language in (
        Language.AMHARIC,
        Language.ARABIC,
        Language.BENGALI,
        Language.CHINESE,
        Language.CZECH,
        Language.GERMAN,
        Language.ENGLISH,
        Language.SPANISH,
        Language.FRENCH,
        Language.HEBREW,
        Language.HINDI,
        Language.INDONESIAN,
        Language.ITALIAN,
        Language.JAPANESE,
        Language.KOREAN,
        Language.MALAY,
        Language.DUTCH,
        Language.NORWEGIAN,
        Language.POLISH,
        Language.PORTUGUESE,
        Language.ROMANIAN,
        Language.RUSSIAN,
        Language.SERBIAN,
        Language.SWEDISH,
        Language.SWAHILI,
        Language.TAMIL,
        Language.TELUGU,
        Language.THAI,
        Language.TURKISH,
        Language.UKRAINIAN,
        Language.URDU,
        Language.VIETNAMESE,
        Language.YORUBA,
        Language.ZULU,
    )
    for formulation in (
        MCFFormulation(),
        CFFormulation(),
        HybridFormulation(),
    )
    for sensitivity_label in ("ALL", "CA", "CS", "UNK")
]


# There are only these subsets in the African MMLU
AFRI_MMLU_SUBSETS = [
"elementary_mathematics",
Expand Down Expand Up @@ -2088,6 +2174,7 @@
*arabic_mmlu_tasks,
*turkish_mmlu_tasks,
*afri_mmlu_tasks,
*global_mmlu_tasks,
]
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1007,4 +1007,5 @@ def __getattribute__(self, name: str) -> str:
Language.WESTERN_FRISIAN: TranslationLiterals(language=Language.WESTERN_FRISIAN),
Language.YIDDISH: TranslationLiterals(language=Language.YIDDISH),
Language.YORUBA: TranslationLiterals(language=Language.YORUBA),
Language.ZULU: TranslationLiterals(language=Language.ZULU),
}
1 change: 1 addition & 0 deletions src/lighteval/utils/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class Language(Enum):
WAR = "war"
SHAN = "shn"
UDMURT = "udm"
ZULU = "zul"


# This mapping was created for Belebele; it converts ISO 639-3 individual codes to ISO 639-3 macro codes
Expand Down

0 comments on commit 412ccfc

Please sign in to comment.