diff --git a/src/lighteval/tasks/multilingual/tasks.py b/src/lighteval/tasks/multilingual/tasks.py
index c788871ba..93d8cea40 100644
--- a/src/lighteval/tasks/multilingual/tasks.py
+++ b/src/lighteval/tasks/multilingual/tasks.py
@@ -1707,6 +1707,92 @@
     ]
 ]
 
+# Global-MMLU: MMLU translated by both professional and non-professional translators; one task per (subset, language, formulation, sensitivity tag).
+# CA: Culturally Agnostic
+# CS: Culturally Sensitive (i.e. culture-specific)
+# UNK: Not annotated (presumably stored as "-" in the dataset; the filter below maps "-" to "UNK")
+# ALL: No filtering on the sensitivity tag, i.e. CA + CS + UNK together
+# https://huggingface.co/papers/2412.03304
+global_mmlu_tasks = [
+    LightevalTaskConfig(
+        name=f"global_mmlu_{sensitivity_label.lower()}_{language.value}_{formulation.name.lower()}:{subset}",
+        prompt_function=get_mcq_prompt_function(
+            language,
+            lambda line: {
+                "question": line["question"],
+                "choices": [line["option_a"], line["option_b"], line["option_c"], line["option_d"]],
+                "gold_idx": LETTER_INDICES.index(line["answer"]),
+            },
+            formulation=formulation,
+        ),
+        suite=("lighteval",),
+        hf_repo="CohereForAI/Global-MMLU",
+        hf_subset=standardize_tag(language.value),
+        evaluation_splits=("test",),
+        few_shots_split="dev",
+        hf_filter=partial(
+            lambda subset, sensitivity_label, x: x["subject"].lower() == subset
+            and (
+                sensitivity_label == "ALL" or sensitivity_label in x["cultural_sensitivity_label"].replace("-", "UNK")
+            ),
+            subset,
+            sensitivity_label,
+        ),
+        metric=get_metrics_for_formulation(
+            formulation,
+            [
+                loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
+                loglikelihood_acc_metric(normalization=LogProbCharNorm()),
+                loglikelihood_acc_metric(normalization=LogProbPMINorm()),
+            ],
+        ),
+    )
+    for subset in MMLU_SUBSETS
+    for language in [
+        Language.AMHARIC,
+        Language.ARABIC,
+        Language.BENGALI,
+        Language.CHINESE,
+        Language.CZECH,
+        Language.GERMAN,
+        Language.ENGLISH,
+        Language.SPANISH,
+        Language.FRENCH,
+        Language.HEBREW,
+        Language.HINDI,
+        Language.INDONESIAN,
+        Language.ITALIAN,
+        Language.JAPANESE,
+        Language.KOREAN,
+        Language.MALAY,
+        Language.DUTCH,
+        Language.NORWEGIAN,
+        Language.POLISH,
+        Language.PORTUGUESE,
+        Language.ROMANIAN,
+        Language.RUSSIAN,
+        Language.SERBIAN,
+        Language.SWEDISH,
+        Language.SWAHILI,
+        Language.TAMIL,
+        Language.TELUGU,
+        Language.THAI,
+        Language.TURKISH,
+        Language.UKRAINIAN,
+        Language.URDU,
+        Language.VIETNAMESE,
+        Language.YORUBA,
+        Language.ZULU,
+    ]
+    for formulation in [
+        MCFFormulation(),
+        CFFormulation(),
+        HybridFormulation(),
+    ]
+    for sensitivity_label in ["ALL", "CA", "CS", "UNK"]
+]
+
+
 # There are only these subsets in the African MMLU
 AFRI_MMLU_SUBSETS = [
     "elementary_mathematics",
@@ -2088,6 +2174,7 @@
         *arabic_mmlu_tasks,
         *turkish_mmlu_tasks,
         *afri_mmlu_tasks,
+        *global_mmlu_tasks,
     ]
 )
 
diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py
index 186d64485..441b5a7b6 100644
--- a/src/lighteval/tasks/templates/utils/translation_literals.py
+++ b/src/lighteval/tasks/templates/utils/translation_literals.py
@@ -1007,4 +1007,5 @@ def __getattribute__(self, name: str) -> str:
     Language.WESTERN_FRISIAN: TranslationLiterals(language=Language.WESTERN_FRISIAN),
     Language.YIDDISH: TranslationLiterals(language=Language.YIDDISH),
     Language.YORUBA: TranslationLiterals(language=Language.YORUBA),
+    Language.ZULU: TranslationLiterals(language=Language.ZULU),
 }
diff --git a/src/lighteval/utils/language.py b/src/lighteval/utils/language.py
index e6e53984e..d59908b01 100644
--- a/src/lighteval/utils/language.py
+++ b/src/lighteval/utils/language.py
@@ -122,6 +122,7 @@ class Language(Enum):
     WAR = "war"
     SHAN = "shn"
     UDMURT = "udm"
+    ZULU = "zul"
 
 
 # This mapping was created for beleble, it converts iso_639_3 individual codes to iso_639_3 macro codes