From 1d2ed54796aa26e7c3d2535208b6f0e52312ce2f Mon Sep 17 00:00:00 2001 From: shivi Date: Thu, 12 Dec 2024 12:46:25 +0000 Subject: [PATCH 1/8] add global mmlu lite --- lm_eval/tasks/global_mmlu/_default_yaml | 17 ++++++++++++ .../tasks/global_mmlu/_generate_configs.py | 27 +++++++++++++++++++ lm_eval/tasks/global_mmlu/global_mmlu_am.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_de.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_el.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_en.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_es.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml | 4 +++ .../tasks/global_mmlu/global_mmlu_fil.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_he.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_id.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_it.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_si.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_so.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_te.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml | 4 +++ lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml | 4 +++ 42 files changed, 204 insertions(+) create mode 100644 lm_eval/tasks/global_mmlu/_default_yaml create mode 100644 lm_eval/tasks/global_mmlu/_generate_configs.py create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_am.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_de.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_el.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_en.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_es.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_he.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_id.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_it.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_si.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_so.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_te.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml create mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml diff --git a/lm_eval/tasks/global_mmlu/_default_yaml b/lm_eval/tasks/global_mmlu/_default_yaml new file mode 100644 index 0000000000..9cf22e330c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/_default_yaml @@ -0,0 +1,17 @@ +tag: + - global_mmlu +dataset_path: CohereForAI/Global-MMLU-Lite +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_0}}\nB. {{option_1}}\nC. {{option_2}}\nD. {{option_3}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 \ No newline at end of file diff --git a/lm_eval/tasks/global_mmlu/_generate_configs.py b/lm_eval/tasks/global_mmlu/_generate_configs.py new file mode 100644 index 0000000000..583f196af6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/_generate_configs.py @@ -0,0 +1,27 @@ +import datasets +import yaml +from tqdm import tqdm + +languages = ['am', 'ar', 'cs', 'de', 'el', 'en', 'es', 'fa', 'fil', 'fr', 'ha', 'he', 'hi', 'id', 'ig', 'it', 'ja', 'ko', 'ky', 'lt', 'mg', 'ms', 'ne', 'nl', 'ny', 'pl', 'pt', 'ro', 'ru', 'si', 'sn', 'so', 'sr', 'sv', 'te', 'tr', 'uk', 'vi', 'yo', 'zh'] + +def main() -> None: + + for language in languages: + file_name = f"global_mmlu_{language}.yaml" + try: + with open(f"{file_name}", "w") as f: + f.write("# Generated by _generate_configs.py\n") + yaml.dump( + { + "include": "_default_yaml", + "task": f"global_mmlu_{language}", + "dataset_name": language, + }, + f, + ) + except FileExistsError: + pass + + +if __name__ == "__main__": + main() diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_am.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_am.yaml new file mode 100644 index 0000000000..ab800bcb8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_am.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: am +include: _default_yaml +task: global_mmlu_am diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml new file mode 100644 index 0000000000..703f420a52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ar +include: _default_yaml +task: global_mmlu_ar diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml new file mode 100644 index 0000000000..fee2e7f305 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: cs +include: _default_yaml +task: global_mmlu_cs diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_de.yaml new file mode 100644 index 0000000000..a874c64fd5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_de.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: de +include: _default_yaml +task: global_mmlu_de diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_el.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_el.yaml new file mode 100644 index 0000000000..ca52ed2447 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_el.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: el +include: _default_yaml +task: global_mmlu_el diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_en.yaml new file mode 100644 index 0000000000..34a6d7120a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_en.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: en +include: _default_yaml +task: global_mmlu_en diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_es.yaml new file mode 100644 index 0000000000..75abc77567 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_es.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: es +include: _default_yaml +task: global_mmlu_es diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml new file mode 100644 index 0000000000..f566b75277 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fa +include: _default_yaml +task: global_mmlu_fa diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml new file mode 100644 index 0000000000..3a8d3b94da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fil +include: _default_yaml +task: global_mmlu_fil diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml new file mode 100644 index 0000000000..1a66f53648 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fr +include: _default_yaml +task: global_mmlu_fr diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml new file mode 100644 index 0000000000..b83574a73d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ha +include: _default_yaml +task: global_mmlu_ha diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_he.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_he.yaml new file mode 100644 index 0000000000..58abe62c03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_he.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: he +include: _default_yaml +task: global_mmlu_he diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml new file mode 100644 index 0000000000..788f95f2d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: hi +include: _default_yaml +task: global_mmlu_hi diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_id.yaml new file mode 100644 index 0000000000..f4b6d5071d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_id.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: id +include: _default_yaml +task: global_mmlu_id diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml new file mode 100644 index 0000000000..bf91e72429 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ig +include: _default_yaml +task: global_mmlu_ig diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_it.yaml new file mode 100644 index 0000000000..5b55df975f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_it.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: it +include: _default_yaml +task: global_mmlu_it diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml new file mode 100644 index 0000000000..97d9c6ca48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ja +include: _default_yaml +task: global_mmlu_ja diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml new file mode 100644 index 0000000000..02b7fe0388 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ko +include: _default_yaml +task: global_mmlu_ko diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml new file mode 100644 index 0000000000..2f714937f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ky +include: _default_yaml +task: global_mmlu_ky diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml new file mode 100644 index 0000000000..ba0c543304 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lt +include: _default_yaml +task: global_mmlu_lt diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml new file mode 100644 index 0000000000..6077d0f8d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: mg +include: _default_yaml +task: global_mmlu_mg diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml new file mode 100644 index 0000000000..b7c38759a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ms +include: _default_yaml +task: global_mmlu_ms diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml new file mode 100644 index 0000000000..4d7b7b6a73 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ne +include: _default_yaml +task: global_mmlu_ne diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml new file mode 100644 index 0000000000..9765f63c69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: nl +include: _default_yaml +task: global_mmlu_nl diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml new file mode 100644 index 0000000000..b125133255 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ny +include: _default_yaml +task: global_mmlu_ny diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml new file mode 100644 index 0000000000..9d930cf650 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: pl +include: _default_yaml +task: global_mmlu_pl diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml new file mode 100644 index 0000000000..724bfb4d23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: pt +include: _default_yaml +task: global_mmlu_pt diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml new file mode 100644 index 0000000000..98fd3e724f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ro +include: _default_yaml +task: global_mmlu_ro diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml new file mode 100644 index 0000000000..5de5757e77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ru +include: _default_yaml +task: global_mmlu_ru diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_si.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_si.yaml new file mode 100644 index 0000000000..ab232577dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_si.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: si +include: _default_yaml +task: global_mmlu_si diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml new file mode 100644 index 0000000000..6195b8186a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: sn +include: _default_yaml +task: global_mmlu_sn diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_so.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_so.yaml new file mode 100644 index 0000000000..5b5096afa2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_so.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: so +include: _default_yaml +task: global_mmlu_so diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml new file mode 100644 index 0000000000..a78cb4fc13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: sr +include: _default_yaml +task: global_mmlu_sr diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml new file mode 100644 index 0000000000..30c0825e67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: sv +include: _default_yaml +task: global_mmlu_sv diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_te.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_te.yaml new file mode 100644 index 0000000000..cba1be87ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_te.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: te +include: _default_yaml +task: global_mmlu_te diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml new file mode 100644 index 0000000000..4f792508c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: tr +include: _default_yaml +task: global_mmlu_tr diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml new file mode 100644 index 0000000000..bb3b03133e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: uk +include: _default_yaml +task: global_mmlu_uk diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml new file mode 100644 index 0000000000..5aaddc4a8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: vi +include: _default_yaml +task: global_mmlu_vi diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml new file mode 100644 index 0000000000..c6ec2f9efc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: yo +include: _default_yaml +task: global_mmlu_yo diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml new file mode 100644 index 0000000000..862d46ad9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: zh +include: _default_yaml +task: global_mmlu_zh From 8322ca689a3cf5869af13942eb9b8254b660d138 Mon Sep 17 00:00:00 2001 From: shivi Date: Thu, 12 Dec 2024 12:56:38 +0000 Subject: [PATCH 2/8] add global mmlu lite --- lm_eval/tasks/global_mmlu/_generate_configs.py | 2 +- .../global_mmlu/{global_mmlu_am.yaml => global_mmlu_bn.yaml} | 4 ++-- lm_eval/tasks/global_mmlu/global_mmlu_el.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_he.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_si.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_so.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml | 4 ---- .../global_mmlu/{global_mmlu_cs.yaml => global_mmlu_sw.yaml} | 4 ++-- lm_eval/tasks/global_mmlu/global_mmlu_te.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml | 4 ---- lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml | 4 ---- 28 files changed, 5 insertions(+), 105 deletions(-) rename lm_eval/tasks/global_mmlu/{global_mmlu_am.yaml => global_mmlu_bn.yaml} (60%) delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_el.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_he.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_si.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_so.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml rename lm_eval/tasks/global_mmlu/{global_mmlu_cs.yaml => global_mmlu_sw.yaml} (60%) delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_te.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml delete mode 100644 lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml diff --git a/lm_eval/tasks/global_mmlu/_generate_configs.py b/lm_eval/tasks/global_mmlu/_generate_configs.py index 583f196af6..faec69fdbf 100644 --- a/lm_eval/tasks/global_mmlu/_generate_configs.py +++ b/lm_eval/tasks/global_mmlu/_generate_configs.py @@ -2,7 +2,7 @@ import yaml from tqdm import tqdm -languages = ['am', 'ar', 'cs', 'de', 'el', 'en', 'es', 'fa', 'fil', 'fr', 'ha', 'he', 'hi', 'id', 'ig', 'it', 'ja', 'ko', 'ky', 'lt', 'mg', 'ms', 'ne', 'nl', 'ny', 'pl', 'pt', 'ro', 'ru', 'si', 'sn', 'so', 'sr', 'sv', 'te', 'tr', 'uk', 'vi', 'yo', 'zh'] +languages = ['en', 'ar', 'fr', 'es', 'hi', 'de', 'id', 'it', 'ja', 'ko', 'pt','zh', 'yo', 'bn', 'sw'] def main() -> None: diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_am.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml similarity index 60% rename from lm_eval/tasks/global_mmlu/global_mmlu_am.yaml rename to lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml index ab800bcb8b..f85b67a293 100644 --- a/lm_eval/tasks/global_mmlu/global_mmlu_am.yaml +++ b/lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml @@ -1,4 +1,4 @@ # Generated by _generate_configs.py -dataset_name: am +dataset_name: bn include: _default_yaml -task: global_mmlu_am +task: global_mmlu_bn diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_el.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_el.yaml deleted file mode 100644 index ca52ed2447..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_el.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: el -include: _default_yaml -task: global_mmlu_el diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml deleted file mode 100644 index f566b75277..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_fa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: fa -include: _default_yaml -task: global_mmlu_fa diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml deleted file mode 100644 index 3a8d3b94da..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_fil.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: fil -include: _default_yaml -task: global_mmlu_fil diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml deleted file mode 100644 index b83574a73d..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ha.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ha -include: _default_yaml -task: global_mmlu_ha diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_he.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_he.yaml deleted file mode 100644 index 58abe62c03..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_he.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: he -include: _default_yaml -task: global_mmlu_he diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml deleted file mode 100644 index bf91e72429..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ig.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ig -include: _default_yaml -task: global_mmlu_ig diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml deleted file mode 100644 index 2f714937f2..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ky.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ky -include: _default_yaml -task: global_mmlu_ky diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml deleted file mode 100644 index ba0c543304..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_lt.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: lt -include: _default_yaml -task: global_mmlu_lt diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml deleted file mode 100644 index 6077d0f8d8..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_mg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: mg -include: _default_yaml -task: global_mmlu_mg diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml deleted file mode 100644 index b7c38759a4..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ms.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ms -include: _default_yaml -task: global_mmlu_ms diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml deleted file mode 100644 index 4d7b7b6a73..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ne.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ne -include: _default_yaml -task: global_mmlu_ne diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml deleted file mode 100644 index 9765f63c69..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_nl.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: nl -include: _default_yaml -task: global_mmlu_nl diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml deleted file mode 100644 index b125133255..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ny.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ny -include: _default_yaml -task: global_mmlu_ny diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml deleted file mode 100644 index 9d930cf650..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_pl.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: pl -include: _default_yaml -task: global_mmlu_pl diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml deleted file mode 100644 index 98fd3e724f..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ro.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ro -include: _default_yaml -task: global_mmlu_ro diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml deleted file mode 100644 index 5de5757e77..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_ru.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ru -include: _default_yaml -task: global_mmlu_ru diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_si.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_si.yaml deleted file mode 100644 index ab232577dd..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_si.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: si -include: _default_yaml -task: global_mmlu_si diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml deleted file mode 100644 index 6195b8186a..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_sn.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: sn -include: _default_yaml -task: global_mmlu_sn diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_so.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_so.yaml deleted file mode 100644 index 5b5096afa2..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_so.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: so -include: _default_yaml -task: global_mmlu_so diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml deleted file mode 100644 index a78cb4fc13..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_sr.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: sr -include: _default_yaml -task: global_mmlu_sr diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml deleted file mode 100644 index 30c0825e67..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_sv.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: sv -include: _default_yaml -task: global_mmlu_sv diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml similarity index 60% rename from lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml rename to lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml index fee2e7f305..481232fa28 100644 --- a/lm_eval/tasks/global_mmlu/global_mmlu_cs.yaml +++ b/lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml @@ -1,4 +1,4 @@ # Generated by _generate_configs.py -dataset_name: cs +dataset_name: sw include: _default_yaml -task: global_mmlu_cs +task: global_mmlu_sw diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_te.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_te.yaml deleted file mode 100644 index cba1be87ad..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_te.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: te -include: _default_yaml -task: global_mmlu_te diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml deleted file mode 100644 index 4f792508c2..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_tr.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: tr -include: _default_yaml -task: global_mmlu_tr diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml deleted file mode 100644 index bb3b03133e..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_uk.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: uk -include: _default_yaml -task: global_mmlu_uk diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml b/lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml deleted file mode 100644 index 5aaddc4a8b..0000000000 --- a/lm_eval/tasks/global_mmlu/global_mmlu_vi.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: vi -include: _default_yaml -task: global_mmlu_vi From 9ac1dc10c06095c81e80d028186c6559e89fda03 Mon Sep 17 00:00:00 2001 From: shivi Date: Fri, 13 Dec 2024 15:19:32 +0000 Subject: [PATCH 3/8] fix bugs --- lm_eval/tasks/global_mmlu/_default_yaml | 4 ++-- .../tasks/global_mmlu/_generate_configs.py | 23 +++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lm_eval/tasks/global_mmlu/_default_yaml b/lm_eval/tasks/global_mmlu/_default_yaml index 9cf22e330c..33a1fc356a 100644 --- a/lm_eval/tasks/global_mmlu/_default_yaml +++ b/lm_eval/tasks/global_mmlu/_default_yaml @@ -6,7 +6,7 @@ fewshot_split: dev fewshot_config: sampler: default output_type: multiple_choice -doc_to_text: "{{question.strip()}}\nA. {{option_0}}\nB. {{option_1}}\nC. {{option_2}}\nD. {{option_3}}\nAnswer:" +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" doc_to_choice: ["A", "B", "C", "D"] doc_to_target: answer metric_list: @@ -14,4 +14,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 0.0 \ No newline at end of file + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/_generate_configs.py b/lm_eval/tasks/global_mmlu/_generate_configs.py index faec69fdbf..58e169c6d4 100644 --- a/lm_eval/tasks/global_mmlu/_generate_configs.py +++ b/lm_eval/tasks/global_mmlu/_generate_configs.py @@ -1,11 +1,26 @@ -import datasets import yaml -from tqdm import tqdm -languages = ['en', 'ar', 'fr', 'es', 'hi', 'de', 'id', 'it', 'ja', 'ko', 'pt','zh', 'yo', 'bn', 'sw'] + +languages = [ + "en", + "ar", + "fr", + "es", + "hi", + "de", + "id", + "it", + "ja", + "ko", + "pt", + "zh", + "yo", + "bn", + "sw", +] + def main() -> None: - for language in languages: file_name = f"global_mmlu_{language}.yaml" try: From 7d53191ba1416c2659f9a5ec5ab5517e4cc01cf5 Mon Sep 17 00:00:00 2001 From: shivalika-singh Date: Tue, 17 Dec 2024 17:04:48 +0530 Subject: [PATCH 4/8] add task README.md --- lm_eval/tasks/global_mmlu/README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 lm_eval/tasks/global_mmlu/README.md diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md new file mode 100644 index 0000000000..036226d779 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/README.md @@ -0,0 +1,26 @@ +# Task-name + +### Paper + +Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation` + +Abstract: `https://arxiv.org/abs/2412.03304` + +This is a multilingual evaluation set spanning 15 languages, including English. It is "lite" version of the original [Global-MMLU dataset](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 designed for efficient evaluation. +It includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) samples per language. The samples in Global-MMLU-Lite are corresponding to languages which are fully human translated or post-edited in the original Global-MMLU dataset. + +Homepage: `https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite` + +### Citation + +```bibtex +@misc{singh2024globalmmluunderstandingaddressing, + title={Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation}, + author={Shivalika Singh and Angelika Romanou and Clémentine Fourrier and David I. Adelani and Jian Gang Ngui and Daniel Vila-Suero and Peerat Limkonchotiwat and Kelly Marchisio and Wei Qi Leong and Yosephine Susanto and Raymond Ng and Shayne Longpre and Wei-Yin Ko and Madeline Smith and Antoine Bosselut and Alice Oh and Andre F. T. Martins and Leshem Choshen and Daphne Ippolito and Enzo Ferrante and Marzieh Fadaee and Beyza Ermis and Sara Hooker}, + year={2024}, + eprint={2412.03304}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2412.03304}, +} +``` From e21c9571e417663b9ee957d7af0ea8e9a1b95eb9 Mon Sep 17 00:00:00 2001 From: shivalika-singh Date: Tue, 17 Dec 2024 17:09:51 +0530 Subject: [PATCH 5/8] Update README.md --- lm_eval/tasks/global_mmlu/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md index 036226d779..17ebb322fb 100644 --- a/lm_eval/tasks/global_mmlu/README.md +++ b/lm_eval/tasks/global_mmlu/README.md @@ -1,4 +1,4 @@ -# Task-name +# Global-MMLU ### Paper @@ -6,8 +6,7 @@ Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases Abstract: `https://arxiv.org/abs/2412.03304` -This is a multilingual evaluation set spanning 15 languages, including English. It is "lite" version of the original [Global-MMLU dataset](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 designed for efficient evaluation. -It includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) samples per language. The samples in Global-MMLU-Lite are corresponding to languages which are fully human translated or post-edited in the original Global-MMLU dataset. +Global-MMLU-Lite is designed for efficient evaluation of multilingual models in 15 languages, including English. It includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) samples per language. The samples in Global-MMLU-Lite are corresponding to languages which are fully human translated or post-edited in the original [Global-MMLU dataset](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍. Homepage: `https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite` From 7990a774c70b313af4af38ecb1a406b458632aa1 Mon Sep 17 00:00:00 2001 From: shivalika-singh Date: Tue, 17 Dec 2024 17:17:36 +0530 Subject: [PATCH 6/8] Update tasks README.md --- lm_eval/tasks/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 62e65a1eb8..8db5ee31bb 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -45,6 +45,7 @@ | [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | | [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French| | [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | +| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | | [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | | [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | | [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | From 5919c2f3d52499d748182b1c4642c1646b92b914 Mon Sep 17 00:00:00 2001 From: shivalika-singh Date: Tue, 17 Dec 2024 17:27:25 +0530 Subject: [PATCH 7/8] Update README.md --- lm_eval/tasks/global_mmlu/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md index 17ebb322fb..a5e49d266b 100644 --- a/lm_eval/tasks/global_mmlu/README.md +++ b/lm_eval/tasks/global_mmlu/README.md @@ -6,7 +6,7 @@ Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases Abstract: `https://arxiv.org/abs/2412.03304` -Global-MMLU-Lite is designed for efficient evaluation of multilingual models in 15 languages, including English. It includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) samples per language. The samples in Global-MMLU-Lite are corresponding to languages which are fully human translated or post-edited in the original [Global-MMLU dataset](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍. +Global-MMLU-Lite is a balanced collection of culturally sensitive and culturally agnostic MMLU tasks. It is designed for efficient evaluation of multilingual models in 15 languages (including English). Only languages with human translations and post-edits in the original [Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 dataset have been included in the lite version. Homepage: `https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite` From 6eb9afd6f44ea9b19b1eaaad4d5e76a7d349c923 Mon Sep 17 00:00:00 2001 From: shivalika-singh Date: Tue, 17 Dec 2024 17:17:53 +0000 Subject: [PATCH 8/8] update readme --- lm_eval/tasks/global_mmlu/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md index a5e49d266b..838a7c9d42 100644 --- a/lm_eval/tasks/global_mmlu/README.md +++ b/lm_eval/tasks/global_mmlu/README.md @@ -4,22 +4,22 @@ Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation` -Abstract: `https://arxiv.org/abs/2412.03304` +Abstract: [https://arxiv.org/abs/2412.03304](https://arxiv.org/abs/2412.03304) -Global-MMLU-Lite is a balanced collection of culturally sensitive and culturally agnostic MMLU tasks. It is designed for efficient evaluation of multilingual models in 15 languages (including English). Only languages with human translations and post-edits in the original [Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 dataset have been included in the lite version. +Global-MMLU-Lite is a balanced collection of culturally sensitive and culturally agnostic MMLU tasks. It is designed for efficient evaluation of multilingual models in 15 languages (including English). Only languages with human translations and post-edits in the original [Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 dataset have been included in the lite version. -Homepage: `https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite` +Homepage: [https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite](https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite) ### Citation ```bibtex @misc{singh2024globalmmluunderstandingaddressing, - title={Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation}, + title={Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation}, author={Shivalika Singh and Angelika Romanou and Clémentine Fourrier and David I. Adelani and Jian Gang Ngui and Daniel Vila-Suero and Peerat Limkonchotiwat and Kelly Marchisio and Wei Qi Leong and Yosephine Susanto and Raymond Ng and Shayne Longpre and Wei-Yin Ko and Madeline Smith and Antoine Bosselut and Alice Oh and Andre F. T. Martins and Leshem Choshen and Daphne Ippolito and Enzo Ferrante and Marzieh Fadaee and Beyza Ermis and Sara Hooker}, year={2024}, eprint={2412.03304}, archivePrefix={arXiv}, primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.03304}, + url={https://arxiv.org/abs/2412.03304}, } ```