From de3eff92149338a281ad13c3610a21a9db2c0bb2 Mon Sep 17 00:00:00 2001 From: jonabur <135807120+jonabur@users.noreply.github.com> Date: Wed, 29 May 2024 10:54:11 +0300 Subject: [PATCH] add arc_challenge_mt --- lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml | 23 +++++++++++++++++++ lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml | 3 +++ lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml | 3 +++ 11 files changed, 53 insertions(+) create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml create mode 100644 lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml new file mode 100644 index 0000000000..f3efdc4cca --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_da +dataset_name: da diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml new file mode 100644 index 0000000000..36fdf7be96 --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_de +dataset_name: de diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml new file mode 100644 index 0000000000..d97580b09e --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_el +dataset_name: el diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml new file mode 100644 index 0000000000..7dffc6c7b9 --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_es +dataset_name: es diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml new file mode 100644 index 0000000000..d2032e34a4 --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml @@ -0,0 +1,23 @@ +group: + - arc_challenge_mt +task: arc_challenge_mt_fi +dataset_path: LumiOpen/arc_challenge_mt +dataset_name: fi +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml new file mode 100644 index 0000000000..03d5ac1725 --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_hu +dataset_name: hu diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml new file mode 100644 index 0000000000..995f7a3dc9 --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_it +dataset_name: it diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml new file mode 100644 index 0000000000..aceaa14b5f --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_nb +dataset_name: nb diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml new file mode 100644 index 0000000000..3b9a332f68 --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_pl +dataset_name: pl diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml new file mode 100644 index 0000000000..748743fc8d --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_pt +dataset_name: pt diff --git a/lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml b/lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml new file mode 100644 index 0000000000..09d97c51eb --- /dev/null +++ b/lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml @@ -0,0 +1,3 @@ +include: arc_challenge_mt_fi.yaml +task: arc_challenge_mt_sv +dataset_name: sv