From f04a051df5e3f894225f1f14b53ee1130fca51d2 Mon Sep 17 00:00:00 2001 From: calpt <36051308+calpt@users.noreply.github.com> Date: Thu, 31 Aug 2023 16:14:54 +0200 Subject: [PATCH] Add CC-100 languages --- subtasks/text_lang/af_cc100.yaml | 5 +++++ subtasks/text_lang/am_cc100.yaml | 5 +++++ subtasks/text_lang/ar_cc100.yaml | 5 +++++ subtasks/text_lang/az_cc100.yaml | 5 +++++ subtasks/text_lang/be_cc100.yaml | 5 +++++ subtasks/text_lang/bg_cc100.yaml | 5 +++++ subtasks/text_lang/bn_cc100.yaml | 5 +++++ subtasks/text_lang/ca_cc100.yaml | 5 +++++ subtasks/text_lang/cs_cc100.yaml | 5 +++++ subtasks/text_lang/cy_cc100.yaml | 5 +++++ subtasks/text_lang/da_cc100.yaml | 5 +++++ subtasks/text_lang/de_cc100.yaml | 5 +++++ subtasks/text_lang/el_cc100.yaml | 5 +++++ subtasks/text_lang/en_cc100.yaml | 5 +++++ subtasks/text_lang/eo_cc100.yaml | 5 +++++ subtasks/text_lang/es_cc100.yaml | 5 +++++ subtasks/text_lang/et_cc100.yaml | 5 +++++ subtasks/text_lang/eu_cc100.yaml | 5 +++++ subtasks/text_lang/fa_cc100.yaml | 5 +++++ subtasks/text_lang/fi_cc100.yaml | 5 +++++ subtasks/text_lang/fr_cc100.yaml | 5 +++++ subtasks/text_lang/ga_cc100.yaml | 5 +++++ subtasks/text_lang/gl_cc100.yaml | 5 +++++ subtasks/text_lang/gu_cc100.yaml | 5 +++++ subtasks/text_lang/ha_cc100.yaml | 5 +++++ subtasks/text_lang/he_cc100.yaml | 5 +++++ subtasks/text_lang/hi_cc100.yaml | 5 +++++ subtasks/text_lang/hr_cc100.yaml | 5 +++++ subtasks/text_lang/hu_cc100.yaml | 5 +++++ subtasks/text_lang/hy_cc100.yaml | 5 +++++ subtasks/text_lang/id_cc100.yaml | 5 +++++ subtasks/text_lang/is_cc100.yaml | 5 +++++ subtasks/text_lang/it_cc100.yaml | 5 +++++ subtasks/text_lang/ja_cc100.yaml | 5 +++++ subtasks/text_lang/ka_cc100.yaml | 5 +++++ subtasks/text_lang/kk_cc100.yaml | 5 +++++ subtasks/text_lang/km_cc100.yaml | 5 +++++ subtasks/text_lang/kn_cc100.yaml | 5 +++++ subtasks/text_lang/ko_cc100.yaml | 5 +++++ subtasks/text_lang/ku_cc100.yaml | 5 +++++ subtasks/text_lang/ky_cc100.yaml | 5 +++++ subtasks/text_lang/la_cc100.yaml | 5 +++++ subtasks/text_lang/lo_cc100.yaml | 5 +++++ subtasks/text_lang/lt_cc100.yaml | 5 +++++ subtasks/text_lang/lv_cc100.yaml | 5 +++++ subtasks/text_lang/mk_cc100.yaml | 5 +++++ subtasks/text_lang/ml_cc100.yaml | 5 +++++ subtasks/text_lang/mn_cc100.yaml | 5 +++++ subtasks/text_lang/mr_cc100.yaml | 5 +++++ subtasks/text_lang/ms_cc100.yaml | 5 +++++ subtasks/text_lang/my_cc100.yaml | 5 +++++ subtasks/text_lang/ne_cc100.yaml | 5 +++++ subtasks/text_lang/nl_cc100.yaml | 5 +++++ subtasks/text_lang/no_cc100.yaml | 5 +++++ subtasks/text_lang/or_cc100.yaml | 5 +++++ subtasks/text_lang/pa_cc100.yaml | 5 +++++ subtasks/text_lang/pl_cc100.yaml | 5 +++++ subtasks/text_lang/ps_cc100.yaml | 5 +++++ subtasks/text_lang/pt_cc100.yaml | 5 +++++ subtasks/text_lang/ro_cc100.yaml | 5 +++++ subtasks/text_lang/ru_cc100.yaml | 5 +++++ subtasks/text_lang/sa_cc100.yaml | 5 +++++ subtasks/text_lang/si_cc100.yaml | 5 +++++ subtasks/text_lang/sk_cc100.yaml | 5 +++++ subtasks/text_lang/sl_cc100.yaml | 5 +++++ subtasks/text_lang/so_cc100.yaml | 5 +++++ subtasks/text_lang/sq_cc100.yaml | 5 +++++ subtasks/text_lang/sr_cc100.yaml | 5 +++++ subtasks/text_lang/sv_cc100.yaml | 5 +++++ subtasks/text_lang/sw_cc100.yaml | 5 +++++ subtasks/text_lang/ta_cc100.yaml | 5 +++++ subtasks/text_lang/te_cc100.yaml | 5 +++++ subtasks/text_lang/th_cc100.yaml | 5 +++++ subtasks/text_lang/tl_cc100.yaml | 5 +++++ subtasks/text_lang/tr_cc100.yaml | 5 +++++ subtasks/text_lang/uk_cc100.yaml | 5 +++++ subtasks/text_lang/ur_cc100.yaml | 5 +++++ subtasks/text_lang/uz_cc100.yaml | 5 +++++ subtasks/text_lang/vi_cc100.yaml | 5 +++++ subtasks/text_lang/zh_cc100.yaml | 6 ++++++ tasks/text_lang/af.yaml | 3 +++ tasks/text_lang/az.yaml | 3 +++ tasks/text_lang/be.yaml | 3 +++ tasks/text_lang/bg.yaml | 3 +++ tasks/text_lang/ca.yaml | 3 +++ tasks/text_lang/cy.yaml | 3 +++ tasks/text_lang/da.yaml | 3 +++ tasks/text_lang/eo.yaml | 3 +++ tasks/text_lang/ga.yaml | 3 +++ tasks/text_lang/gl.yaml | 3 +++ tasks/text_lang/gu.yaml | 3 +++ tasks/text_lang/ha.yaml | 3 +++ tasks/text_lang/he.yaml | 3 +++ tasks/text_lang/hr.yaml | 3 +++ tasks/text_lang/kk.yaml | 3 +++ tasks/text_lang/km.yaml | 3 +++ tasks/text_lang/kn.yaml | 3 +++ tasks/text_lang/ku.yaml | 3 +++ tasks/text_lang/ky.yaml | 3 +++ tasks/text_lang/lo.yaml | 3 +++ tasks/text_lang/lt.yaml | 3 +++ tasks/text_lang/mk.yaml | 3 +++ tasks/text_lang/ml.yaml | 3 +++ tasks/text_lang/mn.yaml | 3 +++ tasks/text_lang/mr.yaml | 3 +++ tasks/text_lang/ms.yaml | 3 +++ tasks/text_lang/ne.yaml | 3 +++ tasks/text_lang/nl.yaml | 3 +++ tasks/text_lang/no.yaml | 3 +++ tasks/text_lang/or.yaml | 3 +++ tasks/text_lang/pa.yaml | 3 +++ tasks/text_lang/pl.yaml | 3 +++ tasks/text_lang/ps.yaml | 3 +++ tasks/text_lang/ro.yaml | 3 +++ tasks/text_lang/sa.yaml | 3 +++ tasks/text_lang/si.yaml | 3 +++ tasks/text_lang/sk.yaml | 3 +++ tasks/text_lang/sl.yaml | 3 +++ tasks/text_lang/so.yaml | 3 +++ tasks/text_lang/sq.yaml | 3 +++ tasks/text_lang/sr.yaml | 3 +++ tasks/text_lang/sv.yaml | 3 +++ tasks/text_lang/te.yaml | 3 +++ tasks/text_lang/tl.yaml | 3 +++ tasks/text_lang/uk.yaml | 3 +++ tasks/text_lang/ur.yaml | 3 +++ tasks/text_lang/uz.yaml | 3 +++ 127 files changed, 542 insertions(+) create mode 100644 subtasks/text_lang/af_cc100.yaml create mode 100644 subtasks/text_lang/am_cc100.yaml create mode 100644 subtasks/text_lang/ar_cc100.yaml create mode 100644 subtasks/text_lang/az_cc100.yaml create mode 100644 subtasks/text_lang/be_cc100.yaml create mode 100644 subtasks/text_lang/bg_cc100.yaml create mode 100644 subtasks/text_lang/bn_cc100.yaml create mode 100644 subtasks/text_lang/ca_cc100.yaml create mode 100644 subtasks/text_lang/cs_cc100.yaml create mode 100644 subtasks/text_lang/cy_cc100.yaml create mode 100644 subtasks/text_lang/da_cc100.yaml create mode 100644 subtasks/text_lang/de_cc100.yaml create mode 100644 subtasks/text_lang/el_cc100.yaml create mode 100644 subtasks/text_lang/en_cc100.yaml create mode 100644 subtasks/text_lang/eo_cc100.yaml create mode 100644 subtasks/text_lang/es_cc100.yaml create mode 100644 subtasks/text_lang/et_cc100.yaml create mode 100644 subtasks/text_lang/eu_cc100.yaml create mode 100644 subtasks/text_lang/fa_cc100.yaml create mode 100644 subtasks/text_lang/fi_cc100.yaml create mode 100644 subtasks/text_lang/fr_cc100.yaml create mode 100644 subtasks/text_lang/ga_cc100.yaml create mode 100644 subtasks/text_lang/gl_cc100.yaml create mode 100644 subtasks/text_lang/gu_cc100.yaml create mode 100644 subtasks/text_lang/ha_cc100.yaml create mode 100644 subtasks/text_lang/he_cc100.yaml create mode 100644 subtasks/text_lang/hi_cc100.yaml create mode 100644 subtasks/text_lang/hr_cc100.yaml create mode 100644 subtasks/text_lang/hu_cc100.yaml create mode 100644 subtasks/text_lang/hy_cc100.yaml create mode 100644 subtasks/text_lang/id_cc100.yaml create mode 100644 subtasks/text_lang/is_cc100.yaml create mode 100644 subtasks/text_lang/it_cc100.yaml create mode 100644 subtasks/text_lang/ja_cc100.yaml create mode 100644 subtasks/text_lang/ka_cc100.yaml create mode 100644 subtasks/text_lang/kk_cc100.yaml create mode 100644 subtasks/text_lang/km_cc100.yaml create mode 100644 subtasks/text_lang/kn_cc100.yaml create mode 100644 subtasks/text_lang/ko_cc100.yaml create mode 100644 subtasks/text_lang/ku_cc100.yaml create mode 100644 subtasks/text_lang/ky_cc100.yaml create mode 100644 subtasks/text_lang/la_cc100.yaml create mode 100644 subtasks/text_lang/lo_cc100.yaml create mode 100644 subtasks/text_lang/lt_cc100.yaml create mode 100644 subtasks/text_lang/lv_cc100.yaml create mode 100644 subtasks/text_lang/mk_cc100.yaml create mode 100644 subtasks/text_lang/ml_cc100.yaml create mode 100644 subtasks/text_lang/mn_cc100.yaml create mode 100644 subtasks/text_lang/mr_cc100.yaml create mode 100644 subtasks/text_lang/ms_cc100.yaml create mode 100644 subtasks/text_lang/my_cc100.yaml create mode 100644 subtasks/text_lang/ne_cc100.yaml create mode 100644 subtasks/text_lang/nl_cc100.yaml create mode 100644 subtasks/text_lang/no_cc100.yaml create mode 100644 subtasks/text_lang/or_cc100.yaml create mode 100644 subtasks/text_lang/pa_cc100.yaml create mode 100644 subtasks/text_lang/pl_cc100.yaml create mode 100644 subtasks/text_lang/ps_cc100.yaml create mode 100644 subtasks/text_lang/pt_cc100.yaml create mode 100644 subtasks/text_lang/ro_cc100.yaml create mode 100644 subtasks/text_lang/ru_cc100.yaml create mode 100644 subtasks/text_lang/sa_cc100.yaml create mode 100644 subtasks/text_lang/si_cc100.yaml create mode 100644 subtasks/text_lang/sk_cc100.yaml create mode 100644 subtasks/text_lang/sl_cc100.yaml create mode 100644 subtasks/text_lang/so_cc100.yaml create mode 100644 subtasks/text_lang/sq_cc100.yaml create mode 100644 subtasks/text_lang/sr_cc100.yaml create mode 100644 subtasks/text_lang/sv_cc100.yaml create mode 100644 subtasks/text_lang/sw_cc100.yaml create mode 100644 subtasks/text_lang/ta_cc100.yaml create mode 100644 subtasks/text_lang/te_cc100.yaml create mode 100644 subtasks/text_lang/th_cc100.yaml create mode 100644 subtasks/text_lang/tl_cc100.yaml create mode 100644 subtasks/text_lang/tr_cc100.yaml create mode 100644 subtasks/text_lang/uk_cc100.yaml create mode 100644 subtasks/text_lang/ur_cc100.yaml create mode 100644 subtasks/text_lang/uz_cc100.yaml create mode 100644 subtasks/text_lang/vi_cc100.yaml create mode 100644 subtasks/text_lang/zh_cc100.yaml create mode 100644 tasks/text_lang/af.yaml create mode 100644 tasks/text_lang/az.yaml create mode 100644 tasks/text_lang/be.yaml create mode 100644 tasks/text_lang/bg.yaml create mode 100644 tasks/text_lang/ca.yaml create mode 100644 tasks/text_lang/cy.yaml create mode 100644 tasks/text_lang/da.yaml create mode 100644 tasks/text_lang/eo.yaml create mode 100644 tasks/text_lang/ga.yaml create mode 100644 tasks/text_lang/gl.yaml create mode 100644 tasks/text_lang/gu.yaml create mode 100644 tasks/text_lang/ha.yaml create mode 100644 tasks/text_lang/he.yaml create mode 100644 tasks/text_lang/hr.yaml create mode 100644 tasks/text_lang/kk.yaml create mode 100644 tasks/text_lang/km.yaml create mode 100644 tasks/text_lang/kn.yaml create mode 100644 tasks/text_lang/ku.yaml create mode 100644 tasks/text_lang/ky.yaml create mode 100644 tasks/text_lang/lo.yaml create mode 100644 tasks/text_lang/lt.yaml create mode 100644 tasks/text_lang/mk.yaml create mode 100644 tasks/text_lang/ml.yaml create mode 100644 tasks/text_lang/mn.yaml create mode 100644 tasks/text_lang/mr.yaml create mode 100644 tasks/text_lang/ms.yaml create mode 100644 tasks/text_lang/ne.yaml create mode 100644 tasks/text_lang/nl.yaml create mode 100644 tasks/text_lang/no.yaml create mode 100644 tasks/text_lang/or.yaml create mode 100644 tasks/text_lang/pa.yaml create mode 100644 tasks/text_lang/pl.yaml create mode 100644 tasks/text_lang/ps.yaml create mode 100644 tasks/text_lang/ro.yaml create mode 100644 tasks/text_lang/sa.yaml create mode 100644 tasks/text_lang/si.yaml create mode 100644 tasks/text_lang/sk.yaml create mode 100644 tasks/text_lang/sl.yaml create mode 100644 tasks/text_lang/so.yaml create mode 100644 tasks/text_lang/sq.yaml create mode 100644 tasks/text_lang/sr.yaml create mode 100644 tasks/text_lang/sv.yaml create mode 100644 tasks/text_lang/te.yaml create mode 100644 tasks/text_lang/tl.yaml create mode 100644 tasks/text_lang/uk.yaml create mode 100644 tasks/text_lang/ur.yaml create mode 100644 tasks/text_lang/uz.yaml diff --git a/subtasks/text_lang/af_cc100.yaml b/subtasks/text_lang/af_cc100.yaml new file mode 100644 index 0000000..3ef8dfc --- /dev/null +++ b/subtasks/text_lang/af_cc100.yaml @@ -0,0 +1,5 @@ +task: af +subtask: cc100 +description: Language modeling for the Afrikaans language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/am_cc100.yaml b/subtasks/text_lang/am_cc100.yaml new file mode 100644 index 0000000..ec27ab1 --- /dev/null +++ b/subtasks/text_lang/am_cc100.yaml @@ -0,0 +1,5 @@ +task: am +subtask: cc100 +description: Language modeling for the Amharic language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ar_cc100.yaml b/subtasks/text_lang/ar_cc100.yaml new file mode 100644 index 0000000..87b6946 --- /dev/null +++ b/subtasks/text_lang/ar_cc100.yaml @@ -0,0 +1,5 @@ +task: ar +subtask: cc100 +description: Language modeling for the Arabic language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/az_cc100.yaml b/subtasks/text_lang/az_cc100.yaml new file mode 100644 index 0000000..708126f --- /dev/null +++ b/subtasks/text_lang/az_cc100.yaml @@ -0,0 +1,5 @@ +task: az +subtask: cc100 +description: Language modeling for the Azerbaijani language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/be_cc100.yaml b/subtasks/text_lang/be_cc100.yaml new file mode 100644 index 0000000..02f5b69 --- /dev/null +++ b/subtasks/text_lang/be_cc100.yaml @@ -0,0 +1,5 @@ +task: be +subtask: cc100 +description: Language modeling for the Belarusian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/bg_cc100.yaml b/subtasks/text_lang/bg_cc100.yaml new file mode 100644 index 0000000..f725730 --- /dev/null +++ b/subtasks/text_lang/bg_cc100.yaml @@ -0,0 +1,5 @@ +task: bg +subtask: cc100 +description: Language modeling for the Bulgarian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/bn_cc100.yaml b/subtasks/text_lang/bn_cc100.yaml new file mode 100644 index 0000000..d0d40be --- /dev/null +++ b/subtasks/text_lang/bn_cc100.yaml @@ -0,0 +1,5 @@ +task: bn +subtask: cc100 +description: Language modeling for the Bengali language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ca_cc100.yaml b/subtasks/text_lang/ca_cc100.yaml new file mode 100644 index 0000000..1b32fb0 --- /dev/null +++ b/subtasks/text_lang/ca_cc100.yaml @@ -0,0 +1,5 @@ +task: ca +subtask: cc100 +description: Language modeling for the Catalan language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/cs_cc100.yaml b/subtasks/text_lang/cs_cc100.yaml new file mode 100644 index 0000000..0305cc0 --- /dev/null +++ b/subtasks/text_lang/cs_cc100.yaml @@ -0,0 +1,5 @@ +task: cs +subtask: cc100 +description: Language modeling for the Czech language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/cy_cc100.yaml b/subtasks/text_lang/cy_cc100.yaml new file mode 100644 index 0000000..8d6b189 --- /dev/null +++ b/subtasks/text_lang/cy_cc100.yaml @@ -0,0 +1,5 @@ +task: cy +subtask: cc100 +description: Language modeling for the Welsh language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/da_cc100.yaml b/subtasks/text_lang/da_cc100.yaml new file mode 100644 index 0000000..a7bfa9d --- /dev/null +++ b/subtasks/text_lang/da_cc100.yaml @@ -0,0 +1,5 @@ +task: da +subtask: cc100 +description: Language modeling for the Danish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/de_cc100.yaml b/subtasks/text_lang/de_cc100.yaml new file mode 100644 index 0000000..5fb098d --- /dev/null +++ b/subtasks/text_lang/de_cc100.yaml @@ -0,0 +1,5 @@ +task: de +subtask: cc100 +description: Language modeling for the German language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/el_cc100.yaml b/subtasks/text_lang/el_cc100.yaml new file mode 100644 index 0000000..aa4d32f --- /dev/null +++ b/subtasks/text_lang/el_cc100.yaml @@ -0,0 +1,5 @@ +task: el +subtask: cc100 +description: Language modeling for the Greek language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/en_cc100.yaml b/subtasks/text_lang/en_cc100.yaml new file mode 100644 index 0000000..625fe98 --- /dev/null +++ b/subtasks/text_lang/en_cc100.yaml @@ -0,0 +1,5 @@ +task: en +subtask: cc100 +description: Language modeling for the English language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/eo_cc100.yaml b/subtasks/text_lang/eo_cc100.yaml new file mode 100644 index 0000000..c70c9a6 --- /dev/null +++ b/subtasks/text_lang/eo_cc100.yaml @@ -0,0 +1,5 @@ +task: eo +subtask: cc100 +description: Language modeling for the Esperanto language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/es_cc100.yaml b/subtasks/text_lang/es_cc100.yaml new file mode 100644 index 0000000..3151845 --- /dev/null +++ b/subtasks/text_lang/es_cc100.yaml @@ -0,0 +1,5 @@ +task: es +subtask: cc100 +description: Language modeling for the Spanish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/et_cc100.yaml b/subtasks/text_lang/et_cc100.yaml new file mode 100644 index 0000000..f9bcc90 --- /dev/null +++ b/subtasks/text_lang/et_cc100.yaml @@ -0,0 +1,5 @@ +task: et +subtask: cc100 +description: Language modeling for the Estonian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/eu_cc100.yaml b/subtasks/text_lang/eu_cc100.yaml new file mode 100644 index 0000000..f5ca096 --- /dev/null +++ b/subtasks/text_lang/eu_cc100.yaml @@ -0,0 +1,5 @@ +task: eu +subtask: cc100 +description: Language modeling for the Basque language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/fa_cc100.yaml b/subtasks/text_lang/fa_cc100.yaml new file mode 100644 index 0000000..8b9e9b4 --- /dev/null +++ b/subtasks/text_lang/fa_cc100.yaml @@ -0,0 +1,5 @@ +task: fa +subtask: cc100 +description: Language modeling for the Persian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/fi_cc100.yaml b/subtasks/text_lang/fi_cc100.yaml new file mode 100644 index 0000000..a9ecd6b --- /dev/null +++ b/subtasks/text_lang/fi_cc100.yaml @@ -0,0 +1,5 @@ +task: fi +subtask: cc100 +description: Language modeling for the Finnish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/fr_cc100.yaml b/subtasks/text_lang/fr_cc100.yaml new file mode 100644 index 0000000..0f526db --- /dev/null +++ b/subtasks/text_lang/fr_cc100.yaml @@ -0,0 +1,5 @@ +task: fr +subtask: cc100 +description: Language modeling for the French language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ga_cc100.yaml b/subtasks/text_lang/ga_cc100.yaml new file mode 100644 index 0000000..5f3504e --- /dev/null +++ b/subtasks/text_lang/ga_cc100.yaml @@ -0,0 +1,5 @@ +task: ga +subtask: cc100 +description: Language modeling for the Irish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/gl_cc100.yaml b/subtasks/text_lang/gl_cc100.yaml new file mode 100644 index 0000000..db4ef84 --- /dev/null +++ b/subtasks/text_lang/gl_cc100.yaml @@ -0,0 +1,5 @@ +task: gl +subtask: cc100 +description: Language modeling for the Galician language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/gu_cc100.yaml b/subtasks/text_lang/gu_cc100.yaml new file mode 100644 index 0000000..8778dce --- /dev/null +++ b/subtasks/text_lang/gu_cc100.yaml @@ -0,0 +1,5 @@ +task: gu +subtask: cc100 +description: Language modeling for the Gujarati language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ha_cc100.yaml b/subtasks/text_lang/ha_cc100.yaml new file mode 100644 index 0000000..3844c04 --- /dev/null +++ b/subtasks/text_lang/ha_cc100.yaml @@ -0,0 +1,5 @@ +task: ha +subtask: cc100 +description: Language modeling for the Hausa language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/he_cc100.yaml b/subtasks/text_lang/he_cc100.yaml new file mode 100644 index 0000000..f63a9c7 --- /dev/null +++ b/subtasks/text_lang/he_cc100.yaml @@ -0,0 +1,5 @@ +task: he +subtask: cc100 +description: Language modeling for the Hebrew language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/hi_cc100.yaml b/subtasks/text_lang/hi_cc100.yaml new file mode 100644 index 0000000..592330d --- /dev/null +++ b/subtasks/text_lang/hi_cc100.yaml @@ -0,0 +1,5 @@ +task: hi +subtask: cc100 +description: Language modeling for the Hindi language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/hr_cc100.yaml b/subtasks/text_lang/hr_cc100.yaml new file mode 100644 index 0000000..cb3ebb6 --- /dev/null +++ b/subtasks/text_lang/hr_cc100.yaml @@ -0,0 +1,5 @@ +task: hr +subtask: cc100 +description: Language modeling for the Croatian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/hu_cc100.yaml b/subtasks/text_lang/hu_cc100.yaml new file mode 100644 index 0000000..4fcbba6 --- /dev/null +++ b/subtasks/text_lang/hu_cc100.yaml @@ -0,0 +1,5 @@ +task: hu +subtask: cc100 +description: Language modeling for the Hungarian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/hy_cc100.yaml b/subtasks/text_lang/hy_cc100.yaml new file mode 100644 index 0000000..2c16368 --- /dev/null +++ b/subtasks/text_lang/hy_cc100.yaml @@ -0,0 +1,5 @@ +task: hy +subtask: cc100 +description: Language modeling for the Armenian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/id_cc100.yaml b/subtasks/text_lang/id_cc100.yaml new file mode 100644 index 0000000..7b96b18 --- /dev/null +++ b/subtasks/text_lang/id_cc100.yaml @@ -0,0 +1,5 @@ +task: id +subtask: cc100 +description: Language modeling for the Indonesian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/is_cc100.yaml b/subtasks/text_lang/is_cc100.yaml new file mode 100644 index 0000000..d72d450 --- /dev/null +++ b/subtasks/text_lang/is_cc100.yaml @@ -0,0 +1,5 @@ +task: is +subtask: cc100 +description: Language modeling for the Icelandic language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/it_cc100.yaml b/subtasks/text_lang/it_cc100.yaml new file mode 100644 index 0000000..dc9c14b --- /dev/null +++ b/subtasks/text_lang/it_cc100.yaml @@ -0,0 +1,5 @@ +task: it +subtask: cc100 +description: Language modeling for the Italian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ja_cc100.yaml b/subtasks/text_lang/ja_cc100.yaml new file mode 100644 index 0000000..d98dd0a --- /dev/null +++ b/subtasks/text_lang/ja_cc100.yaml @@ -0,0 +1,5 @@ +task: ja +subtask: cc100 +description: Language modeling for the Japanese language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ka_cc100.yaml b/subtasks/text_lang/ka_cc100.yaml new file mode 100644 index 0000000..1f597df --- /dev/null +++ b/subtasks/text_lang/ka_cc100.yaml @@ -0,0 +1,5 @@ +task: ka +subtask: cc100 +description: Language modeling for the Georgian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/kk_cc100.yaml b/subtasks/text_lang/kk_cc100.yaml new file mode 100644 index 0000000..b53facc --- /dev/null +++ b/subtasks/text_lang/kk_cc100.yaml @@ -0,0 +1,5 @@ +task: kk +subtask: cc100 +description: Language modeling for the Kazakh language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/km_cc100.yaml b/subtasks/text_lang/km_cc100.yaml new file mode 100644 index 0000000..eeb201a --- /dev/null +++ b/subtasks/text_lang/km_cc100.yaml @@ -0,0 +1,5 @@ +task: km +subtask: cc100 +description: Language modeling for the Central Khmer language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/kn_cc100.yaml b/subtasks/text_lang/kn_cc100.yaml new file mode 100644 index 0000000..4475507 --- /dev/null +++ b/subtasks/text_lang/kn_cc100.yaml @@ -0,0 +1,5 @@ +task: kn +subtask: cc100 +description: Language modeling for the Kannada language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ko_cc100.yaml b/subtasks/text_lang/ko_cc100.yaml new file mode 100644 index 0000000..b65e94b --- /dev/null +++ b/subtasks/text_lang/ko_cc100.yaml @@ -0,0 +1,5 @@ +task: ko +subtask: cc100 +description: Language modeling for the Korean language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ku_cc100.yaml b/subtasks/text_lang/ku_cc100.yaml new file mode 100644 index 0000000..6223907 --- /dev/null +++ b/subtasks/text_lang/ku_cc100.yaml @@ -0,0 +1,5 @@ +task: ku +subtask: cc100 +description: Language modeling for the Kurdish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ky_cc100.yaml b/subtasks/text_lang/ky_cc100.yaml new file mode 100644 index 0000000..e1b979d --- /dev/null +++ b/subtasks/text_lang/ky_cc100.yaml @@ -0,0 +1,5 @@ +task: ky +subtask: cc100 +description: Language modeling for the Kirghiz language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/la_cc100.yaml b/subtasks/text_lang/la_cc100.yaml new file mode 100644 index 0000000..e8d46f7 --- /dev/null +++ b/subtasks/text_lang/la_cc100.yaml @@ -0,0 +1,5 @@ +task: la +subtask: cc100 +description: Language modeling for the Latin language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/lo_cc100.yaml b/subtasks/text_lang/lo_cc100.yaml new file mode 100644 index 0000000..11340a8 --- /dev/null +++ b/subtasks/text_lang/lo_cc100.yaml @@ -0,0 +1,5 @@ +task: lo +subtask: cc100 +description: Language modeling for the Lao language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/lt_cc100.yaml b/subtasks/text_lang/lt_cc100.yaml new file mode 100644 index 0000000..68e4cb3 --- /dev/null +++ b/subtasks/text_lang/lt_cc100.yaml @@ -0,0 +1,5 @@ +task: lt +subtask: cc100 +description: Language modeling for the Lithuanian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/lv_cc100.yaml b/subtasks/text_lang/lv_cc100.yaml new file mode 100644 index 0000000..7ed26f6 --- /dev/null +++ b/subtasks/text_lang/lv_cc100.yaml @@ -0,0 +1,5 @@ +task: lv +subtask: cc100 +description: Language modeling for the Latvian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/mk_cc100.yaml b/subtasks/text_lang/mk_cc100.yaml new file mode 100644 index 0000000..35bab47 --- /dev/null +++ b/subtasks/text_lang/mk_cc100.yaml @@ -0,0 +1,5 @@ +task: mk +subtask: cc100 +description: Language modeling for the Macedonian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ml_cc100.yaml b/subtasks/text_lang/ml_cc100.yaml new file mode 100644 index 0000000..668d4b7 --- /dev/null +++ b/subtasks/text_lang/ml_cc100.yaml @@ -0,0 +1,5 @@ +task: ml +subtask: cc100 +description: Language modeling for the Malayalam language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/mn_cc100.yaml b/subtasks/text_lang/mn_cc100.yaml new file mode 100644 index 0000000..45a2254 --- /dev/null +++ b/subtasks/text_lang/mn_cc100.yaml @@ -0,0 +1,5 @@ +task: mn +subtask: cc100 +description: Language modeling for the Mongolian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/mr_cc100.yaml b/subtasks/text_lang/mr_cc100.yaml new file mode 100644 index 0000000..5f2458f --- /dev/null +++ b/subtasks/text_lang/mr_cc100.yaml @@ -0,0 +1,5 @@ +task: mr +subtask: cc100 +description: Language modeling for the Marathi language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ms_cc100.yaml b/subtasks/text_lang/ms_cc100.yaml new file mode 100644 index 0000000..49aa1d9 --- /dev/null +++ b/subtasks/text_lang/ms_cc100.yaml @@ -0,0 +1,5 @@ +task: ms +subtask: cc100 +description: Language modeling for the Malay language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/my_cc100.yaml b/subtasks/text_lang/my_cc100.yaml new file mode 100644 index 0000000..e34cf84 --- /dev/null +++ b/subtasks/text_lang/my_cc100.yaml @@ -0,0 +1,5 @@ +task: my +subtask: cc100 +description: Language modeling for the Burmese language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ne_cc100.yaml b/subtasks/text_lang/ne_cc100.yaml new file mode 100644 index 0000000..a58f5ea --- /dev/null +++ b/subtasks/text_lang/ne_cc100.yaml @@ -0,0 +1,5 @@ +task: ne +subtask: cc100 +description: Language modeling for the Nepali language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/nl_cc100.yaml b/subtasks/text_lang/nl_cc100.yaml new file mode 100644 index 0000000..8083ec0 --- /dev/null +++ b/subtasks/text_lang/nl_cc100.yaml @@ -0,0 +1,5 @@ +task: nl +subtask: cc100 +description: Language modeling for the Dutch language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/no_cc100.yaml b/subtasks/text_lang/no_cc100.yaml new file mode 100644 index 0000000..2208a7e --- /dev/null +++ b/subtasks/text_lang/no_cc100.yaml @@ -0,0 +1,5 @@ +task: no +subtask: cc100 +description: Language modeling for the Norwegian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/or_cc100.yaml b/subtasks/text_lang/or_cc100.yaml new file mode 100644 index 0000000..c7b1bed --- /dev/null +++ b/subtasks/text_lang/or_cc100.yaml @@ -0,0 +1,5 @@ +task: or +subtask: cc100 +description: Language modeling for the Oriya language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/pa_cc100.yaml b/subtasks/text_lang/pa_cc100.yaml new file mode 100644 index 0000000..5528939 --- /dev/null +++ b/subtasks/text_lang/pa_cc100.yaml @@ -0,0 +1,5 @@ +task: pa +subtask: cc100 +description: Language modeling for the Punjabi language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/pl_cc100.yaml b/subtasks/text_lang/pl_cc100.yaml new file mode 100644 index 0000000..03b2f51 --- /dev/null +++ b/subtasks/text_lang/pl_cc100.yaml @@ -0,0 +1,5 @@ +task: pl +subtask: cc100 +description: Language modeling for the Polish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ps_cc100.yaml b/subtasks/text_lang/ps_cc100.yaml new file mode 100644 index 0000000..c5c55be --- /dev/null +++ b/subtasks/text_lang/ps_cc100.yaml @@ -0,0 +1,5 @@ +task: ps +subtask: cc100 +description: Language modeling for the Pashto language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/pt_cc100.yaml b/subtasks/text_lang/pt_cc100.yaml new file mode 100644 index 0000000..f6c7aba --- /dev/null +++ b/subtasks/text_lang/pt_cc100.yaml @@ -0,0 +1,5 @@ +task: pt +subtask: cc100 +description: Language modeling for the Portuguese language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ro_cc100.yaml b/subtasks/text_lang/ro_cc100.yaml new file mode 100644 index 0000000..b9d1382 --- /dev/null +++ b/subtasks/text_lang/ro_cc100.yaml @@ -0,0 +1,5 @@ +task: ro +subtask: cc100 +description: Language modeling for the Romanian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ru_cc100.yaml b/subtasks/text_lang/ru_cc100.yaml new file mode 100644 index 0000000..5fd4921 --- /dev/null +++ b/subtasks/text_lang/ru_cc100.yaml @@ -0,0 +1,5 @@ +task: ru +subtask: cc100 +description: Language modeling for the Russian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sa_cc100.yaml b/subtasks/text_lang/sa_cc100.yaml new file mode 100644 index 0000000..445c81d --- /dev/null +++ b/subtasks/text_lang/sa_cc100.yaml @@ -0,0 +1,5 @@ +task: sa +subtask: cc100 +description: Language modeling for the Sanskrit language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/si_cc100.yaml b/subtasks/text_lang/si_cc100.yaml new file mode 100644 index 0000000..866a35d --- /dev/null +++ b/subtasks/text_lang/si_cc100.yaml @@ -0,0 +1,5 @@ +task: si +subtask: cc100 +description: Language modeling for the Sinhala language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sk_cc100.yaml b/subtasks/text_lang/sk_cc100.yaml new file mode 100644 index 0000000..26ee2c8 --- /dev/null +++ b/subtasks/text_lang/sk_cc100.yaml @@ -0,0 +1,5 @@ +task: sk +subtask: cc100 +description: Language modeling for the Slovak language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sl_cc100.yaml b/subtasks/text_lang/sl_cc100.yaml new file mode 100644 index 0000000..5baaaa2 --- /dev/null +++ b/subtasks/text_lang/sl_cc100.yaml @@ -0,0 +1,5 @@ +task: sl +subtask: cc100 +description: Language modeling for the Slovenian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/so_cc100.yaml b/subtasks/text_lang/so_cc100.yaml new file mode 100644 index 0000000..73ea64d --- /dev/null +++ b/subtasks/text_lang/so_cc100.yaml @@ -0,0 +1,5 @@ +task: so +subtask: cc100 +description: Language modeling for the Somali language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sq_cc100.yaml b/subtasks/text_lang/sq_cc100.yaml new file mode 100644 index 0000000..7efc92e --- /dev/null +++ b/subtasks/text_lang/sq_cc100.yaml @@ -0,0 +1,5 @@ +task: sq +subtask: cc100 +description: Language modeling for the Albanian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sr_cc100.yaml b/subtasks/text_lang/sr_cc100.yaml new file mode 100644 index 0000000..931fb44 --- /dev/null +++ b/subtasks/text_lang/sr_cc100.yaml @@ -0,0 +1,5 @@ +task: sr +subtask: cc100 +description: Language modeling for the Serbian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sv_cc100.yaml b/subtasks/text_lang/sv_cc100.yaml new file mode 100644 index 0000000..1509c1b --- /dev/null +++ b/subtasks/text_lang/sv_cc100.yaml @@ -0,0 +1,5 @@ +task: sv +subtask: cc100 +description: Language modeling for the Swedish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/sw_cc100.yaml b/subtasks/text_lang/sw_cc100.yaml new file mode 100644 index 0000000..0eea567 --- /dev/null +++ b/subtasks/text_lang/sw_cc100.yaml @@ -0,0 +1,5 @@ +task: sw +subtask: cc100 +description: Language modeling for the Swahili language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ta_cc100.yaml b/subtasks/text_lang/ta_cc100.yaml new file mode 100644 index 0000000..d93e1ce --- /dev/null +++ b/subtasks/text_lang/ta_cc100.yaml @@ -0,0 +1,5 @@ +task: ta +subtask: cc100 +description: Language modeling for the Tamil language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/te_cc100.yaml b/subtasks/text_lang/te_cc100.yaml new file mode 100644 index 0000000..f64c34b --- /dev/null +++ b/subtasks/text_lang/te_cc100.yaml @@ -0,0 +1,5 @@ +task: te +subtask: cc100 +description: Language modeling for the Telugu language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/th_cc100.yaml b/subtasks/text_lang/th_cc100.yaml new file mode 100644 index 0000000..08070f7 --- /dev/null +++ b/subtasks/text_lang/th_cc100.yaml @@ -0,0 +1,5 @@ +task: th +subtask: cc100 +description: Language modeling for the Thai language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/tl_cc100.yaml b/subtasks/text_lang/tl_cc100.yaml new file mode 100644 index 0000000..d94600c --- /dev/null +++ b/subtasks/text_lang/tl_cc100.yaml @@ -0,0 +1,5 @@ +task: tl +subtask: cc100 +description: Language modeling for the Tagalog language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/tr_cc100.yaml b/subtasks/text_lang/tr_cc100.yaml new file mode 100644 index 0000000..8f293b8 --- /dev/null +++ b/subtasks/text_lang/tr_cc100.yaml @@ -0,0 +1,5 @@ +task: tr +subtask: cc100 +description: Language modeling for the Turkish language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/uk_cc100.yaml b/subtasks/text_lang/uk_cc100.yaml new file mode 100644 index 0000000..0e17de8 --- /dev/null +++ b/subtasks/text_lang/uk_cc100.yaml @@ -0,0 +1,5 @@ +task: uk +subtask: cc100 +description: Language modeling for the Ukrainian language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/ur_cc100.yaml b/subtasks/text_lang/ur_cc100.yaml new file mode 100644 index 0000000..2a62e18 --- /dev/null +++ b/subtasks/text_lang/ur_cc100.yaml @@ -0,0 +1,5 @@ +task: ur +subtask: cc100 +description: Language modeling for the Urdu language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/uz_cc100.yaml b/subtasks/text_lang/uz_cc100.yaml new file mode 100644 index 0000000..e455476 --- /dev/null +++ b/subtasks/text_lang/uz_cc100.yaml @@ -0,0 +1,5 @@ +task: uz +subtask: cc100 +description: Language modeling for the Uzbek language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/vi_cc100.yaml b/subtasks/text_lang/vi_cc100.yaml new file mode 100644 index 0000000..694cde4 --- /dev/null +++ b/subtasks/text_lang/vi_cc100.yaml @@ -0,0 +1,5 @@ +task: vi +subtask: cc100 +description: Language modeling for the Vietnamese language on the CC-100 corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/subtasks/text_lang/zh_cc100.yaml b/subtasks/text_lang/zh_cc100.yaml new file mode 100644 index 0000000..2edf2c5 --- /dev/null +++ b/subtasks/text_lang/zh_cc100.yaml @@ -0,0 +1,6 @@ +task: zh +subtask: cc100 +description: Language modeling for the Chinese (traditional) language on the CC-100 + corpus. +url: https://data.statmt.org/cc-100/ +citation: '' diff --git a/tasks/text_lang/af.yaml b/tasks/text_lang/af.yaml new file mode 100644 index 0000000..2466512 --- /dev/null +++ b/tasks/text_lang/af.yaml @@ -0,0 +1,3 @@ +task: af +displayname: Afrikaans +description: Afrikaans language. diff --git a/tasks/text_lang/az.yaml b/tasks/text_lang/az.yaml new file mode 100644 index 0000000..fe78ed2 --- /dev/null +++ b/tasks/text_lang/az.yaml @@ -0,0 +1,3 @@ +task: az +displayname: Azerbaijani +description: Azerbaijani language. diff --git a/tasks/text_lang/be.yaml b/tasks/text_lang/be.yaml new file mode 100644 index 0000000..d4b835b --- /dev/null +++ b/tasks/text_lang/be.yaml @@ -0,0 +1,3 @@ +task: be +displayname: Belarusian +description: Belarusian language. diff --git a/tasks/text_lang/bg.yaml b/tasks/text_lang/bg.yaml new file mode 100644 index 0000000..6d40136 --- /dev/null +++ b/tasks/text_lang/bg.yaml @@ -0,0 +1,3 @@ +task: bg +displayname: Bulgarian +description: Bulgarian language. diff --git a/tasks/text_lang/ca.yaml b/tasks/text_lang/ca.yaml new file mode 100644 index 0000000..af8b9fa --- /dev/null +++ b/tasks/text_lang/ca.yaml @@ -0,0 +1,3 @@ +task: ca +displayname: Catalan +description: Catalan language. diff --git a/tasks/text_lang/cy.yaml b/tasks/text_lang/cy.yaml new file mode 100644 index 0000000..709e8b5 --- /dev/null +++ b/tasks/text_lang/cy.yaml @@ -0,0 +1,3 @@ +task: cy +displayname: Welsh +description: Welsh language. diff --git a/tasks/text_lang/da.yaml b/tasks/text_lang/da.yaml new file mode 100644 index 0000000..bee9bd4 --- /dev/null +++ b/tasks/text_lang/da.yaml @@ -0,0 +1,3 @@ +task: da +displayname: Danish +description: Danish language. diff --git a/tasks/text_lang/eo.yaml b/tasks/text_lang/eo.yaml new file mode 100644 index 0000000..35af5b6 --- /dev/null +++ b/tasks/text_lang/eo.yaml @@ -0,0 +1,3 @@ +task: eo +displayname: Esperanto +description: Esperanto language. diff --git a/tasks/text_lang/ga.yaml b/tasks/text_lang/ga.yaml new file mode 100644 index 0000000..76583f0 --- /dev/null +++ b/tasks/text_lang/ga.yaml @@ -0,0 +1,3 @@ +task: ga +displayname: Irish +description: Irish language. diff --git a/tasks/text_lang/gl.yaml b/tasks/text_lang/gl.yaml new file mode 100644 index 0000000..e06f11b --- /dev/null +++ b/tasks/text_lang/gl.yaml @@ -0,0 +1,3 @@ +task: gl +displayname: Galician +description: Galician language. diff --git a/tasks/text_lang/gu.yaml b/tasks/text_lang/gu.yaml new file mode 100644 index 0000000..0119a27 --- /dev/null +++ b/tasks/text_lang/gu.yaml @@ -0,0 +1,3 @@ +task: gu +displayname: Gujarati +description: Gujarati language. diff --git a/tasks/text_lang/ha.yaml b/tasks/text_lang/ha.yaml new file mode 100644 index 0000000..5403804 --- /dev/null +++ b/tasks/text_lang/ha.yaml @@ -0,0 +1,3 @@ +task: ha +displayname: Hausa +description: Hausa language. diff --git a/tasks/text_lang/he.yaml b/tasks/text_lang/he.yaml new file mode 100644 index 0000000..4d71164 --- /dev/null +++ b/tasks/text_lang/he.yaml @@ -0,0 +1,3 @@ +task: he +displayname: Hebrew +description: Hebrew language. diff --git a/tasks/text_lang/hr.yaml b/tasks/text_lang/hr.yaml new file mode 100644 index 0000000..d803cb4 --- /dev/null +++ b/tasks/text_lang/hr.yaml @@ -0,0 +1,3 @@ +task: hr +displayname: Croatian +description: Croatian language. diff --git a/tasks/text_lang/kk.yaml b/tasks/text_lang/kk.yaml new file mode 100644 index 0000000..d97e804 --- /dev/null +++ b/tasks/text_lang/kk.yaml @@ -0,0 +1,3 @@ +task: kk +displayname: Kazakh +description: Kazakh language. diff --git a/tasks/text_lang/km.yaml b/tasks/text_lang/km.yaml new file mode 100644 index 0000000..b15d8f8 --- /dev/null +++ b/tasks/text_lang/km.yaml @@ -0,0 +1,3 @@ +task: km +displayname: Central Khmer +description: Central Khmer language. diff --git a/tasks/text_lang/kn.yaml b/tasks/text_lang/kn.yaml new file mode 100644 index 0000000..469c7a1 --- /dev/null +++ b/tasks/text_lang/kn.yaml @@ -0,0 +1,3 @@ +task: kn +displayname: Kannada +description: Kannada language. diff --git a/tasks/text_lang/ku.yaml b/tasks/text_lang/ku.yaml new file mode 100644 index 0000000..6944c05 --- /dev/null +++ b/tasks/text_lang/ku.yaml @@ -0,0 +1,3 @@ +task: ku +displayname: Kurdish +description: Kurdish language. diff --git a/tasks/text_lang/ky.yaml b/tasks/text_lang/ky.yaml new file mode 100644 index 0000000..a9a6d1e --- /dev/null +++ b/tasks/text_lang/ky.yaml @@ -0,0 +1,3 @@ +task: ky +displayname: Kirghiz +description: Kirghiz language. diff --git a/tasks/text_lang/lo.yaml b/tasks/text_lang/lo.yaml new file mode 100644 index 0000000..223d20a --- /dev/null +++ b/tasks/text_lang/lo.yaml @@ -0,0 +1,3 @@ +task: lo +displayname: Lao +description: Lao language. diff --git a/tasks/text_lang/lt.yaml b/tasks/text_lang/lt.yaml new file mode 100644 index 0000000..d3ffbc7 --- /dev/null +++ b/tasks/text_lang/lt.yaml @@ -0,0 +1,3 @@ +task: lt +displayname: Lithuanian +description: Lithuanian language. diff --git a/tasks/text_lang/mk.yaml b/tasks/text_lang/mk.yaml new file mode 100644 index 0000000..9e044c4 --- /dev/null +++ b/tasks/text_lang/mk.yaml @@ -0,0 +1,3 @@ +task: mk +displayname: Macedonian +description: Macedonian language. diff --git a/tasks/text_lang/ml.yaml b/tasks/text_lang/ml.yaml new file mode 100644 index 0000000..2e099b2 --- /dev/null +++ b/tasks/text_lang/ml.yaml @@ -0,0 +1,3 @@ +task: ml +displayname: Malayalam +description: Malayalam language. diff --git a/tasks/text_lang/mn.yaml b/tasks/text_lang/mn.yaml new file mode 100644 index 0000000..a9f1b9d --- /dev/null +++ b/tasks/text_lang/mn.yaml @@ -0,0 +1,3 @@ +task: mn +displayname: Mongolian +description: Mongolian language. diff --git a/tasks/text_lang/mr.yaml b/tasks/text_lang/mr.yaml new file mode 100644 index 0000000..2da985d --- /dev/null +++ b/tasks/text_lang/mr.yaml @@ -0,0 +1,3 @@ +task: mr +displayname: Marathi +description: Marathi language. diff --git a/tasks/text_lang/ms.yaml b/tasks/text_lang/ms.yaml new file mode 100644 index 0000000..7d1fba4 --- /dev/null +++ b/tasks/text_lang/ms.yaml @@ -0,0 +1,3 @@ +task: ms +displayname: Malay +description: Malay language. diff --git a/tasks/text_lang/ne.yaml b/tasks/text_lang/ne.yaml new file mode 100644 index 0000000..3bde155 --- /dev/null +++ b/tasks/text_lang/ne.yaml @@ -0,0 +1,3 @@ +task: ne +displayname: Nepali +description: Nepali language. diff --git a/tasks/text_lang/nl.yaml b/tasks/text_lang/nl.yaml new file mode 100644 index 0000000..0d9a316 --- /dev/null +++ b/tasks/text_lang/nl.yaml @@ -0,0 +1,3 @@ +task: nl +displayname: Dutch +description: Dutch language. diff --git a/tasks/text_lang/no.yaml b/tasks/text_lang/no.yaml new file mode 100644 index 0000000..d399e08 --- /dev/null +++ b/tasks/text_lang/no.yaml @@ -0,0 +1,3 @@ +task: no +displayname: Norwegian +description: Norwegian language. diff --git a/tasks/text_lang/or.yaml b/tasks/text_lang/or.yaml new file mode 100644 index 0000000..a1bdf11 --- /dev/null +++ b/tasks/text_lang/or.yaml @@ -0,0 +1,3 @@ +task: or +displayname: Oriya +description: Oriya language. diff --git a/tasks/text_lang/pa.yaml b/tasks/text_lang/pa.yaml new file mode 100644 index 0000000..70a3c39 --- /dev/null +++ b/tasks/text_lang/pa.yaml @@ -0,0 +1,3 @@ +task: pa +displayname: Punjabi +description: Punjabi language. diff --git a/tasks/text_lang/pl.yaml b/tasks/text_lang/pl.yaml new file mode 100644 index 0000000..9272a38 --- /dev/null +++ b/tasks/text_lang/pl.yaml @@ -0,0 +1,3 @@ +task: pl +displayname: Polish +description: Polish language. diff --git a/tasks/text_lang/ps.yaml b/tasks/text_lang/ps.yaml new file mode 100644 index 0000000..9a29a7e --- /dev/null +++ b/tasks/text_lang/ps.yaml @@ -0,0 +1,3 @@ +task: ps +displayname: Pashto +description: Pashto language. diff --git a/tasks/text_lang/ro.yaml b/tasks/text_lang/ro.yaml new file mode 100644 index 0000000..11becec --- /dev/null +++ b/tasks/text_lang/ro.yaml @@ -0,0 +1,3 @@ +task: ro +displayname: Romanian +description: Romanian language. diff --git a/tasks/text_lang/sa.yaml b/tasks/text_lang/sa.yaml new file mode 100644 index 0000000..a29d91f --- /dev/null +++ b/tasks/text_lang/sa.yaml @@ -0,0 +1,3 @@ +task: sa +displayname: Sanskrit +description: Sanskrit language. diff --git a/tasks/text_lang/si.yaml b/tasks/text_lang/si.yaml new file mode 100644 index 0000000..2cbb41c --- /dev/null +++ b/tasks/text_lang/si.yaml @@ -0,0 +1,3 @@ +task: si +displayname: Sinhala +description: Sinhala language. diff --git a/tasks/text_lang/sk.yaml b/tasks/text_lang/sk.yaml new file mode 100644 index 0000000..41dbf1c --- /dev/null +++ b/tasks/text_lang/sk.yaml @@ -0,0 +1,3 @@ +task: sk +displayname: Slovak +description: Slovak language. diff --git a/tasks/text_lang/sl.yaml b/tasks/text_lang/sl.yaml new file mode 100644 index 0000000..5c16d2f --- /dev/null +++ b/tasks/text_lang/sl.yaml @@ -0,0 +1,3 @@ +task: sl +displayname: Slovenian +description: Slovenian language. diff --git a/tasks/text_lang/so.yaml b/tasks/text_lang/so.yaml new file mode 100644 index 0000000..2003f17 --- /dev/null +++ b/tasks/text_lang/so.yaml @@ -0,0 +1,3 @@ +task: so +displayname: Somali +description: Somali language. diff --git a/tasks/text_lang/sq.yaml b/tasks/text_lang/sq.yaml new file mode 100644 index 0000000..52d7090 --- /dev/null +++ b/tasks/text_lang/sq.yaml @@ -0,0 +1,3 @@ +task: sq +displayname: Albanian +description: Albanian language. diff --git a/tasks/text_lang/sr.yaml b/tasks/text_lang/sr.yaml new file mode 100644 index 0000000..fece5e9 --- /dev/null +++ b/tasks/text_lang/sr.yaml @@ -0,0 +1,3 @@ +task: sr +displayname: Serbian +description: Serbian language. diff --git a/tasks/text_lang/sv.yaml b/tasks/text_lang/sv.yaml new file mode 100644 index 0000000..bfa10cc --- /dev/null +++ b/tasks/text_lang/sv.yaml @@ -0,0 +1,3 @@ +task: sv +displayname: Swedish +description: Swedish language. diff --git a/tasks/text_lang/te.yaml b/tasks/text_lang/te.yaml new file mode 100644 index 0000000..846bc6a --- /dev/null +++ b/tasks/text_lang/te.yaml @@ -0,0 +1,3 @@ +task: te +displayname: Telugu +description: Telugu language. diff --git a/tasks/text_lang/tl.yaml b/tasks/text_lang/tl.yaml new file mode 100644 index 0000000..12790f5 --- /dev/null +++ b/tasks/text_lang/tl.yaml @@ -0,0 +1,3 @@ +task: tl +displayname: Tagalog +description: Tagalog language. diff --git a/tasks/text_lang/uk.yaml b/tasks/text_lang/uk.yaml new file mode 100644 index 0000000..32e5f38 --- /dev/null +++ b/tasks/text_lang/uk.yaml @@ -0,0 +1,3 @@ +task: uk +displayname: Ukrainian +description: Ukrainian language. diff --git a/tasks/text_lang/ur.yaml b/tasks/text_lang/ur.yaml new file mode 100644 index 0000000..2780b0e --- /dev/null +++ b/tasks/text_lang/ur.yaml @@ -0,0 +1,3 @@ +task: ur +displayname: Urdu +description: Urdu language. diff --git a/tasks/text_lang/uz.yaml b/tasks/text_lang/uz.yaml new file mode 100644 index 0000000..3d6b3bf --- /dev/null +++ b/tasks/text_lang/uz.yaml @@ -0,0 +1,3 @@ +task: uz +displayname: Uzbek +description: Uzbek language.