forked from Upaya07/NeurIPS-llm-efficiency-challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
selected_NI_tasks
462 lines (462 loc) · 17.9 KB
/
selected_NI_tasks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
task002_quoref_answer_generation
task004_mctaco_answer_generation_event_duration
task007_mctaco_answer_generation_transient_stationary
task010_mctaco_answer_generation_event_ordering
task013_mctaco_answer_generation_absolute_timepoint
task016_mctaco_answer_generation_frequency
task022_cosmosqa_passage_inappropriate_binary
task024_cosmosqa_answer_generation
task028_drop_answer_generation
task038_qasc_combined_fact
task041_qasc_answer_generation
task047_miscellaneous_answering_science_questions
task049_multirc_questions_needed_to_answer
task051_multirc_correct_answer_single_sentence
task054_multirc_write_correct_answer
task056_multirc_classify_correct_answer
task057_multirc_classify_incorrect_answer
task058_multirc_question_answering
task059_ropes_story_generation
task061_ropes_answer_generation
task062_bigbench_repeat_copy_logic
task065_timetravel_consistent_sentence_classification
task066_timetravel_binary_consistency_classification
task067_abductivenli_answer_generation
task068_abductivenli_incorrect_answer_generation
task069_abductivenli_classification
task070_abductivenli_incorrect_classification
task071_abductivenli_answer_generation
task072_abductivenli_answer_generation
task073_commonsenseqa_answer_generation
task075_squad1.1_answer_generation
task079_conala_concat_strings
task080_piqa_answer_generation
task083_babi_t1_single_supporting_fact_answer_generation
task084_babi_t1_single_supporting_fact_identify_relevant_fact
task088_identify_typo_verification
task089_swap_words_verification
task091_all_elements_from_index_i_to_j
task093_conala_normalize_lists
task094_conala_calculate_mean
task095_conala_max_absolute_value
task096_conala_list_index_subtraction
task097_conala_remove_duplicates
task098_conala_list_intersection
task099_reverse_elements_between_index_i_and_j
task100_concatenate_all_elements_from_index_i_to_j
task101_reverse_and_concatenate_all_elements_from_index_i_to_j
task103_facts2story_long_text_generation
task105_story_cloze-rocstories_sentence_generation
task106_scruples_ethical_judgment
task108_contextualabusedetection_classification
task1135_xcsr_en_commonsense_mc_classification
task113_count_frequency_of_letter
task1148_maximum_ascii_value
task1161_coda19_title_generation
task1163_coda19_section_classification
task1164_coda19_section_correction_classification
task116_com2sense_commonsense_reasoning
task1187_politifact_classification
task1196_atomic_classification_oeffect
task1197_atomic_classification_oreact
task1198_atomic_classification_owant
task1199_atomic_classification_xattr
task1200_atomic_classification_xeffect
task1201_atomic_classification_xintent
task1202_atomic_classification_xneed
task1203_atomic_classification_xreact
task1204_atomic_classification_hinderedby
task1205_atomic_classification_isafter
task1206_atomic_classification_isbefore
task1207_atomic_classification_atlocation
task1208_atomic_classification_xreason
task1209_atomic_classification_objectuse
task1210_atomic_classification_madeupof
task1211_atomic_classification_hassubevent
task1212_atomic_classification_hasproperty
task1213_atomic_classification_desires
task1214_atomic_classification_xwant
task1215_atomic_classification_capableof
task1216_atomic_classification_causes
task122_conala_list_index_addition
task123_conala_sort_dictionary
task124_conala_pair_averages
task125_conala_pair_differences
task1286_openbookqa_question_answering
task1290_xsum_summarization
task1291_multi_news_summarization
task1292_yelp_review_full_text_categorization
task1293_kilt_tasks_hotpotqa_question_answering
task1294_wiki_qa_answer_verification
task1295_adversarial_qa_question_answering
task1296_wiki_hop_question_answering
task1297_qasc_question_answering
task1308_amazonreview_category_classification
task1309_amazonreview_summary_classification
task1310_amazonreview_rating_classification
task1311_amazonreview_rating_classification
task1312_amazonreview_polarity_classification
task1313_amazonreview_polarity_classification
task1327_qa_zre_answer_generation_from_question
task1338_peixian_equity_evaluation_corpus_sentiment_classifier
task1340_msr_text_compression_compression
task1342_amazon_us_reviews_title
task1343_amazon_us_reviews_rating
task1344_glue_entailment_classification
task1346_glue_cola_grammatical_correctness_classification
task1355_sent_comp_summarization
task1356_xlsum_title_generation
task1357_xlsum_summary_generation
task1358_xlsum_title_generation
task1359_numer_sense_answer_generation
task1360_numer_sense_multiple_choice_qa_generation
task1361_movierationales_classification
task1364_hans_answer_generation
task1366_healthfact_classification
task1368_healthfact_sentence_generation
task1369_healthfact_sentence_generation
task136_winowhy_knowledge_categorization
task1378_quarel_correct_answer_generation
task137_detoxifying-lms_classification_toxicity
task1380_quarel_correct_option_generation
task1382_quarel_write_correct_answer
task1385_anli_r1_entailment
task1386_anli_r2_entailment
task1387_anli_r3_entailment
task1388_cb_entailment
task1389_hellaswag_completion
task1392_superglue_multirc_answer_verification
task1399_obqa_answer_generation
task1401_obqa_sentence_generation
task1404_date_conversion
task1410_dart_relationship_extraction
task1411_dart_subject_identification
task1412_web_questions_question_answering
task1413_dart_object_identification
task1415_youtube_caption_corrections_grammar_correction
task1416_youtube_caption_corrections_incorrect_grammar_classification
task1422_mathqa_physics
task1431_head_qa_answer_generation
task1434_head_qa_classification
task1438_doqa_cooking_answer_generation
task1441_doqa_movies_answer_generation
task144_subjqa_question_answering
task1451_drug_dose_extraction
task1488_sarcasmdetection_headline_classification
task1489_sarcasmdetection_tweet_classification
task1495_adverse_drug_event_classification
task1499_dstc3_summarization
task1502_hatexplain_classification
task1503_hatexplain_classification
task1504_hatexplain_answer_generation
task1506_celebrity_minimal_dob_span
task1510_evalution_relation_extraction
task1515_imppres_longtextgeneration
task1516_imppres_naturallanguageinference
task1517_limit_classfication
task1518_limit_answer_generation
task151_tomqa_find_location_easy_clean
task1520_qa_srl_answer_generation
task1529_scitail1.1_classification
task152_tomqa_find_location_easy_noise
task1530_scitail1.1_sentence_generation
task1532_daily_dialog_emotion_classification
task1535_daily_dialog_uniqueness_classification
task1536_daily_dialog_happiness_classification
task153_tomqa_find_location_hard_clean
task1540_parsed_pdfs_summarization
task1541_agnews_classification
task1542_every_ith_element_from_starting
task1548_wiqa_binary_classification
task1549_wiqa_answer_generation_missing_step
task154_tomqa_find_location_hard_noise
task1553_cnn_dailymail_summarization
task1554_scitail_classification
task1555_scitail_answer_generation
task1556_scitail_passage_generation
task1557_jfleg_answer_generation
task1564_triviaqa_answer_generation
task1565_triviaqa_classification
task1568_propara_classification
task156_codah_classification_adversarial
task1572_samsum_summary
task1573_samsum_classification
task157_count_vowels_and_consonants
task1581_eqasc-perturbed_answer_generation
task1586_scifact_title_generation
task158_count_frequency_of_words
task1592_yahoo_answers_topics_classfication
task1593_yahoo_answers_topics_classification
task1597_nyc_slot_filling
task159_check_frequency_of_words_in_sentence_pair
task1601_webquestions_answer_generation
task1604_ethos_text_classification
task1605_ethos_text_classification
task1606_ethos_text_classification
task1607_ethos_text_classification
task1608_xquad_en_answer_generation
task160_replace_letter_in_a_sentence
task1612_sick_label_classification
task1613_sick_given_category_generate_sentence
task1615_sick_tclassify_b_relation_a
task161_count_words_containing_letter
task1625_disfl_qa_asnwer_generation
task162_count_words_starting_with_letter
task1630_openpi_classification
task163_count_words_ending_with_letter
task164_mcscript_question_answering_text
task1656_gooaq_answer_generation
task1658_billsum_summarization
task1659_title_generation
task165_mcscript_question_answering_commonsense
task1661_super_glue_classification
task1706_ljspeech_classification
task170_hotpotqa_answer_generation
task1711_poki_text_generation
task1712_poki_classification
task1720_civil_comments_toxicity_classification
task1721_civil_comments_obscenity_classification
task1722_civil_comments_threat_classification
task1723_civil_comments_sexuallyexplicit_classification
task1724_civil_comments_insult_classification
task1725_civil_comments_severtoxicity_classification
task1727_wiqa_what_is_the_effect
task1729_personachat_generate_next
task1730_personachat_choose_next
task1731_quartz_question_answering
task178_quartz_question_answering
task179_participant_extraction
task180_intervention_extraction
task181_outcome_extraction
task184_snli_entailment_to_neutral_text_modification
task185_snli_contradiction_to_neutral_text_modification
task186_snli_contradiction_to_entailment_text_modification
task187_snli_entailment_to_contradiction_text_modification
task188_snli_neutral_to_entailment_text_modification
task189_snli_neutral_to_contradiction_text_modification
task190_snli_classification
task192_hotpotqa_sentence_generation
task194_duorc_answer_generation
task195_sentiment140_classification
task196_sentiment140_answer_generation
task199_mnli_classification
task200_mnli_entailment_classification
task201_mnli_neutral_classification
task202_mnli_contradiction_classification
task203_mnli_sentence_generation
task209_stancedetection_classification
task213_rocstories_correct_ending_classification
task214_rocstories_incorrect_ending_classification
task216_rocstories_correct_answer_generation
task217_rocstories_ordering_answer_generation
task218_rocstories_swap_order_answer_generation
task219_rocstories_title_answer_generation
task220_rocstories_title_classification
task221_rocstories_two_choice_classification
task222_rocstories_two_chioce_slotting_classification
task223_quartz_explanation_generation
task224_scruples_anecdotes_ethical_judgment
task225_english_language_answer_generation
task228_arc_answer_generation_easy
task229_arc_answer_generation_hard
task230_iirc_passage_classification
task231_iirc_link_classification
task234_iirc_passage_line_answer_generation
task237_iirc_answer_from_subtext_answer_generation
task238_iirc_answer_from_passage_answer_generation
task239_tweetqa_answer_generation
task241_tweetqa_classification
task247_dream_answer_generation
task268_casehold_legal_answer_generation
task269_csrg_counterfactual_story_generation
task270_csrg_counterfactual_context_generation
task277_stereoset_sentence_generation_stereotype
task278_stereoset_sentence_generation_antistereotype
task279_stereoset_classification_stereotype
task280_stereoset_classification_stereotype_type
task282_scruples_event_time
task284_imdb_classification
task285_imdb_answer_generation
task286_olid_offense_judgment
task288_gigaword_summarization
task291_semeval_2020_task4_commonsense_validation
task293_storycommonsense_emotion_text_generation
task295_semeval_2020_task4_commonsense_reasoning
task296_storycloze_correct_end_classification
task297_storycloze_incorrect_end_classification
task298_storycloze_correct_end_classification
task300_storycloze_order_generation
task302_record_classification
task309_race_answer_generation
task310_race_classification
task316_crows-pairs_classification_stereotype
task317_crows-pairs_classification_stereotype_type
task318_stereoset_classification_gender
task319_stereoset_classification_profession
task320_stereoset_classification_race
task321_stereoset_classification_religion
task322_jigsaw_classification_threat
task323_jigsaw_classification_sexually_explicit
task324_jigsaw_classification_disagree
task325_jigsaw_classification_identity_attack
task326_jigsaw_classification_obscene
task327_jigsaw_classification_toxic
task328_jigsaw_classification_insult
task332_tellmewhy_answer_generation
task333_hateeval_classification_hate_en
task335_hateeval_classification_aggresive_en
task337_hateeval_classification_individual_en
task339_record_answer_generation
task344_hybridqa_answer_generation
task352_coda-19_classification
task363_sst2_polarity_classification
task364_regard_social_impact_classification
task376_reverse_order_of_words
task377_remove_words_of_given_length
task378_reverse_words_of_given_length
task379_agnews_topic_classification
task380_boolq_yes_no_question
task385_socialiqa_incorrect_answer_generation
task390_torque_text_span_selection
task391_causal_relationship
task392_inverse_causal_relationship
task393_plausible_result_generation
task397_semeval_2018_task1_tweet_anger_detection
task398_semeval_2018_task1_tweet_joy_detection
task399_semeval_2018_task1_tweet_sadness_detection
task403_creak_commonsense_inference
task418_persent_title_generation
task420_persent_document_sentiment_classification
task421_persent_sentence_sentiment_classification
task422_persent_sentence_sentiment_verification
task423_persent_document_sentiment_verification
task444_com_qa_question_paraphrases_answer_generation
task453_swag_answer_generation
task460_qasper_answer_generation
task469_mrqa_answer_generation
task475_yelp_polarity_classification
task476_cls_english_books_classification
task477_cls_english_dvd_classification
task478_cls_english_music_classification
task488_extract_all_alphabetical_elements_from_list_in_order
task490_mwsc_options_generation
task491_mwsc_answer_generation
task493_review_polarity_classification
task494_review_polarity_answer_generation
task495_semeval_headline_classification
task497_extract_all_numbers_from_list_in_order
task498_scruples_anecdotes_whoiswrong_classification
task499_extract_and_add_all_numbers_from_list
task500_scruples_anecdotes_title_generation
task501_scruples_anecdotes_post_type_verification
task502_scruples_anecdotes_whoiswrong_verification
task503_scruples_anecdotes_isanswerable
task508_scruples_dilemmas_more_ethical_isidentifiable
task510_reddit_tifu_title_summarization
task511_reddit_tifu_long_text_summarization
task512_twitter_emotion_classification
task513_argument_stance_classification
task517_emo_classify_emotion_of_dialogue
task518_emo_different_dialogue_emotions
task521_trivia_question_classification
task522_news_editorial_summary
task550_discofuse_sentence_generation
task569_recipe_nlg_text_generation
task572_recipe_nlg_text_generation
task574_air_dialogue_sentence_generation
task576_curiosity_dialogs_answer_generation
task579_socialiqa_classification
task580_socialiqa_answer_generation
task582_naturalquestion_answer_generation
task586_amazonfood_polarity_classification
task587_amazonfood_polarity_correction_classification
task588_amazonfood_rating_classification
task589_amazonfood_summary_text_generation
task591_sciq_answer_generation
task593_sciq_explanation_generation
task595_mocha_answer_generation
task597_cuad_answer_generation
task598_cuad_answer_generation
task602_wikitext-103_answer_generation
task603_wikitext-103_fill_in_the_blank
task607_sbic_intentional_offense_binary_classification
task608_sbic_sexual_offense_binary_classification
task609_sbic_potentially_offense_binary_classification
task611_mutual_multi_turn_dialogue
task614_glucose_cause_event_detection
task615_moviesqa_answer_generation
task617_amazonreview_category_text_generation
task618_amazonreview_summary_text_generation
task619_ohsumed_abstract_title_generation
task626_xlwic_sentence_based_on_given_word_sentence_generation
task627_xlwic_word_with_same_meaning_sentence_generation
task628_xlwic_word_with_different_meaning_sentence_generation
task629_dbpedia_14_classification
task632_dbpedia_14_classification
task633_dbpedia_14_answer_generation
task640_esnli_classification
task641_esnli_classification
task642_esnli_classification
task645_summarization
task668_extreme_abstract_summarization
task669_ambigqa_answer_generation
task672_amazon_and_yelp_summarization_dataset_summarization
task672_nummersense
task679_hope_edi_english_text_classification
task682_online_privacy_policy_text_classification
task683_online_privacy_policy_text_purpose_answer_generation
task684_online_privacy_policy_text_information_type_generation
task738_perspectrum_classification
task740_lhoestq_answer_generation_quantity
task741_lhoestq_answer_generation_place
task742_lhoestq_answer_generation_frequency
task743_eurlex_summarization
task744_eurlex_classification
task746_yelp_restaurant_review_classification
task747_glucose_cause_emotion_detection
task748_glucose_reverse_cause_event_detection
task749_glucose_reverse_cause_emotion_detection
task758_msr_sqa_question_answer_generation
task761_app_review_classification
task767_craigslist_bargains_classification
task768_qed_text_span_selection
task769_qed_summarization
task819_pec_sentiment_classification
task820_protoqa_answer_generation
task823_peixian-rtgender_sentiment_analysis
task827_copa_commonsense_reasoning
task828_copa_commonsense_cause_effect
task833_poem_sentiment_classification
task843_financial_phrasebank_classification
task844_financial_phrasebank_classification
task846_pubmedqa_classification
task849_pubmedqa_answer_generation
task853_hippocorpus_long_text_generation
task854_hippocorpus_classification
task858_inquisitive_span_detection
task870_msmarco_answer_generation
task875_emotion_classification
task881_schema_guided_dstc8_classification
task887_quail_answer_generation
task888_reviews_classification
task889_goemotions_classification
task890_gcwd_classification
task898_freebase_qa_answer_generation
task902_deceptive_opinion_spam_classification
task903_deceptive_opinion_spam_classification
task904_hate_speech_offensive_classification
task905_hate_speech_offensive_classification
task918_coqa_answer_generation
task923_event2mind_classifier
task926_coached_conv_pref_word_generation
task929_products_reviews_classification
task930_dailydialog_classification
task931_dailydialog_classification
task935_defeasible_nli_atomic_classification
task936_defeasible_nli_snli_classification
task937_defeasible_nli_social_classification
task955_wiki_auto_style_transfer
task958_e2e_nlg_text_generation_parse
task965_librispeech_asr_missing_word_prediction
task966_ruletaker_fact_checking_based_on_given_context
task970_sherliic_causal_relationship