diff --git a/src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/configs.yaml b/src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/configs.yaml
index 5b97a047..a0a60a67 100644
--- a/src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/configs.yaml
+++ b/src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/configs.yaml
@@ -9,7 +9,7 @@ LMCocktail-10.7B-v1:
     max_new_tokens: 2048
     temperature: 0.7
     top_p: 1.0
-    do_sample: True
+    use_beam_search: True
     length_penalty: 2.0
     early_stopping: True
   pretty_name: "LMCocktail-10.7B-v1"
diff --git a/src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO/configs.yaml b/src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO/configs.yaml
index 23a0c57a..057da030 100644
--- a/src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/Llama-3-Instruct-8B-SimPO/configs.yaml
@@ -1,7 +1,7 @@
 Llama-3-Instruct-8B-SimPO:
   completions_kwargs:
     batch_size: 900
-    do_sample: true
+    use_beam_search: true
     max_new_tokens: 4096
     model_kwargs:
       dtype: bfloat16
diff --git a/src/alpaca_eval/models_configs/Starling-LM-7B-alpha-ExPO/configs.yaml b/src/alpaca_eval/models_configs/Starling-LM-7B-alpha-ExPO/configs.yaml
index 13798548..7d1e4fc1 100644
--- a/src/alpaca_eval/models_configs/Starling-LM-7B-alpha-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/Starling-LM-7B-alpha-ExPO/configs.yaml
@@ -7,7 +7,7 @@ Starling-LM-7B-alpha-ExPO:
       dtype: 'bfloat16'
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/Starling-LM-7B-beta-ExPO/configs.yaml b/src/alpaca_eval/models_configs/Starling-LM-7B-beta-ExPO/configs.yaml
index c949aca8..eb697a52 100644
--- a/src/alpaca_eval/models_configs/Starling-LM-7B-beta-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/Starling-LM-7B-beta-ExPO/configs.yaml
@@ -7,7 +7,7 @@ Starling-LM-7B-beta-ExPO:
       dtype: 'bfloat16'
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/causallm-14b/configs.yaml b/src/alpaca_eval/models_configs/causallm-14b/configs.yaml
index 10faa6ad..95cf1881 100644
--- a/src/alpaca_eval/models_configs/causallm-14b/configs.yaml
+++ b/src/alpaca_eval/models_configs/causallm-14b/configs.yaml
@@ -8,7 +8,7 @@ causallm-14b:
     max_new_tokens: 2048
     temperature: 0.7
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     batch_size: 8
   pretty_name: "CausalLM-14B"
   link: "https://huggingface.co/CausalLM/14B"
diff --git a/src/alpaca_eval/models_configs/deita-7b-v1.0/configs.yaml b/src/alpaca_eval/models_configs/deita-7b-v1.0/configs.yaml
index da075009..2b0d9b00 100644
--- a/src/alpaca_eval/models_configs/deita-7b-v1.0/configs.yaml
+++ b/src/alpaca_eval/models_configs/deita-7b-v1.0/configs.yaml
@@ -8,6 +8,6 @@ deita-7b-v1.0:
     max_new_tokens: 2048
     temperature: 0.7
     top_p: 1.0
-    do_sample: True
+    use_beam_search: True
   pretty_name: "DEITA 7B v1.0"
   link: "https://github.com/hkust-nlp/deita"
\ No newline at end of file
diff --git a/src/alpaca_eval/models_configs/internlm2-chat-20b-ExPO/configs.yaml b/src/alpaca_eval/models_configs/internlm2-chat-20b-ExPO/configs.yaml
index bfb0135a..8d5102b6 100644
--- a/src/alpaca_eval/models_configs/internlm2-chat-20b-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/internlm2-chat-20b-ExPO/configs.yaml
@@ -8,7 +8,7 @@ internlm2-chat-20b-ExPO:
       trust_remote_code: True
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/internlm2-chat-7b-ExPO/configs.yaml b/src/alpaca_eval/models_configs/internlm2-chat-7b-ExPO/configs.yaml
index 294d87f6..580b0b29 100644
--- a/src/alpaca_eval/models_configs/internlm2-chat-7b-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/internlm2-chat-7b-ExPO/configs.yaml
@@ -8,7 +8,7 @@ internlm2-chat-7b-ExPO:
       trust_remote_code: True
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/merlinite-7B-AOT/configs.yaml b/src/alpaca_eval/models_configs/merlinite-7B-AOT/configs.yaml
index 44d68023..4098058e 100644
--- a/src/alpaca_eval/models_configs/merlinite-7B-AOT/configs.yaml
+++ b/src/alpaca_eval/models_configs/merlinite-7B-AOT/configs.yaml
@@ -5,7 +5,7 @@ merlinite-7B-AOT:
     model_name: "merlinite-7B-AOT"
     model_kwargs:
       torch_dtype: 'bfloat16'
-    do_sample: True
+    use_beam_search: True
     temperature: 0.7
     max_tokens: 2048
     top_p: 0.9
diff --git a/src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/configs.yaml b/src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/configs.yaml
index 515beeac..3f0d8355 100644
--- a/src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/configs.yaml
+++ b/src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/configs.yaml
@@ -9,7 +9,7 @@ openbuddy-llama2-13b-v11.1:
     max_new_tokens: 2000 # on vllm, prompt_tokens+max_new_tokens must be <= model_max_length otherwise it will raise an error and nothing will be generated
     temperature: 0.7
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     batch_size: 16
   pretty_name: "OpenBuddy-LLaMA2-13B-v11.1"
   link: "https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v11.1-bf16"
diff --git a/src/alpaca_eval/models_configs/pairrm-tulu-2-13b/configs.yaml b/src/alpaca_eval/models_configs/pairrm-tulu-2-13b/configs.yaml
index 7bff9b9e..f39961be 100644
--- a/src/alpaca_eval/models_configs/pairrm-tulu-2-13b/configs.yaml
+++ b/src/alpaca_eval/models_configs/pairrm-tulu-2-13b/configs.yaml
@@ -9,7 +9,7 @@ pairrm-tulu-2-13b:
     max_new_tokens: 7500
     temperature: 0.0
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     best_of: 16 # number of completions to generate, using PairRM to select the best one
     batch_size: 800
   pretty_name: "PairRM 0.4B+Tulu 2+DPO 13B (best-of-16)"
diff --git a/src/alpaca_eval/models_configs/pairrm-tulu-2-70b/configs.yaml b/src/alpaca_eval/models_configs/pairrm-tulu-2-70b/configs.yaml
index b63e9618..a3eb79e8 100644
--- a/src/alpaca_eval/models_configs/pairrm-tulu-2-70b/configs.yaml
+++ b/src/alpaca_eval/models_configs/pairrm-tulu-2-70b/configs.yaml
@@ -9,7 +9,7 @@ pairrm-tulu-2-70b:
     max_new_tokens: 7500
     temperature: 0.0
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     best_of: 16 # number of completions to generate, using PairRM to select the best one
     batch_size: 800
   pretty_name: "PairRM 0.4B+Tulu 2+DPO 70B (best-of-16)"
diff --git a/src/alpaca_eval/models_configs/phi-2/configs.yaml b/src/alpaca_eval/models_configs/phi-2/configs.yaml
index 48bbe029..43679cb9 100644
--- a/src/alpaca_eval/models_configs/phi-2/configs.yaml
+++ b/src/alpaca_eval/models_configs/phi-2/configs.yaml
@@ -11,6 +11,6 @@ phi-2:
     max_new_tokens: 2048
     temperature: 0.7
     top_p: 1.0
-    do_sample: True
+    use_beam_search: True
   pretty_name: "Phi 2"
   link: "https://huggingface.co/microsoft/phi-2"
diff --git a/src/alpaca_eval/models_configs/tulu-2-dpo-13b-ExPO/configs.yaml b/src/alpaca_eval/models_configs/tulu-2-dpo-13b-ExPO/configs.yaml
index 68f32c8f..959d456e 100644
--- a/src/alpaca_eval/models_configs/tulu-2-dpo-13b-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/tulu-2-dpo-13b-ExPO/configs.yaml
@@ -7,7 +7,7 @@ tulu-2-dpo-13b-ExPO:
       dtype: 'bfloat16'
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml b/src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml
index f704b959..f9f6e2a4 100644
--- a/src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml
+++ b/src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml
@@ -9,7 +9,7 @@ tulu-2-dpo-13b:
     max_new_tokens: 7500
     temperature: 0.0
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     batch_size: 800
   pretty_name: "Tulu 2+DPO 13B"
   link: "https://huggingface.co/allenai/tulu-2-dpo-13b"
\ No newline at end of file
diff --git a/src/alpaca_eval/models_configs/tulu-2-dpo-70b-ExPO/configs.yaml b/src/alpaca_eval/models_configs/tulu-2-dpo-70b-ExPO/configs.yaml
index 0057890c..c9bd1411 100644
--- a/src/alpaca_eval/models_configs/tulu-2-dpo-70b-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/tulu-2-dpo-70b-ExPO/configs.yaml
@@ -8,7 +8,7 @@ tulu-2-dpo-70b-ExPO:
       tokenizer_mode: 'auto'
     tensor_parallel_size: 2 # you need at least 2 A100 80GB GPUs to run this model
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml b/src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml
index 98de3280..431b2783 100644
--- a/src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml
+++ b/src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml
@@ -9,7 +9,7 @@ tulu-2-dpo-70b:
     max_new_tokens: 7500
     temperature: 0.0
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     batch_size: 800
   pretty_name: "Tulu 2+DPO 70B"
   link: "https://huggingface.co/allenai/tulu-2-dpo-70b"
\ No newline at end of file
diff --git a/src/alpaca_eval/models_configs/tulu-2-dpo-7b-ExPO/configs.yaml b/src/alpaca_eval/models_configs/tulu-2-dpo-7b-ExPO/configs.yaml
index 1a26cae4..b703ef94 100644
--- a/src/alpaca_eval/models_configs/tulu-2-dpo-7b-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/tulu-2-dpo-7b-ExPO/configs.yaml
@@ -7,7 +7,7 @@ tulu-2-dpo-7b-ExPO:
       dtype: 'bfloat16'
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml b/src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml
index c4a55838..0cf00b9c 100644
--- a/src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml
+++ b/src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml
@@ -9,7 +9,7 @@ tulu-2-dpo-7b:
     max_new_tokens: 7500
     temperature: 0.0
     top_p: 1.0
-    do_sample: False
+    use_beam_search: False
     batch_size: 800
   pretty_name: "Tulu 2+DPO 7B"
   link: "https://huggingface.co/allenai/tulu-2-dpo-7b"
\ No newline at end of file
diff --git a/src/alpaca_eval/models_configs/zephyr-7b-alpha-ExPO/configs.yaml b/src/alpaca_eval/models_configs/zephyr-7b-alpha-ExPO/configs.yaml
index fd8763bb..9da03198 100644
--- a/src/alpaca_eval/models_configs/zephyr-7b-alpha-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/zephyr-7b-alpha-ExPO/configs.yaml
@@ -7,7 +7,7 @@ zephyr-7b-alpha-ExPO:
       dtype: 'bfloat16'
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
diff --git a/src/alpaca_eval/models_configs/zephyr-7b-beta-ExPO/configs.yaml b/src/alpaca_eval/models_configs/zephyr-7b-beta-ExPO/configs.yaml
index 259df29f..33ab6da1 100644
--- a/src/alpaca_eval/models_configs/zephyr-7b-beta-ExPO/configs.yaml
+++ b/src/alpaca_eval/models_configs/zephyr-7b-beta-ExPO/configs.yaml
@@ -7,7 +7,7 @@ zephyr-7b-beta-ExPO:
       dtype: 'bfloat16'
       tokenizer_mode: 'auto'
     max_new_tokens: 2048
-    do_sample: True
+    use_beam_search: True
     seed: 42
     temperature: 0.7
     top_k: 50
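For context on the renamed flag: `do_sample` is the Hugging Face `generate()` switch between greedy/beam decoding and sampling, while `use_beam_search` was the corresponding beam-search flag on vLLM's `SamplingParams` (later vLLM releases removed it). Below is a minimal sketch of the vLLM side, assuming a release that still has the flag; the model id and the `best_of` beam width are placeholders, not values taken from this diff.

```python
# Minimal sketch, not alpaca_eval's actual decoding code: how kwargs like the
# ones edited above would map onto vLLM beam search. Assumes a vLLM release
# that still exposes `use_beam_search` on SamplingParams.
from vllm import LLM, SamplingParams

params = SamplingParams(
    max_tokens=2048,        # the configs call this max_new_tokens
    use_beam_search=True,   # the flag this diff swaps in for do_sample
    best_of=4,              # hypothetical beam width; beam search needs best_of > 1
    temperature=0.0,        # vLLM rejects beam search with temperature > 0,
    top_p=1.0,              # with top_p < 1,
    top_k=-1,               # or with top_k != -1
    length_penalty=2.0,     # as in the LMCocktail config
    early_stopping=True,
)

llm = LLM(model="org/model-name")  # placeholder model id
outputs = llm.generate(["Summarize beam search in one sentence."], params)
print(outputs[0].outputs[0].text)
```

Note that vLLM validated beam search strictly (`best_of > 1`, `temperature == 0`, `top_p == 1`, `top_k == -1`), so configs that keep `temperature: 0.7` or `top_k: 50` alongside `use_beam_search: True` would need those knobs reset on the vLLM path; Hugging Face-style configs express beam search via `num_beams` instead.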