Skip to content

Commit

Permalink
Add GPO-Llama-3-8B-Instruct-GPM-2B and SPPO-Llama-3-8B-Instruct-GPM-2B
Browse files Browse the repository at this point in the history
  • Loading branch information
xukp20 authored Oct 19, 2024
1 parent d96bcbd commit 9d8e91d
Show file tree
Hide file tree
Showing 10 changed files with 142,678 additions and 5 deletions.
4,832 changes: 4,832 additions & 0 deletions results/GPO-Llama-3-8B-Instruct-GPM-2B/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4,832 changes: 4,832 additions & 0 deletions results/SPPO-Llama-3-8B-Instruct-GPM-2B/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,mode,avg_length,length_controlled_winrate,lc_standard_error
SelfMoA_gemma-2-9b-it-WPO-HB,77.58955217385297,1.231940914887347,640,165,0,805,79.5031055900621,community,3261,78.53928111481099,0.3042788133382446
Shopee-SlimMoA-v1,75.61428659805350,1.2706274059194700,621,184,0,805,77.14285714285720,community,1994,77.4515432873834,0.43017522149239600
blendaxai-gm-l6-vo31,69.11033492869565,1.3280735654354863,562,242,1,805,69.87577639751554,community,1809,76.91981221023656,0.5725365663132986
Shopee-SlimMoA-v1,75.6142865980535,1.27062740591947,621,184,0,805,77.1428571428572,community,1994,77.4515432873834,0.430175221492396
blendaxai-gm-l6-vo31,69.11033492869565,1.3280735654354865,562,242,1,805,69.87577639751554,community,1809,76.91981221023656,0.5725365663132986
gemma-2-9b-it-WPO-HB,77.82503168985093,1.2355857177790277,640,163,2,805,79.62732919254658,community,2285,76.72506842726064,0.4242603928637889
SelfMoA_gemma-2-9b-it-SimPO,71.9958856144492,1.3495341826849294,597,208,0,805,74.16149068322981,community,1930,75.04950944068965,0.44287068760098436
SelfMoA_gemma-2-9b-it-SimPO,71.9958856144492,1.3495341826849294,597,208,0,805,74.16149068322981,community,1930,75.04950944068965,0.4428706876009843
blendaxai-gm-l3-v35,73.41035740244067,1.254951147343878,607,196,2,805,75.527950310559,community,2186,73.37270365010379,0.6163911450738288
gemma-2-9b-it-SimPO,65.86422561532919,1.423459922555078,540,264,1,805,67.14285714285714,community,1833,72.3508446939842,0.5167873784867067
openpipe-moa-gpt-4-turbo-v1,63.15493451236265,1.422980098799326,515,283,7,805,64.40993788819875,community,1856,68.37866250336802,0.7309418614587613
Expand All @@ -24,7 +24,7 @@ gpt4_1106_preview_verbose,64.30360147101865,1.3348590089025316,525,268,12,805,65
gpt-4o-mini-2024-07-18,44.65413862507926,1.4572395578449813,350,451,4,805,43.72670807453416,minimal,1861,50.727144855901976,0.8284734951761676
Storm-7B,50.26886905528583,1.4728176780737183,397,408,0,805,49.31677018633541,community,2045,50.45110959343775,
gpt4_1106_preview,50.0,0.0,0,0,805,805,50.0,minimal,2049,50.0,
REBEL-Llama-3-8B-Instruct-Armo,48.43655307668638,1.480341435123528,394,410,1,805,49.006211180124225,community,1965,49.314293536857114,0.7061879308002301
REBEL-Llama-3-8B-Instruct-Armo,48.43655307668638,1.480341435123528,394,410,1,805,49.00621118012423,community,1965,49.31429353685712,0.7061879308002301
Infinity-Instruct-7M-Gen-Llama3_1-70B,37.46327383827497,1.4734130373862548,299,501,5,805,37.453416149068325,community,1654,46.10043331712677,0.822439983375277
Llama-3-Instruct-8B-SimPO-ExPO,40.63285400856655,1.4439449942168028,325,479,1,805,40.43478260869565,community,1765,45.78021783946177,
Llama-3-Instruct-8B-SimPO,40.52977498461182,1.422574464675002,319,485,1,805,39.68944099378882,community,1825,44.65131348921881,0.8800655791760451
Expand All @@ -38,13 +38,15 @@ claude-3-opus-20240229,29.10526953334248,1.3941539442369442,223,579,3,805,27.888
Infinity-Instruct-7M-Gen-mistral-7B,34.347412485016434,1.412595625747994,263,541,1,805,32.732919254658384,community,1742,39.66949964831439,0.8048310993594987
Meta-Llama-3.1-405B-Instruct-Turbo,39.10666895419877,1.4335939943941904,305,497,3,805,38.07453416149068,minimal,1988,39.25732749961743,0.9064666759144326
SPPO-Llama-3-Instruct-8B-PairRM,39.67286090605648,1.424722356202499,310,494,1,805,38.57142857142858,community,2066,38.56280663670214,0.8694594533275739
GPO-Llama-3-8B-Instruct-GPM-2B,48.87200127423316,1.4567650924969209,394,411,0,805,48.94409937888199,community,2613,38.4334071653788,0.7965862068931436
gpt4,23.576789314782605,1.275704201206918,179,618,8,805,22.732919254658384,verified,1365,38.12808974440021,
Qwen2-72B-Instruct,29.8527557752399,1.3690032071830978,231,569,5,805,29.006211180124225,verified,1626,38.07461345451606,0.8956826164517345
Meta-Llama-3.1-70B-Instruct-Turbo,39.12691443804968,1.4277422726408466,306,496,3,805,38.19875776397515,minimal,2044,38.05512453607286,0.9009912768416926
Infinity-Instruct-3M-0625-Llama3-70B,24.277231851026183,1.3152941480778837,188,613,4,805,23.60248447204969,community,1294,37.97881098506053,0.8189316873655579
aligner-2b_qwen1.5-72b-chat,31.773037737123104,1.2392772646245978,180,473,152,805,31.801242236024844,community,1812,36.725868878524274,
Qwen1.5-72B-Chat,26.49828339562733,1.304236164893057,201,600,4,805,25.217391304347824,verified,1549,36.571754111987296,
gpt4_0314,22.073258928708075,1.2466725494608204,172,627,6,805,21.73913043478261,verified,1371,35.30706121640206,
SPPO-Llama-3-8B-Instruct-GPM-2B,45.44098127183851,1.4552017482034645,362,443,0,805,44.96894409937888,community,2490,35.30471134991328,0.8108797072336295
Ein-70B-v0.1,24.84472049689441,1.521406431103307,199,604,2,805,24.84472049689441,community,1467,35.029054008520646,
claude-3-sonnet-20240229,25.556325292273296,1.3419811051815638,193,608,4,805,24.22360248447205,minimal,1420,34.87247436243302,
FsfairX-Zephyr-Chat-v0.1,35.94648644102434,1.4410058098036145,285,517,3,805,35.59006211180124,community,2275,34.78744762311656,
Expand Down Expand Up @@ -211,4 +213,4 @@ oasst-sft-pythia-12b,1.790114083180124,0.3985580883049341,13,790,2,805,1.7391304
guanaco-13b,3.469596859739131,0.5518606725700214,22,780,3,805,2.919254658385093,verified,1774,3.003787329611614,
guanaco-7b,2.880002266173913,0.5202924149314048,21,783,1,805,2.670807453416149,verified,1364,2.871116813131697,
Qwen1.5-1.8B-Chat,3.70555681579365,0.5811750995496215,27,774,3,804,3.544776119402985,verified,2673,2.588498849185137,
baichuan-13b-chat,1.9921455615279504,0.4176985079331233,14,790,1,805,1.8012422360248446,community,1727,2.062170253598568,
baichuan-13b-chat,1.9921455615279504,0.4176985079331233,14,790,1,805,1.8012422360248446,community,1727,2.062170253598568,
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,5 @@ Llama-3-8B-Instruct-SkillMix,-0.3007600604906024,0.4853486472189554,-0.280872752
REBEL-Llama-3-8B-Instruct-Armo,-1.0427168605260002,0.6464073051877255,0.0395191056877229
SelfMoA_gemma-2-9b-it-SimPO,-0.8425253084188749,0.5482697859900880,1.2874783673834935
SelfMoA_gemma-2-9b-it-WPO-HB,0.2523363342614252,0.3970191588440620,1.4137351138484051
GPO-Llama-3-8B-Instruct-GPM-2B,-1.1688688988236986,0.7678817822697138,-0.4997466376902971
SPPO-Llama-3-8B-Instruct-GPM-2B,-1.2289746990068291,0.8046474033904255,-0.6767509934260389
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Model configuration entry for GPO-Llama-3-8B-Instruct-GPM-2B.
# NOTE(review): nesting reconstructed after indentation was lost in extraction;
# confirm against the committed configs.yaml.
GPO-Llama-3-8B-Instruct-GPM-2B:
  # Arguments forwarded to the completion function named in fn_completions.
  completions_kwargs:
    batch_size: 900
    max_new_tokens: 4096
    model_kwargs:
      dtype: bfloat16
    model_name: "general-preference/GPO-Llama-3-8B-Instruct-GPM-2B"
    # Generation stops on either id; presumably Llama-3's <|end_of_text|> and
    # <|eot_id|> tokens — verify against the model's tokenizer.
    stop_token_ids:
      - 128001
      - 128009
    temperature: 0.9
    top_p: 1.0
    use_beam_search: false
  fn_completions: vllm_local_completions
  pretty_name: GPO-Llama-3-8B-Instruct-GPM-2B
  # Path to the prompt template file for this model.
  prompt_template: GPO-Llama-3-8B-Instruct-GPM-2B/prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>


Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Model configuration entry for SPPO-Llama-3-8B-Instruct-GPM-2B.
# NOTE(review): nesting reconstructed after indentation was lost in extraction;
# confirm against the committed configs.yaml.
SPPO-Llama-3-8B-Instruct-GPM-2B:
  # Arguments forwarded to the completion function named in fn_completions.
  completions_kwargs:
    batch_size: 900
    max_new_tokens: 4096
    model_kwargs:
      dtype: bfloat16
    model_name: "general-preference/SPPO-Llama-3-8B-Instruct-GPM-2B"
    # Generation stops on either id; presumably Llama-3's <|end_of_text|> and
    # <|eot_id|> tokens — verify against the model's tokenizer.
    stop_token_ids:
      - 128001
      - 128009
    temperature: 0.9
    top_p: 1.0
    use_beam_search: false
  fn_completions: vllm_local_completions
  pretty_name: SPPO-Llama-3-8B-Instruct-GPM-2B
  # Path to the prompt template file for this model.
  prompt_template: SPPO-Llama-3-8B-Instruct-GPM-2B/prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>


0 comments on commit 9d8e91d

Please sign in to comment.