From 4cebe786558caf5aae8fae9cf59215bdc11503aa Mon Sep 17 00:00:00 2001 From: Luca Soldaini Date: Fri, 13 Dec 2024 12:59:06 -0800 Subject: [PATCH] sampling --- configs/peteish-anneal/olmoe_mix.yaml | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 configs/peteish-anneal/olmoe_mix.yaml diff --git a/configs/peteish-anneal/olmoe_mix.yaml b/configs/peteish-anneal/olmoe_mix.yaml new file mode 100644 index 00000000..b7615fbc --- /dev/null +++ b/configs/peteish-anneal/olmoe_mix.yaml @@ -0,0 +1,44 @@ +target_size: 200G + +sources: + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/dclm/*.npy + mix_percent: 0.5 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/pes2o/*.npy + mix_percent: 0.0585 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/flan/*.npy + mix_percent: 0.1660 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/codesearchnet-owmfilter/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/dolmino_math_synth/basic_math/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/dolmino_math_synth/gsm_mind/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/dolmino_math_synth/gsm8k-synth/resample_v1_6x/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/gsm8k/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/mathcoder2-synthmath/*/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/metamath-owmfilter/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/tinyGSM-MIND/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/math/tulu_math/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/stackexchange/*.npy + sample_percent: 1.0 + + - source: s3://ai2-llm/preprocessed/dolmino-mix-1124/allenai/gpt-neox-olmo-dolma-v1_5/wiki/*.npy + sample_percent: 1.0