From 50a7704d1d285f14195eeacfa16ca4643b1ca2be Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 22 Feb 2024 17:02:35 -0800 Subject: [PATCH 01/60] Makes R2 work, and adds an ablation config --- configs/olmo7-ablation.yaml | 629 ++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 630 insertions(+), 1 deletion(-) create mode 100644 configs/olmo7-ablation.yaml diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation.yaml new file mode 100644 index 000000000..0dd810e90 --- /dev/null +++ b/configs/olmo7-ablation.yaml @@ -0,0 +1,629 @@ +run_name: olmo7-ablation +seed: 6198 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 1000 +save_num_checkpoints_to_keep: -1 +save_interval_ephemeral: 100 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: 
null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - 
s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + # - label: boolq # requires implementation of the pmi_dc matrix + # type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + # - label: arc_challenge # requires implementation of the pmi_dc matrix + # type: downstream + + - label: copa + type: downstream + + - label: rte + type: downstream + + - label: commitment_bank + type: downstream + + - label: mrpc + type: downstream + + - label: sst2 + type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-012-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-022-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-031-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-040-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-048-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-075-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-086-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00002.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-104-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-113-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-131-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-139-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00002.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00002.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00002.npy diff --git a/pyproject.toml b/pyproject.toml index db9af8201..5d7600f36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "google-cloud-storage", "tokenizers", "packaging", - "cached_path", + "cached_path>=1.6.0", "transformers", ] From 404ea3034d9f1060407403e13e1fda9dd790e143 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Tue, 27 Feb 2024 02:22:21 +0200 Subject: [PATCH 02/60] Script for running ablations on LUMI --- scripts/lumi/olmo7-ablations.sh | 65 +++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 
100644 scripts/lumi/olmo7-ablations.sh diff --git a/scripts/lumi/olmo7-ablations.sh b/scripts/lumi/olmo7-ablations.sh new file mode 100644 index 000000000..e91b6cce2 --- /dev/null +++ b/scripts/lumi/olmo7-ablations.sh @@ -0,0 +1,65 @@ +#!/bin/bash +#SBATCH --job-name=olmo7-ablation +#SBATCH --account=project_462000229 +#SBATCH --output=/pfs/lustref1/flash/project_462000229/logs/%j.log +#SBATCH --nodes=128 # Total number of nodes +#SBATCH --ntasks-per-node=8 +#SBATCH --gpus-per-node=8 # Allocate one gpu per MPI rank +#SBATCH --cpus-per-task=6 +#SBATCH --time=39:15:00 +#SBATCH --mem=0 # All memory on the node +#SBATCH --partition=standard-g + +WANDB_GROUP=$1 +shift + +export OLMO_CONTAINER=llm-lumi-torch21_latest.sif + +export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +export MPICH_GPU_SUPPORT_ENABLED=1 +export NCCL_SOCKET_IFNAME=hsn +export NCCL_NET_GDR_LEVEL=3 +export MIOPEN_USER_DB_PATH=/tmp/${USER}-miopen-cache-${SLURM_JOB_ID} +export MIOPEN_CUSTOM_CACHE_DIR=${MIOPEN_USER_DB_PATH} +export CXI_FORK_SAFE=1 +export CXI_FORK_SAFE_HP=1 +export FI_CXI_DISABLE_CQ_HUGETLB=1 + +# We need to set this to avoid "Cassini Event Queue overflow detected." errors. +export FI_CXI_DEFAULT_CQ_SIZE=131072 + +#export NCCL_DEBUG=INFO +export PYTHONPATH=.:${PYTHONPATH} +export ROCM_PATH=/opt/rocm +export SINGULARITYENV_LD_LIBRARY_PATH=/usr/local/lib:/opt/cray/libfabric/1.15.2.0/lib64 + +# Try playing with max_split_size_mb if you run into OOM errors. 
+#export PYTORCH_HIP_ALLOC_CONF=max_split_size_mb:128 + +export HF_DATASETS_OFFLINE=1 + +export DATA_PATH=$FLASH_DIR/preprocessed/olmo-mix +export CHECKPOINTS_PATH=$FLASH_DIR/checkpoints +export EVAL_DATA_PATH=$SCRATCH_DIR/eval-data + +srun \ + --cpus-per-task=$SLURM_CPUS_PER_TASK \ + --distribution=block:block \ + --kill-on-bad-exit \ + scripts/run_with_environment.sh \ + singularity exec \ + -B"$PROJECT_DIR:$PROJECT_DIR" \ + -B"$FLASH_DIR:$FLASH_DIR" \ + -B"$SCRATCH_DIR:$SCRATCH_DIR" \ + -B /opt/cray:/opt/cray \ + -B /usr/lib64/libcxi.so.1:/usr/lib64/libcxi.so.1 \ + -B /usr/lib64/libjson-c.so.3:/usr/lib64/libjson-c.so.3 \ + $PROJECT_DIR/containers/$OLMO_CONTAINER \ + python scripts/train.py configs/olmo7-ablation.yaml ${@} \ + --run_name=${SLURM_JOB_ID} \ + --activation_checkpointing=fine_grained \ + --fsdp.wrapping_strategy=one_in_four \ + --fsdp.sharding_strategy=FULL_SHARD \ + --sharded_checkpointer=local \ + --time_limit=$((39 * 60 * 60)) \ + --wandb.group=$WANDB_GROUP From 005c406e7992e10d2057ec66803b1a6d73077ccf Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 26 Feb 2024 16:42:39 -0800 Subject: [PATCH 03/60] It's no longer just s3. 
--- olmo/util.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/olmo/util.py b/olmo/util.py index 71ee67e60..902926605 100644 --- a/olmo/util.py +++ b/olmo/util.py @@ -509,12 +509,12 @@ def _s3_upload( _wait_before_retry(attempt) if err is not None: - raise OlmoNetworkError("Failed to check object existence during s3 upload") from err + raise OlmoNetworkError(f"Failed to check object existence during {scheme} upload") from err try: _get_s3_client(scheme).upload_file(source, bucket_name, key) except boto_exceptions.ClientError as e: - raise OlmoNetworkError("Failed to upload to s3") from e + raise OlmoNetworkError(f"Failed to upload to {scheme}") from e def _s3_file_size(scheme: str, bucket_name: str, key: str, max_attempts: int = 3) -> int: @@ -531,7 +531,7 @@ def _s3_file_size(scheme: str, bucket_name: str, key: str, max_attempts: int = 3 log.warning("%s failed attempt %d with retriable error: %s", _s3_file_size.__name__, attempt, err) _wait_before_retry(attempt) - raise OlmoNetworkError("Failed to get s3 file size") from err + raise OlmoNetworkError(f"Failed to get {scheme} file size") from err def _s3_get_bytes_range( @@ -549,7 +549,7 @@ def _s3_get_bytes_range( ) except boto_exceptions.ClientError as e: if int(e.response["Error"]["Code"]) == 404: - raise FileNotFoundError(f"s3://{bucket_name}/{key}") from e + raise FileNotFoundError(f"{scheme}://{bucket_name}/{key}") from e err = e except (boto_exceptions.HTTPClientError, boto_exceptions.ConnectionError) as e: # ResponseStreamingError (subclass of HTTPClientError) can happen as @@ -570,7 +570,7 @@ def _s3_get_bytes_range( # This can cause an irrelevant exception (e.g. KeyError: 'error'), resulting # in us losing the true exception info. To avoid this, we change the exception # to a type that has a single-parameter constructor. 
- raise OlmoNetworkError("Failed to get bytes range from s3") from err + raise OlmoNetworkError(f"Failed to get bytes range from {scheme}") from err def _s3_find_latest_checkpoint(scheme: str, bucket_name: str, prefix: str) -> Optional[str]: @@ -592,7 +592,7 @@ def _s3_find_latest_checkpoint(scheme: str, bucket_name: str, prefix: str) -> Op # We prioritize sharded checkpoints over unsharded ones. if step > latest_step or (step == latest_step and not checkpoint_name.endswith("-unsharded")): latest_step = step - latest_checkpoint = f"s3://ai2-llm/{prefix}" + latest_checkpoint = f"{scheme}://ai2-llm/{prefix}" return latest_checkpoint From 399d33c055cd0d1c595aab2ab09d6c7e6808c472 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 26 Feb 2024 16:52:24 -0800 Subject: [PATCH 04/60] We now think the 1T checkpoint is better. --- configs/olmo7-ablation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation.yaml index 0dd810e90..94cee1151 100644 --- a/configs/olmo7-ablation.yaml +++ b/configs/olmo7-ablation.yaml @@ -68,8 +68,8 @@ save_interval_unsharded: null save_num_unsharded_checkpoints_to_keep: -1 #load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T -#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T -load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T #load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T no_pre_train_checkpoint: true From 6d993f3dbd3858629f3e6b8612e0977c4fab49ea Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 26 Feb 2024 16:58:25 -0800 Subject: [PATCH 05/60] Try the `spawn` start method --- olmo/data/__init__.py | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py index 52421b57a..3ebc1102b 100644 --- a/olmo/data/__init__.py +++ b/olmo/data/__init__.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import Any, Dict, List, Optional, cast +import multiprocessing as mp from torch.utils.data import DataLoader, DistributedSampler @@ -50,6 +51,7 @@ def build_eval_dataloader( batch_size: int, shuffle: bool = True, ) -> DataLoader: + mp.set_start_method('spawn') dataset = build_memmap_dataset(train_config, data_config, include_instance_metadata=True) collator = DataCollator(pad_direction=data_config.pad_direction, pad_token_id=train_config.model.pad_token_id) if data_config.drop_last: @@ -80,6 +82,7 @@ def build_eval_dataloader( def build_train_dataloader(train_config: TrainConfig) -> DataLoader: assert train_config.device_train_batch_size is not None + mp.set_start_method('spawn') collator = DataCollator( pad_direction=train_config.data.pad_direction, pad_token_id=train_config.model.pad_token_id ) From a67053f5c983e1d3ec25e0055a80452432c7eea7 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 26 Feb 2024 17:11:15 -0800 Subject: [PATCH 06/60] Revert "Try the `spawn` start method" This reverts commit 6d993f3dbd3858629f3e6b8612e0977c4fab49ea. 
--- olmo/data/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py index 3ebc1102b..52421b57a 100644 --- a/olmo/data/__init__.py +++ b/olmo/data/__init__.py @@ -1,6 +1,5 @@ from pathlib import Path from typing import Any, Dict, List, Optional, cast -import multiprocessing as mp from torch.utils.data import DataLoader, DistributedSampler @@ -51,7 +50,6 @@ def build_eval_dataloader( batch_size: int, shuffle: bool = True, ) -> DataLoader: - mp.set_start_method('spawn') dataset = build_memmap_dataset(train_config, data_config, include_instance_metadata=True) collator = DataCollator(pad_direction=data_config.pad_direction, pad_token_id=train_config.model.pad_token_id) if data_config.drop_last: @@ -82,7 +80,6 @@ def build_eval_dataloader( def build_train_dataloader(train_config: TrainConfig) -> DataLoader: assert train_config.device_train_batch_size is not None - mp.set_start_method('spawn') collator = DataCollator( pad_direction=train_config.data.pad_direction, pad_token_id=train_config.model.pad_token_id ) From 80fba0c9f293f464a407b3bbe5da415e7f1812d0 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 26 Feb 2024 17:11:27 -0800 Subject: [PATCH 07/60] Set start method right away --- scripts/train.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/train.py b/scripts/train.py index de97e31be..19292d268 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -8,6 +8,7 @@ import torch import torch.distributed as dist +import multiprocessing as mp import wandb from packaging import version from torch.distributed.fsdp import FullyShardedDataParallel as FSDP @@ -240,6 +241,7 @@ def dummy_init_fn(module: torch.nn.Module) -> None: if __name__ == "__main__": + mp.set_start_method("spawn") # Initialize process group. 
dist.init_process_group(backend="nccl") From 0cc7f2005f7a5e08155221b1cb883b0faa0be1f3 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Tue, 27 Feb 2024 22:07:25 -0800 Subject: [PATCH 08/60] Different seed, so we don't train on the same data twice --- configs/olmo7-ablation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation.yaml index 94cee1151..bdc565135 100644 --- a/configs/olmo7-ablation.yaml +++ b/configs/olmo7-ablation.yaml @@ -1,5 +1,5 @@ run_name: olmo7-ablation -seed: 6198 +seed: 61394 dry_run: false wandb: From cdb6ad96bff262a527a9f1ac316cca218ee27376 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 12:01:00 -0800 Subject: [PATCH 09/60] Config for MosaicML --- configs/mcli/olmo7-ablation.yaml | 35 ++++++++++++++++++++++++++++++++ configs/olmo7-ablation.yaml | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 configs/mcli/olmo7-ablation.yaml diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml new file mode 100644 index 000000000..4c4424f8a --- /dev/null +++ b/configs/mcli/olmo7-ablation.yaml @@ -0,0 +1,35 @@ +run_name: olmo7b-ablation # can't have "_" or "." here +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +gpu_num: 64 +cluster: r7z2 +gpu_type: a100_40gb +integrations: + - integration_type: git_repo + git_repo: allenai/LLM + git_branch: olmo7-ablations + #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 + pip_install: -e . 
+ ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=local_rank0_only + #export OLMO_NO_SSL=1 + + cd LLM + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/olmo7b-ablation.yaml \ + --run_name=olmo7b-ablation-baseline \ + --wandb.name=baseline \ + --compile.fullgraph=false \ + --model.flash_attention=true \ + --save_folder=runs/ \ + --device_train_microbatch_size=2 diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation.yaml index bdc565135..2b74c7ab1 100644 --- a/configs/olmo7-ablation.yaml +++ b/configs/olmo7-ablation.yaml @@ -84,7 +84,7 @@ time_limit: null precision: amp_bf16 fsdp: - wrapping_strategy: by_block + wrapping_strategy: by_block_and_size precision: mixed max_grad_norm: 1.0 From 305d1a86d37dfdf2436757a20e31cbba520feddd Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 12:02:32 -0800 Subject: [PATCH 10/60] Mcli has changed its format --- configs/mcli/olmo7-ablation.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 4c4424f8a..6bd50343a 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,8 +1,9 @@ -run_name: olmo7b-ablation # can't have "_" or "." here +name: olmo7b-ablation # can't have "_" or "." 
here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 -gpu_num: 64 -cluster: r7z2 -gpu_type: a100_40gb +compute: + gpu_num: 64 + cluster: r7z2 + gpu_type: a100_40gb integrations: - integration_type: git_repo git_repo: allenai/LLM From e2d0631a01b85126ffa235b98525983f766e8036 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 12:14:00 -0800 Subject: [PATCH 11/60] Config tweaks --- configs/mcli/olmo7-ablation.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 6bd50343a..d56ff4ed7 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,8 +1,8 @@ -name: olmo7b-ablation # can't have "_" or "." here +name: olmo7-ablation # can't have "_" or "." here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: gpu_num: 64 - cluster: r7z2 + cluster: r12z3 gpu_type: a100_40gb integrations: - integration_type: git_repo @@ -20,7 +20,7 @@ command: |- #export OLMO_NO_SSL=1 cd LLM - + torchrun \ --master_addr $MASTER_ADDR \ --master_port $MASTER_PORT \ @@ -28,7 +28,7 @@ command: |- --node_rank $NODE_RANK \ --nproc_per_node 8 \ scripts/train.py configs/olmo7b-ablation.yaml \ - --run_name=olmo7b-ablation-baseline \ + --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --compile.fullgraph=false \ --model.flash_attention=true \ From 251e89ac60a424e359d4eb8792f7dfe3076eae7c Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 12:52:26 -0800 Subject: [PATCH 12/60] More mcli changes --- configs/mcli/olmo7-ablation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index d56ff4ed7..9448dd836 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,7 +1,7 @@ name: olmo7-ablation # can't have "_" or "." 
here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: - gpu_num: 64 + gpus: 64 cluster: r12z3 gpu_type: a100_40gb integrations: From f979478be402652ba7f1a78336bc88e7691bb15a Mon Sep 17 00:00:00 2001 From: Oyvind Tafjord Date: Wed, 28 Feb 2024 13:31:32 -0800 Subject: [PATCH 13/60] Update downstream tasks --- configs/olmo7-ablation.yaml | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation.yaml index 2b74c7ab1..0d4e7929e 100644 --- a/configs/olmo7-ablation.yaml +++ b/configs/olmo7-ablation.yaml @@ -148,8 +148,8 @@ evaluators: - label: openbook_qa type: downstream - # - label: boolq # requires implemention of the pmi_dc matrix - # type: downstream + - label: boolq + type: downstream - label: sciq type: downstream @@ -157,24 +157,36 @@ evaluators: - label: arc_easy type: downstream - # - label: arc_challenge # requires implemention of the pmi_dc matrix - # type: downstream - - - label: copa + - label: arc_challenge type: downstream - - label: rte + - label: mmlu_stem type: downstream - - label: commitment_bank + - label: mmlu_humanities type: downstream - - label: mrpc + - label: mmlu_social_sciences type: downstream - - label: sst2 + - label: mmlu_other type: downstream + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + data: pad_direction: right num_workers: 16 From 0074545b85277e69588d0270f67657968842ca8a Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 13:58:49 -0800 Subject: [PATCH 14/60] We also changed our formats. 
--- configs/mcli/olmo7-ablation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 9448dd836..875cc53a2 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -9,7 +9,7 @@ integrations: git_repo: allenai/LLM git_branch: olmo7-ablations #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 - pip_install: -e . + pip_install: -e .[train] ssh_clone: true command: |- pip freeze From c1d664b501d3a1b81f47cd1e0d3f54d379496435 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 14:06:44 -0800 Subject: [PATCH 15/60] It's not my day today. --- configs/mcli/olmo7-ablation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 875cc53a2..f275fc002 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -27,7 +27,7 @@ command: |- --nnodes $NUM_NODES \ --node_rank $NODE_RANK \ --nproc_per_node 8 \ - scripts/train.py configs/olmo7b-ablation.yaml \ + scripts/train.py configs/olmo7-ablation.yaml \ --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --compile.fullgraph=false \ From 08df810561ed8119e6440f5d3666eafdc0e54fc6 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 14:54:07 -0800 Subject: [PATCH 16/60] Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 933ed3ec2..8db34f19a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,10 +13,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added the option to directly pass input embeddings to `OLMo` and `OLMoForCausalLM`. - Added support for Python 3.8. - Added code to throw an error if `output_attentions` is set to `True` in forward call to `OLMoForCausalLM`. This functionality hasn't been implemented yet. 
+- Correct scheme displayed in error messages that come from R2 +- Fixed running with multiple data loading workers in LUMI ### Added - Added `output_hidden_states` argument and associated functionality to `OLMo` and `OLMoForCausalLM` to return model intermediate hidden states. - Added MMLU downstream evaluation tasks. +- Ability to read from R2 like we read from S3 + ## [v0.2.4](https://github.com/allenai/OLMo/releases/tag/v0.2.4) - 2024-02-02 From 0b7c26eae0db68e758a76f431bf94952d4ac0686 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 28 Feb 2024 14:56:59 -0800 Subject: [PATCH 17/60] isort --- scripts/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/train.py b/scripts/train.py index 19292d268..fca309a7f 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -2,13 +2,13 @@ import gzip import logging +import multiprocessing as mp import sys from pathlib import Path from typing import Optional, TextIO import torch import torch.distributed as dist -import multiprocessing as mp import wandb from packaging import version from torch.distributed.fsdp import FullyShardedDataParallel as FSDP From 1961c2568ddeaa9d432ae454d3a568f89eeede25 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 00:54:21 -0800 Subject: [PATCH 18/60] Huggingface offline datasets --- configs/mcli/olmo7-ablation.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index f275fc002..473ecdb36 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,7 +1,7 @@ name: olmo7-ablation # can't have "_" or "." 
here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: - gpus: 64 + gpus: 16 cluster: r12z3 gpu_type: a100_40gb integrations: @@ -19,6 +19,12 @@ command: |- export LOG_FILTER_TYPE=local_rank0_only #export OLMO_NO_SSL=1 + # warm up huggingface cache + pushd /root/.cache + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - + popd + export HF_DATASETS_OFFLINE=1 + cd LLM torchrun \ From 9e927f16886fdac271a00a58c216a7e6df864e5d Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 01:03:38 -0800 Subject: [PATCH 19/60] Not sure what's going on with openbookqa. Luca says it's redirects. --- olmo/eval/downstream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/olmo/eval/downstream.py b/olmo/eval/downstream.py index 815a24956..02a1ed4de 100644 --- a/olmo/eval/downstream.py +++ b/olmo/eval/downstream.py @@ -567,7 +567,7 @@ class OpenBookQA(ICLMultiChoiceTaskDataset): metric_type = "len_norm" - def __init__(self, tokenizer, dataset_path="openbookqa", dataset_name=None): + def __init__(self, tokenizer, dataset_path="openbookqa", dataset_name="main"): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, From c3c1a28f37de7ddcdc6ce4289b346bd395e7eac9 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 01:18:13 -0800 Subject: [PATCH 20/60] Don't use compile --- configs/mcli/olmo7-ablation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 473ecdb36..2de0b3f11 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -36,7 +36,7 @@ command: |- scripts/train.py configs/olmo7-ablation.yaml \ --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ - --compile.fullgraph=false \ --model.flash_attention=true \ --save_folder=runs/ \ - --device_train_microbatch_size=2 + --activation_checkpointing=fine_grained \ + 
--device_train_microbatch_size=4 From 7c2b1dda60c5a7bdc8d75437b917c8378e1fd1e9 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 01:37:03 -0800 Subject: [PATCH 21/60] Back to microbatch of 2 --- configs/mcli/olmo7-ablation.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 2de0b3f11..bbf73771b 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -38,5 +38,4 @@ command: |- --wandb.name=baseline \ --model.flash_attention=true \ --save_folder=runs/ \ - --activation_checkpointing=fine_grained \ - --device_train_microbatch_size=4 + --device_train_microbatch_size=2 From 5083517d99aadeb5cbf766e3958b8f469faf2e63 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 09:26:21 -0800 Subject: [PATCH 22/60] Same settings as we did for OLMo 7B --- configs/mcli/olmo7-ablation.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index bbf73771b..18d636486 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -37,5 +37,8 @@ command: |- --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --model.flash_attention=true \ + --fsdp.wrapping_strategy=by_block \ + --fsdp.sharding_strategy=FULL_SHARD \ + --compile.fullgraph=false \ --save_folder=runs/ \ --device_train_microbatch_size=2 From b744d5ef4eb0ebadae82ebb383b421cfaabbc54b Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 09:59:43 -0800 Subject: [PATCH 23/60] Turn off compile --- configs/mcli/olmo7-ablation.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 18d636486..aa4bfcc7f 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -39,6 +39,5 @@ command: |- --model.flash_attention=true \ --fsdp.wrapping_strategy=by_block \ 
--fsdp.sharding_strategy=FULL_SHARD \ - --compile.fullgraph=false \ --save_folder=runs/ \ --device_train_microbatch_size=2 From 38f88171cd3c8c8e6cfb7e0b0106f0629c136417 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 10:18:13 -0800 Subject: [PATCH 24/60] Lots of checkpointing --- configs/mcli/olmo7-ablation.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index aa4bfcc7f..546779002 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,7 +1,7 @@ name: olmo7-ablation # can't have "_" or "." here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: - gpus: 16 + gpus: 32 cluster: r12z3 gpu_type: a100_40gb integrations: @@ -37,7 +37,8 @@ command: |- --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --model.flash_attention=true \ - --fsdp.wrapping_strategy=by_block \ + --fsdp.wrapping_strategy=by_block_and_size \ --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ - --device_train_microbatch_size=2 + --activation_checkpointing=whole_layer \ + --device_train_microbatch_size=4 From d849d40e19e0ece9a75092b9debbb0d29559ddd5 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 11:26:29 -0800 Subject: [PATCH 25/60] Old version of torch --- configs/mcli/olmo7-ablation.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 546779002..697d076bc 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,5 +1,5 @@ name: olmo7-ablation # can't have "_" or "." 
here -image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04 compute: gpus: 32 cluster: r12z3 @@ -37,8 +37,8 @@ command: |- --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --model.flash_attention=true \ - --fsdp.wrapping_strategy=by_block_and_size \ + --fsdp.wrapping_strategy=by_block \ --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ - --activation_checkpointing=whole_layer \ - --device_train_microbatch_size=4 + --device_train_microbatch_size=2 \ + --compile.fullgraph=false From d7b2e597f6d491d75c6153dc00b9f54d8926a2a5 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 12:55:00 -0800 Subject: [PATCH 26/60] Revert "Old version of torch" This reverts commit d849d40e19e0ece9a75092b9debbb0d29559ddd5. --- configs/mcli/olmo7-ablation.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 697d076bc..546779002 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,5 +1,5 @@ name: olmo7-ablation # can't have "_" or "." 
here -image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04 +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: gpus: 32 cluster: r12z3 @@ -37,8 +37,8 @@ command: |- --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --model.flash_attention=true \ - --fsdp.wrapping_strategy=by_block \ + --fsdp.wrapping_strategy=by_block_and_size \ --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ - --device_train_microbatch_size=2 \ - --compile.fullgraph=false + --activation_checkpointing=whole_layer \ + --device_train_microbatch_size=4 From c7738637cd995525ab79fa3ff6452d4651eda3ec Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 12:57:36 -0800 Subject: [PATCH 27/60] mbsz 3 --- configs/mcli/olmo7-ablation.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 546779002..eb03e4114 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -41,4 +41,5 @@ command: |- --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ --activation_checkpointing=whole_layer \ - --device_train_microbatch_size=4 + --device_train_microbatch_size=3 \ + --global_train_batch_size=1536 From 75aacd808be5c8be29141fce1cfa4cd2459a646d Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 21:48:06 -0800 Subject: [PATCH 28/60] More GPUs, bigger batch --- configs/mcli/olmo7-ablation.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index eb03e4114..2c2c2c058 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -1,7 +1,7 @@ name: olmo7-ablation # can't have "_" or "." 
here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: - gpus: 32 + gpus: 128 cluster: r12z3 gpu_type: a100_40gb integrations: @@ -42,4 +42,4 @@ command: |- --save_folder=runs/ \ --activation_checkpointing=whole_layer \ --device_train_microbatch_size=3 \ - --global_train_batch_size=1536 + --global_train_batch_size=6144 From a02ae9c6784d922eead0b6512163063b57ba0f7f Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 29 Feb 2024 21:50:23 -0800 Subject: [PATCH 29/60] Set a group name --- configs/mcli/olmo7-ablation.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation.yaml index 2c2c2c058..9492de6f6 100644 --- a/configs/mcli/olmo7-ablation.yaml +++ b/configs/mcli/olmo7-ablation.yaml @@ -42,4 +42,5 @@ command: |- --save_folder=runs/ \ --activation_checkpointing=whole_layer \ --device_train_microbatch_size=3 \ - --global_train_batch_size=6144 + --global_train_batch_size=6144 \ + --wandb.group=baseline2 From 23951d10956e9c5b96472effa8ea9a17695ffac6 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 1 Mar 2024 16:12:13 -0800 Subject: [PATCH 30/60] Save and eval more often --- configs/olmo7-ablation.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation.yaml index 0d4e7929e..bfe40e48b 100644 --- a/configs/olmo7-ablation.yaml +++ b/configs/olmo7-ablation.yaml @@ -60,9 +60,8 @@ tokenizer: save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} save_overwrite: false # Sharded checkpoints (best for restarts) -save_interval: 1000 +save_interval: 200 save_num_checkpoints_to_keep: -1 -save_interval_ephemeral: 100 # Unsharded checkpoints (for final storage) save_interval_unsharded: null save_num_unsharded_checkpoints_to_keep: -1 From 3d71dd51a9ae4bdd7f29ec782cff19b3919dda17 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 1 Mar 2024 17:36:28 -0800 Subject: [PATCH 31/60] 
Dolma 1.7 config --- ...tion.yaml => olmo7-ablation-baseline.yaml} | 0 configs/mcli/olmo7-ablation-dolma17.yaml | 46 + ...tion.yaml => olmo7-ablation-baseline.yaml} | 0 configs/olmo7-ablation-dolma17.yaml | 1479 +++++++++++++++++ 4 files changed, 1525 insertions(+) rename configs/mcli/{olmo7-ablation.yaml => olmo7-ablation-baseline.yaml} (100%) create mode 100644 configs/mcli/olmo7-ablation-dolma17.yaml rename configs/{olmo7-ablation.yaml => olmo7-ablation-baseline.yaml} (100%) create mode 100644 configs/olmo7-ablation-dolma17.yaml diff --git a/configs/mcli/olmo7-ablation.yaml b/configs/mcli/olmo7-ablation-baseline.yaml similarity index 100% rename from configs/mcli/olmo7-ablation.yaml rename to configs/mcli/olmo7-ablation-baseline.yaml diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml new file mode 100644 index 000000000..4b51908b3 --- /dev/null +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -0,0 +1,46 @@ +name: olmo7-ablation # can't have "_" or "." 
here +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +compute: + gpus: 256 + cluster: r12z3 + gpu_type: a100_40gb +integrations: + - integration_type: git_repo + git_repo: allenai/LLM + git_branch: olmo7-ablations + #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 + pip_install: -e .[train] + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=local_rank0_only + #export OLMO_NO_SSL=1 + + # warm up huggingface cache + pushd /root/.cache + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - + popd + export HF_DATASETS_OFFLINE=1 + + cd LLM + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/olmo7-ablation-dolma17.yaml \ + --run_name=olmo7-ablation-dolma17 \ + --wandb.name=dolma17 \ + --model.flash_attention=true \ + --fsdp.wrapping_strategy=by_block_and_size \ + --fsdp.sharding_strategy=FULL_SHARD \ + --save_folder=runs/ \ + --activation_checkpointing=whole_layer \ + --device_train_microbatch_size=3 \ + --global_train_batch_size=6144 \ + --wandb.group=dolma17 diff --git a/configs/olmo7-ablation.yaml b/configs/olmo7-ablation-baseline.yaml similarity index 100% rename from configs/olmo7-ablation.yaml rename to configs/olmo7-ablation-baseline.yaml diff --git a/configs/olmo7-ablation-dolma17.yaml b/configs/olmo7-ablation-dolma17.yaml new file mode 100644 index 000000000..09392a291 --- /dev/null +++ b/configs/olmo7-ablation-dolma17.yaml @@ -0,0 +1,1479 @@ +run_name: olmo7-ablation +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + 
multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: 
+ num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + #- 
label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- FIXME(review): the 26 paths in this section are identical to the REDPAJAMA STACKEXCHANGE shards listed above (same token count too), so StackExchange is loaded twice and no arXiv shards are listed; confirm the intended arXiv paths and token count + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + -
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (174.418 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy 
+ - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 33% (192.264 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (189.606 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC TAIL 33% (294.252 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file From ead9ac88b224b88de68ec812e7a291785f62fb79 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 1 Mar 2024 17:41:36 -0800 Subject: [PATCH 32/60] Run less GPUs for longer --- configs/mcli/olmo7-ablation-dolma17.yaml | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml index 4b51908b3..1db127731 100644 --- a/configs/mcli/olmo7-ablation-dolma17.yaml +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -1,8 +1,8 @@ name: olmo7-ablation # can't have "_" or "." here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: - gpus: 256 - cluster: r12z3 + gpus: 128 + cluster: r7z2 gpu_type: a100_40gb integrations: - integration_type: git_repo From 20b6514c4d19538234e59c76b38b9bb4477ce2fe Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 1 Mar 2024 22:06:34 -0800 Subject: [PATCH 33/60] Configure remote save folders --- configs/mcli/olmo7-ablation-baseline.yaml | 3 ++- configs/mcli/olmo7-ablation-dolma17.yaml | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml index 9492de6f6..0ac846220 100644 --- a/configs/mcli/olmo7-ablation-baseline.yaml +++ b/configs/mcli/olmo7-ablation-baseline.yaml @@ -43,4 +43,5 @@ command: |- --activation_checkpointing=whole_layer \ --device_train_microbatch_size=3 \ --global_train_batch_size=6144 \ - --wandb.group=baseline2 + --wandb.group=baseline3 \ + --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3 diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml index 1db127731..08482b690 100644 --- a/configs/mcli/olmo7-ablation-dolma17.yaml +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -2,7 +2,7 @@ name: olmo7-ablation # can't have "_" or "." 
here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: gpus: 128 - cluster: r7z2 + cluster: r12z3 gpu_type: a100_40gb integrations: - integration_type: git_repo @@ -43,4 +43,5 @@ command: |- --activation_checkpointing=whole_layer \ --device_train_microbatch_size=3 \ --global_train_batch_size=6144 \ - --wandb.group=dolma17 + --wandb.group=dolma17 \ + --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dolma17 From 85492da32035da61d18b43d662978f554e02091b Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 1 Mar 2024 22:22:10 -0800 Subject: [PATCH 34/60] Give better names to the configs. Also run the baseline somewhere else. --- configs/mcli/olmo7-ablation-baseline.yaml | 8 ++++---- configs/mcli/olmo7-ablation-dolma17.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml index 0ac846220..c8bfda3ed 100644 --- a/configs/mcli/olmo7-ablation-baseline.yaml +++ b/configs/mcli/olmo7-ablation-baseline.yaml @@ -1,8 +1,8 @@ -name: olmo7-ablation # can't have "_" or "." here +name: olmo7-ablation-baseline # can't have "_" or "." here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: - gpus: 128 - cluster: r12z3 + gpus: 64 + cluster: r7z2 gpu_type: a100_40gb integrations: - integration_type: git_repo @@ -33,7 +33,7 @@ command: |- --nnodes $NUM_NODES \ --node_rank $NODE_RANK \ --nproc_per_node 8 \ - scripts/train.py configs/olmo7-ablation.yaml \ + scripts/train.py configs/olmo7-ablation-baseline.yaml \ --run_name=olmo7-ablation-baseline \ --wandb.name=baseline \ --model.flash_attention=true \ diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml index 08482b690..91434475c 100644 --- a/configs/mcli/olmo7-ablation-dolma17.yaml +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -1,4 +1,4 @@ -name: olmo7-ablation # can't have "_" or "." 
here +name: olmo7-ablation-dolma17 # can't have "_" or "." here image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 compute: gpus: 128 From a4c0f09e3570e6f528a076e500192bc19067efa5 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 1 Mar 2024 23:02:53 -0800 Subject: [PATCH 35/60] Warm the cache for starter checkpoints --- docker/Dockerfile.olmo7-ablations.mcli | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 docker/Dockerfile.olmo7-ablations.mcli diff --git a/docker/Dockerfile.olmo7-ablations.mcli b/docker/Dockerfile.olmo7-ablations.mcli new file mode 100644 index 000000000..c1f28fc7e --- /dev/null +++ b/docker/Dockerfile.olmo7-ablations.mcli @@ -0,0 +1,18 @@ +FROM mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 + +# warm up huggingface cache +RUN pushd /root/.cache \ + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - \ + popd +ENV HF_DATASETS_OFFLINE=1 + +# warm up cache of checkpoints +RUN pip install --no-cache cached_path +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/model.pt')" +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/model.pt')" +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/model.pt')" +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/model.pt')" +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/train.pt')" +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/train.pt')" +RUN python -c "import cached_path; 
cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/train.pt')" +RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/train.pt')" From 5e3bda78210c60520254d638d8d18231a250599c Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 4 Mar 2024 11:48:44 -0800 Subject: [PATCH 36/60] Revert "Warm the cache for starter checkpoints" This reverts commit a4c0f09e3570e6f528a076e500192bc19067efa5. --- docker/Dockerfile.olmo7-ablations.mcli | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 docker/Dockerfile.olmo7-ablations.mcli diff --git a/docker/Dockerfile.olmo7-ablations.mcli b/docker/Dockerfile.olmo7-ablations.mcli deleted file mode 100644 index c1f28fc7e..000000000 --- a/docker/Dockerfile.olmo7-ablations.mcli +++ /dev/null @@ -1,18 +0,0 @@ -FROM mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 - -# warm up huggingface cache -RUN pushd /root/.cache \ - curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - \ - popd -ENV HF_DATASETS_OFFLINE=1 - -# warm up cache of checkpoints -RUN pip install --no-cache cached_path -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/model.pt')" -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/model.pt')" -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/model.pt')" -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/model.pt')" -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/train.pt')" -RUN python -c "import cached_path; 
cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/train.pt')" -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/train.pt')" -RUN python -c "import cached_path; cached_path.cached_path('r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/train.pt')" From c077d03cbe1d3824ff17d3df143918e3edc882d3 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 4 Mar 2024 11:51:15 -0800 Subject: [PATCH 37/60] LLM is now OLMo --- configs/mcli/olmo7-ablation-baseline.yaml | 4 ++-- configs/mcli/olmo7-ablation-dolma17.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml index c8bfda3ed..27b41dec5 100644 --- a/configs/mcli/olmo7-ablation-baseline.yaml +++ b/configs/mcli/olmo7-ablation-baseline.yaml @@ -6,7 +6,7 @@ compute: gpu_type: a100_40gb integrations: - integration_type: git_repo - git_repo: allenai/LLM + git_repo: allenai/OLMo git_branch: olmo7-ablations #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 pip_install: -e .[train] @@ -25,7 +25,7 @@ command: |- popd export HF_DATASETS_OFFLINE=1 - cd LLM + cd OLMo torchrun \ --master_addr $MASTER_ADDR \ diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml index 91434475c..96ade4d6f 100644 --- a/configs/mcli/olmo7-ablation-dolma17.yaml +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -6,7 +6,7 @@ compute: gpu_type: a100_40gb integrations: - integration_type: git_repo - git_repo: allenai/LLM + git_repo: allenai/OLMo git_branch: olmo7-ablations #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 pip_install: -e .[train] @@ -25,7 +25,7 @@ command: |- popd export HF_DATASETS_OFFLINE=1 - cd LLM + cd OLMo torchrun \ --master_addr $MASTER_ADDR \ From 17891f1a66fc9fafb9fdbf08d5f9210d3bc1b729 Mon Sep 17 00:00:00 2001 From: Dirk 
Groeneveld Date: Mon, 4 Mar 2024 12:00:14 -0800 Subject: [PATCH 38/60] Adds ability to show all logs --- olmo/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/olmo/util.py b/olmo/util.py index 902926605..3905fe917 100644 --- a/olmo/util.py +++ b/olmo/util.py @@ -59,6 +59,7 @@ def __repr__(self) -> str: class LogFilterType(StrEnum): rank0_only = "rank0_only" local_rank0_only = "local_rank0_only" + firehose = "firehose" def log_extra_field(field_name: str, field_value: Any) -> None: @@ -126,11 +127,12 @@ def local_rank0_filter(record: logging.LogRecord) -> int: else: return 0 - filter = None if log_filter_type == LogFilterType.rank0_only: filter = rank0_filter elif log_filter_type == LogFilterType.local_rank0_only: filter = local_rank0_filter # type: ignore + elif log_filter_type == LogFilterType.firehose: + filter = None else: raise ValueError(log_filter_type) From ce70c8b723acb864a32c7c4edbea600365334e5d Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 4 Mar 2024 12:00:24 -0800 Subject: [PATCH 39/60] Uses ability to show all logs --- configs/mcli/olmo7-ablation-baseline.yaml | 2 +- configs/mcli/olmo7-ablation-dolma17.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml index 27b41dec5..e94804a85 100644 --- a/configs/mcli/olmo7-ablation-baseline.yaml +++ b/configs/mcli/olmo7-ablation-baseline.yaml @@ -16,7 +16,7 @@ command: |- mkdir -p /root/.cache/torch/ export OMP_NUM_THREADS=8 - export LOG_FILTER_TYPE=local_rank0_only + export LOG_FILTER_TYPE=firehose #export OLMO_NO_SSL=1 # warm up huggingface cache diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml index 96ade4d6f..d688ce1a9 100644 --- a/configs/mcli/olmo7-ablation-dolma17.yaml +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -16,7 +16,7 @@ command: |- mkdir -p /root/.cache/torch/ export OMP_NUM_THREADS=8 - export 
LOG_FILTER_TYPE=local_rank0_only + export LOG_FILTER_TYPE=firehose #export OLMO_NO_SSL=1 # warm up huggingface cache From 835dfcf9edfc1ef7f73861072dd468049e19b9e0 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 14:17:40 -0800 Subject: [PATCH 40/60] It's called `all_ranks` now. --- configs/mcli/olmo7-ablation-baseline.yaml | 2 +- configs/mcli/olmo7-ablation-dolma17.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml index e94804a85..805138d98 100644 --- a/configs/mcli/olmo7-ablation-baseline.yaml +++ b/configs/mcli/olmo7-ablation-baseline.yaml @@ -16,7 +16,7 @@ command: |- mkdir -p /root/.cache/torch/ export OMP_NUM_THREADS=8 - export LOG_FILTER_TYPE=firehose + export LOG_FILTER_TYPE=all_ranks #export OLMO_NO_SSL=1 # warm up huggingface cache diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml index d688ce1a9..30c3b70ec 100644 --- a/configs/mcli/olmo7-ablation-dolma17.yaml +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -16,7 +16,7 @@ command: |- mkdir -p /root/.cache/torch/ export OMP_NUM_THREADS=8 - export LOG_FILTER_TYPE=firehose + export LOG_FILTER_TYPE=all_ranks #export OLMO_NO_SSL=1 # warm up huggingface cache From c21f6b9f73f62d428c626c04a6e51d6fbb88a8e1 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 14:18:02 -0800 Subject: [PATCH 41/60] New MMLU evals --- configs/olmo7-ablation-dolma17.yaml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/configs/olmo7-ablation-dolma17.yaml b/configs/olmo7-ablation-dolma17.yaml index 09392a291..8ae01387d 100644 --- a/configs/olmo7-ablation-dolma17.yaml +++ b/configs/olmo7-ablation-dolma17.yaml @@ -1,11 +1,11 @@ -run_name: olmo7-ablation +run_name: olmo7-ablation-dolma17 seed: 61394 dry_run: false wandb: name: ${run_name} project: olmo7-ablations - group: olmo7-ablation + group: 
olmo7-ablation-dolma17 model: d_model: 4096 @@ -171,6 +171,18 @@ evaluators: - label: mmlu_other type: downstream + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + #- label: copa # type: downstream From 5411dc950afd3525193327bed4fdfecd616d6f5f Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 14:18:49 -0800 Subject: [PATCH 42/60] Config for dedupedocs --- configs/mcli/olmo7-ablation-dedupedocs.yaml | 47 + configs/olmo7-ablation-dedupedocs.yaml | 1618 +++++++++++++++++++ 2 files changed, 1665 insertions(+) create mode 100644 configs/mcli/olmo7-ablation-dedupedocs.yaml create mode 100644 configs/olmo7-ablation-dedupedocs.yaml diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml new file mode 100644 index 000000000..30856d568 --- /dev/null +++ b/configs/mcli/olmo7-ablation-dedupedocs.yaml @@ -0,0 +1,47 @@ +name: olmo7-ablation-dedupedocs # can't have "_" or "." 
here +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +compute: + gpus: 64 + cluster: r14z3p2 + gpu_type: h100_80gb +integrations: + - integration_type: git_repo + git_repo: allenai/OLMo + git_branch: olmo7-ablations + #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 + pip_install: -e .[train] + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=all_ranks + #export OLMO_NO_SSL=1 + + # warm up huggingface cache + pushd /root/.cache + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - + popd + export HF_DATASETS_OFFLINE=1 + + cd OLMo + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/olmo7-ablation-dedupedocs.yaml \ + --run_name=olmo7-ablation-dedupedocs \ + --wandb.name=dedupedocs \ + --model.flash_attention=true \ + --fsdp.wrapping_strategy=by_block_and_size \ + --fsdp.sharding_strategy=FULL_SHARD \ + --save_folder=runs/ \ + --activation_checkpointing=whole_layer \ + --device_train_microbatch_size=12 \ + --global_train_batch_size=6144 \ + --wandb.group=dedupedocs \ + --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupedocs diff --git a/configs/olmo7-ablation-dedupedocs.yaml b/configs/olmo7-ablation-dedupedocs.yaml new file mode 100644 index 000000000..41a157ba7 --- /dev/null +++ b/configs/olmo7-ablation-dedupedocs.yaml @@ -0,0 +1,1618 @@ +run_name: olmo7-ablation-dedupedocs +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-dedupedocs + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + 
layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 
1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + 
type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA 
STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- NOTE(review): the paths under this header repeat the REDPAJAMA STACKEXCHANGE shards and token count verbatim, so no arXiv shards are listed; confirm the intended arXiv prefix + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 50% (187.2 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-188-00000.npy + # ~> DOLMA CC TAIL 33% (268.05 GT) + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file From f4ebb62a62d7ffa382b8736d558e9b685446d628 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 14:27:01 -0800 Subject: [PATCH 43/60] Disable the MMLU var evals --- configs/olmo7-ablation-dedupedocs.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/configs/olmo7-ablation-dedupedocs.yaml b/configs/olmo7-ablation-dedupedocs.yaml index 41a157ba7..1256f1fb8 100644 --- a/configs/olmo7-ablation-dedupedocs.yaml +++ b/configs/olmo7-ablation-dedupedocs.yaml @@ -171,17 +171,17 @@ evaluators: - label: mmlu_other type: downstream - - label: mmlu_stem_var - type: downstream + #- label: mmlu_stem_var + # type: downstream - - label: mmlu_humanities_var - type: downstream + #- label: mmlu_humanities_var + # type: downstream - - label: mmlu_social_sciences_var - type: downstream + #- label: mmlu_social_sciences_var + # type: downstream - - label: mmlu_other_var - type: downstream + #- label: mmlu_other_var + # type: downstream #- label: copa # type: downstream From 1a32becc0f48772f6629f9107a1a256340576d73 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 14:36:01 -0800 Subject: [PATCH 44/60] Bring back the vars --- configs/olmo7-ablation-dedupedocs.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/configs/olmo7-ablation-dedupedocs.yaml b/configs/olmo7-ablation-dedupedocs.yaml index 1256f1fb8..41a157ba7 100644 --- 
a/configs/olmo7-ablation-dedupedocs.yaml +++ b/configs/olmo7-ablation-dedupedocs.yaml @@ -171,17 +171,17 @@ evaluators: - label: mmlu_other type: downstream - #- label: mmlu_stem_var - # type: downstream + - label: mmlu_stem_var + type: downstream - #- label: mmlu_humanities_var - # type: downstream + - label: mmlu_humanities_var + type: downstream - #- label: mmlu_social_sciences_var - # type: downstream + - label: mmlu_social_sciences_var + type: downstream - #- label: mmlu_other_var - # type: downstream + - label: mmlu_other_var + type: downstream #- label: copa # type: downstream From aad1e82311c9b9d767797b6878e49e9495c324dc Mon Sep 17 00:00:00 2001 From: Oyvind Tafjord Date: Wed, 6 Mar 2024 14:37:44 -0800 Subject: [PATCH 45/60] Fix uninitialized prompts bug --- olmo/eval/downstream.py | 1 + 1 file changed, 1 insertion(+) diff --git a/olmo/eval/downstream.py b/olmo/eval/downstream.py index 914b787e4..ae3c4e8b9 100644 --- a/olmo/eval/downstream.py +++ b/olmo/eval/downstream.py @@ -1099,6 +1099,7 @@ def __init__( if dataset_name in cats: dataset_names.append(name) self.dev_set = {} + prompts = [None] if prompt_variations == 1: prompts = [None, "inst", "inst+1", "inst+2", "inst+3", "inst+4", "inst+5"] # Need to grab the dev set for the few-shot prompts From b8ad5b86b9e728bc2b4e194fbd005878c604e1a3 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 15:09:05 -0800 Subject: [PATCH 46/60] No more checkpointing --- configs/mcli/olmo7-ablation-dedupedocs.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml index 30856d568..683e4385f 100644 --- a/configs/mcli/olmo7-ablation-dedupedocs.yaml +++ b/configs/mcli/olmo7-ablation-dedupedocs.yaml @@ -40,8 +40,7 @@ command: |- --fsdp.wrapping_strategy=by_block_and_size \ --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ - --activation_checkpointing=whole_layer \ - 
--device_train_microbatch_size=12 \ + --device_train_microbatch_size=3 \ --global_train_batch_size=6144 \ --wandb.group=dedupedocs \ --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupedocs From 0ada79901d442f538d6e56a024852620c4910f71 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 15:29:37 -0800 Subject: [PATCH 47/60] More speed --- configs/mcli/olmo7-ablation-dedupedocs.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml index 683e4385f..f85c3435b 100644 --- a/configs/mcli/olmo7-ablation-dedupedocs.yaml +++ b/configs/mcli/olmo7-ablation-dedupedocs.yaml @@ -37,10 +37,11 @@ command: |- --run_name=olmo7-ablation-dedupedocs \ --wandb.name=dedupedocs \ --model.flash_attention=true \ + --compile.fullgraph=false \ --fsdp.wrapping_strategy=by_block_and_size \ --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ - --device_train_microbatch_size=3 \ + --device_train_microbatch_size=6 \ --global_train_batch_size=6144 \ --wandb.group=dedupedocs \ --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupedocs From 608dfe55a16029502b1cda20b42623a2949ecf88 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 15:54:29 -0800 Subject: [PATCH 48/60] Compile is still broken --- configs/mcli/olmo7-ablation-dedupedocs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml index f85c3435b..f1d9a7bcd 100644 --- a/configs/mcli/olmo7-ablation-dedupedocs.yaml +++ b/configs/mcli/olmo7-ablation-dedupedocs.yaml @@ -37,7 +37,6 @@ command: |- --run_name=olmo7-ablation-dedupedocs \ --wandb.name=dedupedocs \ --model.flash_attention=true \ - --compile.fullgraph=false \ --fsdp.wrapping_strategy=by_block_and_size \ --fsdp.sharding_strategy=FULL_SHARD \ --save_folder=runs/ \ From d43401150703ed4aac7fb7160997c35ac3c1fd7d Mon Sep 17 
00:00:00 2001 From: Dirk Groeneveld Date: Wed, 6 Mar 2024 16:08:16 -0800 Subject: [PATCH 49/60] Let's try SHARD_GRAD_OP --- configs/mcli/olmo7-ablation-dedupedocs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml index f1d9a7bcd..ccd84be45 100644 --- a/configs/mcli/olmo7-ablation-dedupedocs.yaml +++ b/configs/mcli/olmo7-ablation-dedupedocs.yaml @@ -38,9 +38,9 @@ command: |- --wandb.name=dedupedocs \ --model.flash_attention=true \ --fsdp.wrapping_strategy=by_block_and_size \ - --fsdp.sharding_strategy=FULL_SHARD \ + --fsdp.sharding_strategy=SHARD_GRAD_OP \ --save_folder=runs/ \ - --device_train_microbatch_size=6 \ + --device_train_microbatch_size=3 \ --global_train_batch_size=6144 \ --wandb.group=dedupedocs \ --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupedocs From 86e9a3fbf73b348c117af952bb5d6e21206733c3 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Thu, 7 Mar 2024 17:11:11 -0800 Subject: [PATCH 50/60] Config for dedupeparas --- configs/olmo7-ablation-dedupeparas.yaml | 1625 ++++++++++++++++++ scripts/beaker/olmo7-ablation-dedupeparas.sh | 35 + 2 files changed, 1660 insertions(+) create mode 100644 configs/olmo7-ablation-dedupeparas.yaml create mode 100755 scripts/beaker/olmo7-ablation-dedupeparas.sh diff --git a/configs/olmo7-ablation-dedupeparas.yaml b/configs/olmo7-ablation-dedupeparas.yaml new file mode 100644 index 000000000..663c91a41 --- /dev/null +++ b/configs/olmo7-ablation-dedupeparas.yaml @@ -0,0 +1,1625 @@ +run_name: olmo7-ablation-dedupedocs +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-dedupedocs + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false 
+ include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + 
drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var 
+ type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- FIXME: the entries under this heading repeat the REDPAJAMA STACKEXCHANGE shards (redpajama_stackexchange_only, parts 00-25, same 19.63 GT figure); no arXiv shards are actually listed, so StackExchange is included twice. + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 50% (187.2 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + # ~> DOLMA CC TAIL 33% (268.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file diff --git a/scripts/beaker/olmo7-ablation-dedupeparas.sh b/scripts/beaker/olmo7-ablation-dedupeparas.sh new file mode 100755 index 000000000..4c992012c --- /dev/null +++ b/scripts/beaker/olmo7-ablation-dedupeparas.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-dedupeparas.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-dedupeparas --wandb.name=dedupeparas --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=dedupeparas --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupeparas' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-dedupeparas \ + --description olmo7-ablation-dedupeparas \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/general-cirrascale-a100-80g-ib \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ 
+ --yes \ + -- /bin/bash -c "torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" From 6524f87cccfd082790f726610fce7200033b4704 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 8 Mar 2024 14:52:07 -0800 Subject: [PATCH 51/60] New cluster who dis? --- scripts/beaker/olmo7-ablation-dedupeparas.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/beaker/olmo7-ablation-dedupeparas.sh b/scripts/beaker/olmo7-ablation-dedupeparas.sh index 4c992012c..c9ae90d77 100755 --- a/scripts/beaker/olmo7-ablation-dedupeparas.sh +++ b/scripts/beaker/olmo7-ablation-dedupeparas.sh @@ -13,13 +13,12 @@ gantry run \ --description olmo7-ablation-dedupeparas \ --priority high \ --beaker-image olmo-torch2-gantry \ - --cluster ai2/general-cirrascale-a100-80g-ib \ + --cluster ai2/pluto-cirrascale \ --gpus 8 \ --replicas "${NUM_NODES}" \ --leader-selection \ --host-networking \ --nfs \ - --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ --env LOG_FILTER_TYPE=local_rank0_only \ --env OMP_NUM_THREADS=8 \ --env OLMO_TASK=model \ From f6407401dc987be1d1c14c161fe319d92c7f3f58 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 8 Mar 2024 15:03:05 -0800 Subject: [PATCH 52/60] Missing budget --- scripts/beaker/olmo7-ablation-dedupeparas.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/beaker/olmo7-ablation-dedupeparas.sh b/scripts/beaker/olmo7-ablation-dedupeparas.sh index c9ae90d77..729e949c4 100755 --- a/scripts/beaker/olmo7-ablation-dedupeparas.sh +++ b/scripts/beaker/olmo7-ablation-dedupeparas.sh @@ -19,6 +19,8 @@ gantry run \ --leader-selection \ --host-networking \ --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ --env LOG_FILTER_TYPE=local_rank0_only \ --env OMP_NUM_THREADS=8 \ --env OLMO_TASK=model \ From 
a0cb8556873d43c1afcffec5fd5b16c1979ce003 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Fri, 8 Mar 2024 15:25:08 -0800 Subject: [PATCH 53/60] Warm HF Cache in Beaker --- scripts/beaker/olmo7-ablation-dedupeparas.sh | 2 +- scripts/beaker/warm_hf_cache.sh | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 scripts/beaker/warm_hf_cache.sh diff --git a/scripts/beaker/olmo7-ablation-dedupeparas.sh b/scripts/beaker/olmo7-ablation-dedupeparas.sh index 729e949c4..0f9e6badf 100755 --- a/scripts/beaker/olmo7-ablation-dedupeparas.sh +++ b/scripts/beaker/olmo7-ablation-dedupeparas.sh @@ -33,4 +33,4 @@ gantry run \ --shared-memory 10GiB \ --venv base \ --yes \ - -- /bin/bash -c "torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" diff --git a/scripts/beaker/warm_hf_cache.sh b/scripts/beaker/warm_hf_cache.sh new file mode 100644 index 000000000..91481f3ea --- /dev/null +++ b/scripts/beaker/warm_hf_cache.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -ex + +mkdir -p /root/.cache +pushd /root/.cache +curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - +popd +export HF_DATASETS_OFFLINE=1 From 963aa0bfcfa25912fa5aaf687b862e0bb4047577 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 11 Mar 2024 17:38:40 -0700 Subject: [PATCH 54/60] Adds a script to continue the baseline run on Beaker --- scripts/beaker/olmo7-ablation-baseline.sh | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100755 scripts/beaker/olmo7-ablation-baseline.sh diff --git a/scripts/beaker/olmo7-ablation-baseline.sh 
b/scripts/beaker/olmo7-ablation-baseline.sh new file mode 100755 index 000000000..032419720 --- /dev/null +++ b/scripts/beaker/olmo7-ablation-baseline.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-baseline.yaml +NUM_NODES=4 +ARGS='--run_name=olmo7-ablation-baseline --wandb.name=baseline --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=baseline --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3 --load_path=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3/step7800' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-baseline \ + --description olmo7-ablation-baseline \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" From 66dc9536b75ae12f99e57eda3eb505a26558d1d4 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 11 Mar 2024 23:02:02 -0700 Subject: [PATCH 
55/60] 8 nodes --- scripts/beaker/olmo7-ablation-baseline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/beaker/olmo7-ablation-baseline.sh b/scripts/beaker/olmo7-ablation-baseline.sh index 032419720..cd64e59e1 100755 --- a/scripts/beaker/olmo7-ablation-baseline.sh +++ b/scripts/beaker/olmo7-ablation-baseline.sh @@ -3,7 +3,7 @@ set -ex CONFIG_PATH=configs/olmo7-ablation-baseline.yaml -NUM_NODES=4 +NUM_NODES=8 ARGS='--run_name=olmo7-ablation-baseline --wandb.name=baseline --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=baseline --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3 --load_path=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3/step7800' gantry run \ From 87fc58d6b85f47f8423bf3dd111a7846346b78e6 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 11 Mar 2024 23:14:52 -0700 Subject: [PATCH 56/60] 2xnonweb --- configs/olmo7-ablation-2xnonweb.yaml | 1704 +++++++++++++++++++++ scripts/beaker/olmo7-ablation-2xnonweb.sh | 36 + 2 files changed, 1740 insertions(+) create mode 100644 configs/olmo7-ablation-2xnonweb.yaml create mode 100755 scripts/beaker/olmo7-ablation-2xnonweb.sh diff --git a/configs/olmo7-ablation-2xnonweb.yaml b/configs/olmo7-ablation-2xnonweb.yaml new file mode 100644 index 000000000..c9b96dc46 --- /dev/null +++ b/configs/olmo7-ablation-2xnonweb.yaml @@ -0,0 +1,1704 @@ +run_name: olmo7-ablation-2xnonweb +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-2xnonweb + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + 
layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 
1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + 
type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT x 3) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT x 2) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT x 3) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA 
STACKEXCHANGE (19.63 GT x 2) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> CC NEWS (15 GT x 2) + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + 
- s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00001.npy + # ~> REDPAJAMA ARXIV (19.63 GT x 2) -- NOTE(review): the paths listed under this heading are redpajama_stackexchange_only shards and repeat the STACKEXCHANGE section (same 19.63 GT figure); confirm whether arXiv shards were intended here + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT x 2) + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT x 5) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 35% (127.9 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 35 (164.5 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + # ~> DOLMA CC TAIL 35% (179.1 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file diff --git a/scripts/beaker/olmo7-ablation-2xnonweb.sh b/scripts/beaker/olmo7-ablation-2xnonweb.sh new file mode 100755 index 000000000..58de8261a --- /dev/null +++ b/scripts/beaker/olmo7-ablation-2xnonweb.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-2xnonweb.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-2xnonweb --wandb.name=2xnonweb --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=2xnonweb --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/2xnonweb' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-2xnonweb \ + --description olmo7-ablation-2xnonweb \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- 
/bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" From 6ff85b9f393705b5162e3758dcbf2c224692d9e5 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 11 Mar 2024 23:39:23 -0700 Subject: [PATCH 57/60] Refheavy --- ...blation-2xnonweb.yaml => olmo7-ablation-refheavy.yaml} | 4 ++-- ...o7-ablation-2xnonweb.sh => olmo7-ablation-refheavy.sh} | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) rename configs/{olmo7-ablation-2xnonweb.yaml => olmo7-ablation-refheavy.yaml} (99%) rename scripts/beaker/{olmo7-ablation-2xnonweb.sh => olmo7-ablation-refheavy.sh} (81%) diff --git a/configs/olmo7-ablation-2xnonweb.yaml b/configs/olmo7-ablation-refheavy.yaml similarity index 99% rename from configs/olmo7-ablation-2xnonweb.yaml rename to configs/olmo7-ablation-refheavy.yaml index c9b96dc46..e764892c9 100644 --- a/configs/olmo7-ablation-2xnonweb.yaml +++ b/configs/olmo7-ablation-refheavy.yaml @@ -1,11 +1,11 @@ -run_name: olmo7-ablation-2xnonweb +run_name: olmo7-ablation-refheavy seed: 61394 dry_run: false wandb: name: ${run_name} project: olmo7-ablations - group: olmo7-ablation-2xnonweb + group: olmo7-ablation-refheavy model: d_model: 4096 diff --git a/scripts/beaker/olmo7-ablation-2xnonweb.sh b/scripts/beaker/olmo7-ablation-refheavy.sh similarity index 81% rename from scripts/beaker/olmo7-ablation-2xnonweb.sh rename to scripts/beaker/olmo7-ablation-refheavy.sh index 58de8261a..fe1c61aa1 100755 --- a/scripts/beaker/olmo7-ablation-2xnonweb.sh +++ b/scripts/beaker/olmo7-ablation-refheavy.sh @@ -2,15 +2,15 @@ set -ex -CONFIG_PATH=configs/olmo7-ablation-2xnonweb.yaml +CONFIG_PATH=configs/olmo7-ablation-refheavy.yaml NUM_NODES=8 -ARGS='--run_name=olmo7-ablation-2xnonweb --wandb.name=2xnonweb --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size 
--fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=2xnonweb --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/2xnonweb' +ARGS='--run_name=olmo7-ablation-refheavy --wandb.name=refheavy --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=refheavy --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/refheavy' gantry run \ --allow-dirty \ --workspace ai2/llm-testing \ - --task-name olmo7-ablation-2xnonweb \ - --description olmo7-ablation-2xnonweb \ + --task-name olmo7-ablation-refheavy \ + --description olmo7-ablation-refheavy \ --priority high \ --beaker-image olmo-torch2-gantry \ --cluster ai2/pluto-cirrascale \ From cd5c19628b74528e706199a032bccb45f12228b1 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 18 Mar 2024 11:06:18 -0700 Subject: [PATCH 58/60] Final2 config --- configs/olmo7-ablation-final2.yaml | 1258 +++++++++++++++++++++++ scripts/beaker/olmo7-ablation-final2.sh | 36 + 2 files changed, 1294 insertions(+) create mode 100644 configs/olmo7-ablation-final2.yaml create mode 100755 scripts/beaker/olmo7-ablation-final2.sh diff --git a/configs/olmo7-ablation-final2.yaml b/configs/olmo7-ablation-final2.yaml new file mode 100644 index 000000000..6e0b465e5 --- /dev/null +++ b/configs/olmo7-ablation-final2.yaml @@ -0,0 +1,1258 @@ +run_name: olmo7-ablation-final2 +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-final2 + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: 
default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # 
persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: 
downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # MEGAWIKA v1 (4.6 
GT) + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-01-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-02-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - 
s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - 
s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-23-00002.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- NOTE(review): the shards in this section repeat the redpajama_stackexchange_only paths (and token count) of the STACKEXCHANGE section above; confirm whether arXiv shards were intended + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V1 (16.5 GT v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline) + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + # ~> CC NEWS (14.3 GT) + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00001.npy + - 
s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-5-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-8-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-8-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-8-00002.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy 
+ - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + 
#################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (138.4 GT) + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-113-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-122-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-160-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + # ~> REDDIT (79.9 GT) + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 50% (178.4 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-006-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + # ~> DOLMA CC TAIL 33% (191.4 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy \ No newline at end of file diff --git a/scripts/beaker/olmo7-ablation-final2.sh b/scripts/beaker/olmo7-ablation-final2.sh new file mode 100755 index 000000000..3fbf72573 --- /dev/null +++ b/scripts/beaker/olmo7-ablation-final2.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-final2.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-final2 --wandb.name=final2 --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=final2 --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/final2' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-final2 \ + --description olmo7-ablation-final2 \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env 
OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" From 1fadaf8950d97fcf1adde62f28de14eeb79bada0 Mon Sep 17 00:00:00 2001 From: Dirk Groeneveld Date: Mon, 18 Mar 2024 11:08:06 -0700 Subject: [PATCH 59/60] Indentation to make comparisons work --- configs/olmo7-ablation-dedupedocs.yaml | 2818 ++++++++++++------------ 1 file changed, 1409 insertions(+), 1409 deletions(-) diff --git a/configs/olmo7-ablation-dedupedocs.yaml b/configs/olmo7-ablation-dedupedocs.yaml index 41a157ba7..6cd75f2b6 100644 --- a/configs/olmo7-ablation-dedupedocs.yaml +++ b/configs/olmo7-ablation-dedupedocs.yaml @@ -207,1412 +207,1412 @@ data: persistent_workers: true timeout: 0 paths: - ######### NON WEB DATA ######### - # ~> GUTENBERG BOOKS (5.256 GT) - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy - # ~> PES2O STEM PAPERS (57.21 GT) - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy - # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy - # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy - - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy - # ~> REDPAJAMA ARXIV (19.63 GT) - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy - - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy - # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - # ~> TULU FLAN V0 (1.84 GT) - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy - - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy - - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy - #################################### - ######### CODE ######### - # ~> STARCODER (263.775 GT) - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy - - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy - - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy - - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy - - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy - - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy - - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy - #################################### - ######### WEB HIGH QUALITY ######### - # ~> C4 (157.2 GT) - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy - - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy - - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy - - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy - - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy - - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy - # ~> REDDIT (79.988 GT) - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy - # ~> FALCON (547.341 GT) - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy - - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy - - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy - #################################### - ######### WEB REST ######### - # ~> DOLMA CC HEAD 50% (187.2 GT) - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy - # ~> DOLMA CC MIDDLE 33% (242.05 GT) - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-188-00000.npy - # ~> DOLMA CC TAIL 33% (268.05 GT) - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy - - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy - - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) FIXME(review): every path in this section repeats the REDPAJAMA STACKEXCHANGE list above (same redpajama_stackexchange_only prefix, same token count), so StackExchange is sampled twice and no arXiv data is loaded; confirm the intended arXiv prefix and token count + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 50% (187.2 GT) + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-188-00000.npy + # ~> DOLMA CC TAIL 33% (268.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file From 8949bd85bdcbffdd3dbfe5bf4d82158905e8ac9b Mon Sep 17 00:00:00 2001 From: Luca Soldaini Date: Thu, 21 Mar 2024 15:51:09 -0700 Subject: [PATCH 60/60] Added deprecation for memmap (#517) Co-authored-by: Pete --- .github/workflows/main.yml | 2 +- scripts/prepare_memmap_dataset.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1093adb3d..9da15bccb 
100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -62,7 +62,7 @@ jobs: task: name: Data pipeline run: | - python scripts/prepare_memmap_dataset.py test_fixtures/*.json.gz -o /tmp/c4-sample.npy --validate + python scripts/prepare_memmap_dataset.py test_fixtures/*.json.gz -o /tmp/c4-sample.npy --validate --ack-deprecated steps: - uses: actions/checkout@v3 diff --git a/scripts/prepare_memmap_dataset.py b/scripts/prepare_memmap_dataset.py index 7a802ff4c..f3b9aff31 100644 --- a/scripts/prepare_memmap_dataset.py +++ b/scripts/prepare_memmap_dataset.py @@ -374,6 +374,7 @@ def make_source_and_target( "--safe-mode/--fast-mode", default=False, help="Safe mode caches locally and decompresses using gzip.open" ) @click.option("-j", "--workers", "max_workers", type=int, default=1, help="Defaults to number of CPUs") +@click.option("--ack-deprecated", is_flag=True, help="Acknowledge that this command is deprecated") def main( src: Tuple[str, ...], output: str, @@ -389,7 +390,20 @@ def main( paths_per_worker: int = 1, max_workers: int = 1, cache_dir: Optional[str] = None, + ack_deprecated: bool = False, ): + print("WARNING: THIS SCRIPT IS DEPRECATED!!!") + print( + "Consider using the tokenization tool in the Dolma toolkit: " + "https://github.com/allenai/dolma/blob/main/docs/tokenize.md" + ) + + if not ack_deprecated: + continue_question = input("Do you want to continue? [y/N]: ") + if not (c := continue_question.lower().strip()) or c != "y": + print("Aborting.") + return + print("=== CONFIGURATION ===") print(f"src: {src}") print(f"output: {output}")